In [14]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline 

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import Pipeline

import imblearn
from imblearn.over_sampling import SMOTE

In [63]:
def predictCrop(array):
    """Predict crop-class probabilities for a single observation.

    Every call reloads both Sentinel season CSVs, rebuilds the training set
    and refits the random forest before predicting, so a call is expensive.

    Parameters
    ----------
    array : array-like of 18 numbers
        Feature values for one sample (bands, NDVI and season), in the same
        order as the training feature columns: the CSV columns that remain
        after dropping 'system:index', '.geo' and 'CID', followed by 'season'.

    Returns
    -------
    numpy.ndarray
        A single-row array holding the predicted probability of each crop
        class (9 crops), ordered like the fitted classifier's ``classes_``.
    """
    # Load the per-season training tables and tag each row with its season.
    sentinel_1 = pd.read_csv('Sentinel_training_farms_Tuned_s1.csv')
    sentinel_2 = pd.read_csv('Sentinel_training_farms_Tuned_s2.csv')

    sentinel_1['season'] = 1
    sentinel_2['season'] = 2

    # Stack season 1 on top of season 2. DataFrame.append was removed in
    # pandas 2.0; pd.concat with default arguments is the direct equivalent.
    data = pd.concat([sentinel_1, sentinel_2])

    # Drop the columns that are not used as model features.
    data = data.drop(['system:index', '.geo'], axis=1)

    # 'CID' is the target label; every remaining column is a feature.
    y = data['CID']
    X = data.drop(['CID'], axis=1)

    # Seed the forest like the split and SMOTE below so that repeated calls
    # with the same input return the same probabilities.
    model = RandomForestClassifier(n_estimators=250, random_state=123)

    # Modeling code in a pipeline
    my_pipeline = Pipeline(steps=[('model', model)])

    # Stratified 70/30 split; only the training part is used in this function.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.3, shuffle=True, stratify=y, random_state=123
    )

    # Oversample the training split ONLY with SMOTE (this balances the
    # classes; it is not feature scaling).
    smt = SMOTE(random_state=123)
    X_train_smote, y_train_smote = smt.fit_resample(X_train, y_train)

    # Fit the model on the oversampled training data.
    my_pipeline.fit(X_train_smote, y_train_smote)

    # Wrap the sample in a list so the estimator receives a 2-D, one-row input.
    return my_pipeline.predict_proba([array])

### Example usage

In [71]:
# Example call on one sample row. `test2` is assigned in cell In [72] below,
# so that cell has to be executed before this one on a fresh kernel.
predictCrop(test2)

array([[0.   , 0.   , 0.   , 0.   , 0.972, 0.008, 0.   , 0.004, 0.016]])

In [72]:
# Select one row by position to use as a sample input for predictCrop.
# NOTE(review): `data` is not defined at notebook level in the cells shown here
# (inside predictCrop it is only a local variable) — confirm which cell builds it.
test2 = data.iloc[10150]
test2 # CID=5

B1        0.117100
B10       0.001000
B11       0.146000
B12       0.078000
B2        0.091800
B3        0.083100
B4        0.069400
B5        0.096300
B6        0.176200
B7        0.216100
B8        0.214800
B8A       0.241100
B9        0.054300
NDVI      0.511612
QA10      0.000000
QA20      0.000000
QA60      0.000000
season    1.000000
Name: 10150, dtype: float64