In [17]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline 

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import Pipeline

import imblearn
from imblearn.over_sampling import SMOTE
import pickle 

In [4]:
# Load the two per-season Sentinel training tables.
sentinel_1 = pd.read_csv('Sentinel_training_farms_Tuned_s1.csv')
sentinel_2 = pd.read_csv('Sentinel_training_farms_Tuned_s2.csv')

# Tag every row with the season it came from; 'season' stays in X as a feature.
sentinel_1['season'] = 1
sentinel_2['season'] = 2

# Stack season 1 on top of season 2. DataFrame.append was removed in
# pandas 2.0; pd.concat is the supported equivalent and, like append,
# keeps the original row labels of each table.
data = pd.concat([sentinel_1, sentinel_2])

# Remove the 'system:index' and '.geo' columns so they are not passed on as
# features. Reassigning (rather than inplace=True) avoids in-place mutation.
data = data.drop(columns=['system:index', '.geo'])

# Target is the 'CID' column; every remaining column is a feature.
y = data['CID']
X = data.drop(columns=['CID'])

In [22]:
def trainModel(X, y, filename='trained_model.sav'):
    """Train a random-forest classifier on (X, y) and pickle it to disk.

    Steps:
      1. Split the data 70/30, stratified on ``y`` (seeded).
      2. Oversample the training portion with SMOTE so the classes are
         balanced; the held-out portion is left untouched.
      3. Fit a 250-tree ``RandomForestClassifier`` (wrapped in a single-step
         ``Pipeline``) on the oversampled training rows.
      4. Pickle the fitted pipeline to ``filename``.

    Parameters
    ----------
    X : feature table, one row per sample.
        NOTE(review): earlier notes describe 18 feature columns (bands, ndvi,
        season) and 9 crop classes -- confirm against the training CSVs.
    y : class label for each row of ``X``.
    filename : path the fitted pipeline is pickled to. Defaults to
        'trained_model.sav'.

    Returns
    -------
    The fitted ``Pipeline``. (Previously the function returned the result of
    ``pickle.dump``, which is always ``None``.)
    """
    # Hold out 30% of the rows. The held-out part is not evaluated inside this
    # function; only the training part is used below.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.3, shuffle=True, stratify=y, random_state=123
    )

    # Oversample the training data ONLY, so synthetic rows never reach the
    # held-out split.
    smt = SMOTE(random_state=123)
    X_train_smote, y_train_smote = smt.fit_resample(X_train, y_train)

    # Seed the forest as well, so the whole training run is reproducible
    # (the split and SMOTE were already seeded with 123).
    my_pipeline = Pipeline(
        steps=[('model', RandomForestClassifier(n_estimators=250, random_state=123))]
    )
    my_pipeline.fit(X_train_smote, y_train_smote)

    # Save the fitted pipeline; the context manager guarantees the file is
    # flushed and closed even if pickling fails.
    with open(filename, 'wb') as model_file:
        pickle.dump(my_pipeline, model_file)

    return my_pipeline

In [23]:
# Run training on the feature table X and label column y loaded above.
trained_rf_model = trainModel(X=X, y=y)

In [34]:
def predictCrop(array, filename='trained_model.sav'):
    """Load the pickled model from disk and predict crop-type probabilities.

    Parameters
    ----------
    array : feature values for ONE sample. It is wrapped in a list before
        being passed to the model, i.e. treated as a single row.
        NOTE(review): described as the 13 Sentinel bands, QA10/QA20/QA60,
        ndvi and season -- confirm the order matches the training columns.
    filename : path of the pickled model. Defaults to 'trained_model.sav'.

    Returns
    -------
    Whatever the loaded model's ``predict_proba`` returns for the single-row
    input.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only load model files
    # that were produced by a trusted training run.
    with open(filename, 'rb') as model_file:
        trained_model = pickle.load(model_file)

    # predict_proba expects a 2-D input, hence the one-row wrapper.
    predicted_crop_type = trained_model.predict_proba([array])

    return predicted_crop_type