In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline 

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from sklearn.pipeline import Pipeline

import imblearn
from imblearn.over_sampling import SMOTE
import pickle 

In [2]:
# Read the training table, then separate the label column from the features.
data = pd.read_csv('Sentinel_training_V2.csv')

X = data.drop(columns=['CID'])  # every column except the 'CID' label
y = data['CID']                 # 'CID' is the target column

In [3]:
# Preview the first five rows of the feature table.
X.head(n=5)

Unnamed: 0,B1,B10,B11,B12,B2,B3,B4,B5,B6,B7,B8,B8A,B9,NDVI
0,0.1242,0.0013,0.2386,0.1288,0.1175,0.1193,0.1126,0.1448,0.3292,0.3911,0.3598,0.4117,0.0819,0.523285
1,0.1302,0.0012,0.2227,0.1028,0.0922,0.0919,0.0518,0.1136,0.382,0.4577,0.4466,0.4833,0.0969,0.792135
2,0.1242,0.0013,0.2216,0.101,0.092,0.0913,0.0518,0.1177,0.3874,0.4658,0.4506,0.4982,0.0819,0.79379
3,0.1302,0.0012,0.2017,0.0812,0.09,0.0906,0.0461,0.1045,0.4114,0.5028,0.4752,0.5215,0.0969,0.823134
4,0.1302,0.0012,0.2048,0.0826,0.0902,0.0905,0.0479,0.1055,0.4162,0.506,0.479,0.5341,0.0969,0.818182


In [4]:
def trainModel(X, y, filename='trained_model_sentinelV2_smote.sav'):
    """Train a SMOTE-balanced random forest, save it to disk and return it.

    The data is split 70/30 (stratified on ``y``). SMOTE oversampling is
    applied to the training portion only, and a 250-tree random forest
    wrapped in a one-step ``Pipeline`` is fitted on the resampled
    training data. The fitted pipeline is then pickled to ``filename``.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split``.
    y : class labels aligned with ``X``; also used for stratification.
    filename : str, optional
        Path the fitted pipeline is pickled to. Defaults to the file
        name this notebook has always used.

    Returns
    -------
    The fitted pipeline, i.e. the same object that was written to disk.
    """
    # Split data. The 30% test portion is held out and is not used by
    # this function, hence the underscore-prefixed names.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.3, shuffle=True, stratify=y, random_state=123)

    # Instantiate the model. The forest is seeded like the split and
    # SMOTE so that repeated runs produce the same model.
    model = RandomForestClassifier(n_estimators=250, random_state=123)

    # Modeling code in a pipeline
    my_pipeline = Pipeline(steps=[('model', model)])

    # Oversample the training data ONLY (SMOTE balances classes; it does
    # not scale features), so the held-out split stays untouched.
    smt = SMOTE(random_state=123)
    X_train_smote, y_train_smote = smt.fit_resample(X_train, y_train)

    # Fit the pipeline on the oversampled training data.
    fitted_pipeline = my_pipeline.fit(X_train_smote, y_train_smote)

    # Save the model to disk; the context manager guarantees the file is
    # flushed and closed even if pickling fails.
    with open(filename, 'wb') as model_file:
        pickle.dump(fitted_pipeline, model_file)

    # pickle.dump() returns None, so return the fitted object explicitly.
    return fitted_pipeline


In [5]:
# Run the training routine on the full feature table and label column.
trained_rf_model = trainModel(X=X, y=y)

In [8]:
def predictCrop(array, filename='trained_model_sentinelV2_smote.sav'):
    """Load the saved model from disk and predict crop-type probabilities.

    Parameters
    ----------
    array : one sample holding the 13 Sentinel bands and NDVI. It is
        wrapped in a list so the model receives a single-row batch.
        (Presumably the values must follow the column order used at
        training time; confirm against the training table.)
    filename : str, optional
        Path of the pickled model. Defaults to the file name this
        notebook has always used.

    Returns
    -------
    The result of ``predict_proba`` on the single-row batch.
    """
    # NOTE: pickle.load can execute arbitrary code, so only point this
    # at model files produced by a trusted source.
    # The context manager closes the file as soon as the model is loaded.
    with open(filename, 'rb') as model_file:
        trained_model = pickle.load(model_file)

    # Predict crop-type probabilities for the one sample.
    predicted_crop_type = trained_model.predict_proba([array])

    return predicted_crop_type