In [54]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
import pickle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_fscore_support

In [55]:
SEED = 69

In [56]:
data = pd.read_csv('E:\DataScience-Projects\Crop_Recommendation\Data\Raw\Crop_Recommendation.csv')

In [57]:
x, y = data.drop('label', axis = 'columns'), data.label

In [58]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.10, random_state = 0)

In [59]:
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [60]:
def GetModels():
    basedModels = []
    basedModels.append(('LR'   , LogisticRegression(n_jobs = -1)))
    basedModels.append(('DRC' , DecisionTreeClassifier()))
    basedModels.append(('AB'   , AdaBoostClassifier()))
    basedModels.append(('GBM'  , GradientBoostingClassifier()))
    basedModels.append(('RF'   , RandomForestClassifier(n_jobs = -1, n_estimators=200)))
    basedModels.append(('ET'   , ExtraTreesClassifier(n_jobs = -1, n_estimators=200)))
    basedModels.append(('SVC'   , SVC()))
    return basedModels

In [75]:
def print_report(x_train, y_train, x_test, y_test, models):
    
    for name, model in models:
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print('*****',name,'*****')
        report = classification_report(y_test, y_pred)
        print(report)
        

    
        
        
        
        

In [76]:
models = GetModels()
metrics = print_report(x_train, y_train, x_test, y_test, models)

***** LR *****
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00         8
      banana       1.00      1.00      1.00         8
   blackgram       1.00      0.73      0.84        11
    chickpea       1.00      1.00      1.00        10
     coconut       1.00      1.00      1.00         9
      coffee       1.00      1.00      1.00        10
      cotton       0.75      1.00      0.86         6
      grapes       1.00      1.00      1.00         6
        jute       0.91      1.00      0.95        10
 kidneybeans       1.00      1.00      1.00        10
      lentil       0.90      1.00      0.95         9
       maize       0.91      0.83      0.87        12
       mango       1.00      1.00      1.00        10
   mothbeans       0.94      1.00      0.97        15
    mungbean       1.00      1.00      1.00        12
   muskmelon       1.00      1.00      1.00        12
      orange       1.00      1.00      1.00        13
      papaya

  _warn_prf(average, modifier, msg_start, len(result))


***** GBM *****
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00         8
      banana       1.00      1.00      1.00         8
   blackgram       1.00      1.00      1.00        11
    chickpea       1.00      1.00      1.00        10
     coconut       1.00      1.00      1.00         9
      coffee       1.00      1.00      1.00        10
      cotton       1.00      1.00      1.00         6
      grapes       1.00      1.00      1.00         6
        jute       1.00      1.00      1.00        10
 kidneybeans       1.00      1.00      1.00        10
      lentil       1.00      1.00      1.00         9
       maize       1.00      1.00      1.00        12
       mango       1.00      1.00      1.00        10
   mothbeans       1.00      1.00      1.00        15
    mungbean       1.00      1.00      1.00        12
   muskmelon       1.00      1.00      1.00        12
      orange       1.00      1.00      1.00        13
      papay

In [65]:
models[]

Unnamed: 0,Model,Precision,Recall,F1,Support
0,SVC,1.0,1.0,1.0,8
1,SVC,1.0,1.0,1.0,8
2,SVC,0.916667,1.0,0.956522,11
3,SVC,1.0,1.0,1.0,10
4,SVC,1.0,1.0,1.0,9
5,SVC,1.0,1.0,1.0,10
6,SVC,1.0,1.0,1.0,6
7,SVC,1.0,1.0,1.0,6
8,SVC,1.0,1.0,1.0,10
9,SVC,0.769231,1.0,0.869565,10


In [32]:
def ScoreModels(X_train, y_train,models):
    print('****Traianing Models****')

    num_folds = 10
    results = []
    names = []
    for name, model in models:
        kfold = StratifiedKFold(n_splits=num_folds, random_state=SEED, shuffle = True)
        cv_results = cross_val_score(model, X_train, y_train, cv=kfold, scoring='accuracy', n_jobs = -1)
        results.append(cv_results)
        names.append(name)
        msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
        print(msg)
        
    return names, results

In [33]:
models = GetModels()

names,results = ScoreModels(x_train, y_train,models)

Traianing Models
LR: 0.959596 (0.012777)
DRC: 0.988384 (0.005997)
AB: 0.139394 (0.003350)
GBM: 0.990404 (0.004195)
RF: 0.994949 (0.003912)
ET: 0.990909 (0.007423)
SVC: 0.976263 (0.012381)


In [34]:
names,results = ScoreModels(x_train_scaled, y_train,models)

Traianing Models
LR: 0.940404 (0.009793)
DRC: 0.988384 (0.005556)
AB: 0.139394 (0.003350)
GBM: 0.990909 (0.003779)
RF: 0.994444 (0.004195)
ET: 0.991919 (0.007889)
SVC: 0.983333 (0.010114)


In [43]:
x_train

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
98,78,35,44,26.543481,84.673536,7.072656,183.622266
30,73,57,41,21.446540,84.943760,5.824709,272.201720
712,52,63,19,29.589490,68.321768,6.928899,67.530212
1747,34,65,48,41.419684,90.038631,6.665025,199.309643
703,42,73,25,34.036792,67.211138,6.501869,73.235736
...,...,...,...,...,...,...,...
1033,102,71,48,28.654563,79.286937,5.695268,102.463378
1731,34,68,51,27.347349,94.177567,6.687088,40.351531
763,35,64,15,28.474423,63.536045,6.500145,69.527441
835,39,65,23,25.434598,69.126134,7.685959,41.026829


In [68]:
models[3][1].predict


<bound method GradientBoostingClassifier.predict of GradientBoostingClassifier()>

In [77]:
rfc = RandomForestClassifier()
rfc.fit(x_train, y_train)
y_pred = rfc.predict(x_test)
report = classification_report(y_test, y_pred, output_dict=True)

In [78]:
report

{'apple': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8},
 'banana': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 8},
 'blackgram': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 11},
 'chickpea': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10},
 'coconut': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 9},
 'coffee': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10},
 'cotton': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6},
 'grapes': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 6},
 'jute': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 10},
 'kidneybeans': {'precision': 1.0,
  'recall': 1.0,
  'f1-score': 1.0,
  'support': 10},
 'lentil': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 9},
 'maize': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 12},
 'mango': {'precision': 1.0, 'recall': 1.0, 'f1-sco