# Data Loading and Exploration

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Load the crop-recommendation dataset from the CSV next to the notebook.
df = pd.read_csv("MainCrop_recommendation.csv")
# Preview ten random rows; the fixed seed keeps the preview identical on
# every Restart & Run All instead of changing with each execution.
df.sample(10, random_state=42)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
133,64,35,23,23.020383,61.89472,5.680361,63.038434,maize
1371,120,25,50,28.054578,94.816374,6.32721,21.848693,muskmelon
488,27,71,24,31.464179,48.176315,7.064973,165.405354,pigeonpeas
832,2,78,23,21.318521,66.439346,7.320515,45.426168,lentil
645,2,39,15,28.072196,82.911647,6.478557,49.618653,mungbean
1120,39,145,201,36.731266,80.589319,5.7756,72.242308,grapes
1511,122,59,18,23.500099,83.63489,6.219469,79.813282,cotton
1062,91,75,55,27.48613,76.112398,6.212369,109.276885,banana
1177,16,130,201,29.120338,82.790929,5.682395,68.850305,grapes
184,61,59,17,23.338446,59.245806,6.474443,105.008314,maize


In [83]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,1800.0,1800.0,1800.0,1800.0,1800.0,1800.0,1800.0
mean,57.206111,54.394444,43.826111,25.530113,69.060817,6.5355,99.037751
std,37.289357,27.64628,41.453287,4.997321,22.257622,0.77453,57.600118
min,0.0,5.0,15.0,8.825675,14.25804,3.504752,20.211267
25%,25.0,37.0,20.0,22.804735,60.261953,6.058154,57.798107
50%,50.0,55.0,35.0,25.490918,78.03683,6.50283,81.017378
75%,91.0,68.0,49.0,28.28036,84.631797,6.968597,122.704272
max,140.0,145.0,205.0,43.675493,94.998975,9.935091,298.560117


# Exploratory Data Analysis

In [47]:
# Build a ydata-profiling report over the whole DataFrame and export it as
# an HTML file (relative path, so it lands in the current working directory).
from ydata_profiling import ProfileReport
prof = ProfileReport(df)
prof.to_file(output_file='maindataset.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


[A%|          | 0/8 [00:00<?, ?it/s]
100%|██████████| 8/8 [00:00<00:00, 72.59it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [84]:
# List the distinct values of the target column (the crop names to predict).
print(df['label'].unique())

['rice' 'maize' 'chickpea' 'kidneybeans' 'pigeonpeas' 'mothbeans'
 'mungbean' 'blackgram' 'lentil' 'pomegranate' 'banana' 'grapes'
 'watermelon' 'muskmelon' 'papaya' 'cotton' 'jute' 'coffee']


# Data Preprocessing and Splitting

In [85]:
# Hold out 20% of the rows as a test set.
#   - features: every column except 'label'; target: the 'label' column
#   - stratify on the label so each crop keeps the same share in both splits
#   - fixed random_state so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['label']),
    df['label'],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

In [86]:
# Peek at the first rows of the training features (the 'label' column is already dropped).
X_train.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
676,22,37,20,27.627495,86.493669,6.605733,39.261376
451,24,73,20,19.637362,32.315289,4.608695,176.413409
1487,53,55,55,33.323157,91.252712,6.709669,234.496633
335,33,59,22,22.642369,21.593961,5.947,122.388601
805,29,71,18,22.175,62.138738,6.410441,53.466226


In [87]:
# Spot-check three random rows of the test features; seeded so the rows
# shown do not change between runs of the notebook.
X_test.sample(3, random_state=42)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
269,34,76,80,20.656918,15.845726,7.985417,65.238111
1430,44,56,49,39.233425,91.255893,6.51978,64.44785
169,96,54,22,25.701967,61.334504,6.960358,83.207113


In [88]:
# Distinct labels present in the training split.
y_train.unique()

array(['mungbean', 'pigeonpeas', 'papaya', 'kidneybeans', 'lentil',
       'pomegranate', 'mothbeans', 'watermelon', 'grapes', 'jute',
       'banana', 'rice', 'maize', 'cotton', 'coffee', 'blackgram',
       'chickpea', 'muskmelon'], dtype=object)

In [89]:
# Distinct labels present in the test split (compare with the training split above).
y_test.unique()

array(['pomegranate', 'cotton', 'muskmelon', 'blackgram', 'jute',
       'lentil', 'mothbeans', 'grapes', 'maize', 'banana', 'coffee',
       'chickpea', 'papaya', 'mungbean', 'rice', 'watermelon',
       'kidneybeans', 'pigeonpeas'], dtype=object)

#### Encoding the Target Variable

In [90]:

# Learn the crop-name -> integer mapping from the training labels only,
# then apply that same mapping to both splits.
le = LabelEncoder()
le.fit(y_train)
y_train_encoded = le.transform(y_train)
y_test_encoded = le.transform(y_test)


In [91]:
# Show which integer code the fitted encoder assigned to each crop name.
code_to_crop = dict(enumerate(le.classes_))
for i, crop in code_to_crop.items():
    print(f"  {i}: {crop}")


  0: banana
  1: blackgram
  2: chickpea
  3: coffee
  4: cotton
  5: grapes
  6: jute
  7: kidneybeans
  8: lentil
  9: maize
  10: mothbeans
  11: mungbean
  12: muskmelon
  13: papaya
  14: pigeonpeas
  15: pomegranate
  16: rice
  17: watermelon


#### Scaling 

In [113]:
# Column transformer that standardises every feature with StandardScaler.
# The previous selector, slice(0, 6), covered only the first six of the
# seven feature columns, so 'rainfall' was passed through unscaled.
# Naming the columns explicitly avoids that off-by-one and does not depend
# on column order.
numeric_features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
trf1 = ColumnTransformer(
    [('scaler', StandardScaler(), numeric_features)],
    remainder='passthrough',  # keeps any column not listed above unchanged
)
    

In [157]:
# Instantiate the classifier (it is fitted later, as part of the pipeline).
# A fixed random_state makes the forest, and therefore the reported
# accuracy and cross-validation scores, reproducible across runs.
trf2 = RandomForestClassifier(random_state=42)


In [158]:
# Chain the column transformer and the classifier into a single estimator.
pipe = make_pipeline(trf1,trf2)

In [159]:
# Fit the whole pipeline on the training features and the integer-encoded labels.
pipe.fit(X_train,y_train_encoded)

In [160]:
# Ask scikit-learn to render estimators (e.g. the pipeline) as a diagram
# when they are displayed in the notebook.
from sklearn import set_config
set_config(display='diagram')

In [181]:
# Predict encoded class ids for the held-out features, then map them back
# to the original crop names with the fitted LabelEncoder.
y_pred_encoded=pipe.predict(X_test)
y_pred_original=le.inverse_transform(y_pred_encoded)

In [183]:
# Check accuracy on the test split (encoded true labels vs. encoded predictions).
accuracy_score(y_test_encoded,y_pred_encoded)

0.9916666666666667

In [184]:
# Per-class precision / recall / F1 on the test split, reported against the
# original crop names rather than the integer codes.
report_text = classification_report(y_test, y_pred_original)
print("\nClassification Report:", report_text, sep="\n")



Classification Report:
              precision    recall  f1-score   support

      banana       1.00      1.00      1.00        20
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        20
      coffee       1.00      1.00      1.00        20
      cotton       1.00      1.00      1.00        20
      grapes       1.00      1.00      1.00        20
        jute       0.87      1.00      0.93        20
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        20
       maize       1.00      1.00      1.00        20
   mothbeans       1.00      1.00      1.00        20
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        20
      papaya       1.00      1.00      1.00        20
  pigeonpeas       1.00      1.00      1.00        20
 pomegranate       1.00      1.00      1.00        20
        rice       1.00      0.85      0.92        20
  w

In [179]:
# 10-fold cross-validation of the full pipeline on the training split.
# The target is the integer-encoded label vector, the same one the pipeline
# is fitted on above, so every modelling step uses one label representation
# (the original passed the raw string labels here).
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(pipe, X_train, y_train_encoded, cv=10, scoring='accuracy')
# Mean accuracy across the ten folds.
cv_scores.mean()

np.float64(0.99375)