In [9]:
import pandas as pd
import xgboost as xg 
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from typing import List

In [3]:
# Read the crop-recommendation CSV from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='Crop_recommendation.csv')

# Bare trailing expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
1692,117,86,48,28.695620,82.541958,6.225225,116.161684,banana
1693,114,94,53,26.335449,76.853201,6.190757,118.685826,banana
1694,110,78,50,25.937302,78.898644,5.915569,98.217475,banana
1695,94,70,48,25.136865,84.883944,6.195152,91.464425,banana


In [5]:
# Cast the four continuous measurements to integers, one column at a time.
# NOTE(review): astype(int) drops the fractional part (e.g. ph 6.50 -> 6);
# confirm this loss of precision is intended before modelling.
for column_name in ('temperature', 'humidity', 'ph', 'rainfall'):
    df[column_name] = df[column_name].astype(int)

# Summarise column dtypes and non-null counts after the cast.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1697 entries, 0 to 1696
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   N            1697 non-null   int64 
 1   P            1697 non-null   int64 
 2   K            1697 non-null   int64 
 3   temperature  1697 non-null   int64 
 4   humidity     1697 non-null   int64 
 5   ph           1697 non-null   int64 
 6   rainfall     1697 non-null   int64 
 7   label        1697 non-null   object
dtypes: int64(7), object(1)
memory usage: 106.2+ KB


In [6]:
# Distinct values of the label column, in order of first appearance.
pd.unique(df['label'])

array(['rice', 'maize', 'Soyabeans', 'beans', 'peas', 'groundnuts',
       'cowpeas', 'banana', 'mango', 'grapes', 'watermelon', 'apple',
       'orange', 'cotton', 'coffee'], dtype=object)

In [7]:
# Fit the encoder on the current contents of the label column, then overwrite
# that column with the resulting integer codes.
# Caution: the column is replaced in place, so running this cell a second time
# would refit the encoder on the integer codes rather than the original values.
le = LabelEncoder().fit(df['label'])
df['label'] = le.transform(df['label'])

# Show the encoded column.
df['label']

0       13
1       13
2       13
3       13
4       13
        ..
1692     2
1693     2
1694     2
1695     2
1696     2
Name: label, Length: 1697, dtype: int64

In [12]:
# Crop names in encoder order: position i is the name that LabelEncoder mapped
# to integer code i. Taken from the fitted encoder instead of a hand-copied
# list so the mapping cannot drift from the data. `.tolist()` yields plain
# Python strings rather than NumPy scalars.
# (`List` comes from the notebook's top import cell.)
crops: List[str] = le.classes_.tolist()

# Lookup table: integer class code -> crop name, for decoding predictions.
crops_dict = dict(enumerate(crops))
crops_dict

{0: 'Soyabeans',
 1: 'apple',
 2: 'banana',
 3: 'beans',
 4: 'coffee',
 5: 'cotton',
 6: 'cowpeas',
 7: 'grapes',
 8: 'groundnuts',
 9: 'maize',
 10: 'mango',
 11: 'orange',
 12: 'peas',
 13: 'rice',
 14: 'watermelon'}

In [13]:
# Feature matrix: every column except the encoded target.
x = df.drop('label', axis=1)
# Target vector: the integer-encoded crop label.
y = df['label']

# Hold out 10% of the rows for testing.
# random_state pins the shuffle so the split (and every metric computed from
# it) is reproducible after a kernel restart; stratify=y keeps each crop's
# share the same in the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.10,
    shuffle=True,
    random_state=42,
    stratify=y,
)

# Sanity-check the partition sizes.
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(1527, 7)
(170, 7)
(1527,)
(170,)


In [14]:
# Random forest with library-default hyperparameters. random_state is fixed so
# the bootstrap samples and feature sub-sampling, and therefore the reported
# scores, are reproducible from run to run.
rf_model = RandomForestClassifier(random_state=42)

# Fit on the training partition; the fitted estimator is the cell's output.
rf_model.fit(x_train, y_train)

In [16]:
# Predict class codes for the held-out rows.
y_pred = rf_model.predict(x_test)

# Per-class precision / recall / F1 on the test split; the row labels are the
# integer codes stored in y_test.
report_text = classification_report(y_test, y_pred)
print()
print("Classification Report:\n", report_text)


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00         8
           3       1.00      1.00      1.00        19
           4       1.00      1.00      1.00        14
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00        13
           7       1.00      1.00      1.00         8
           8       1.00      1.00      1.00        11
           9       1.00      1.00      1.00        10
          10       1.00      1.00      1.00        15
          11       1.00      1.00      1.00        12
          12       1.00      1.00      1.00         8
          13       1.00      1.00      1.00        16
          14       1.00      1.00      1.00        14

    accuracy                           1.00       170
   macro avg       1.00      1.00      1.00       170
weighted avg       1.00      1.00      1.00       170

In [17]:
# 5-fold cross-validated accuracy of the forest over the full feature/target set.
cross_validation_scores = cross_val_score(
    estimator=rf_model,
    X=x,
    y=y,
    scoring='accuracy',
    cv=5,
)

# Heading on one line, the per-fold scores on the next.
print('Cross validation scores', cross_validation_scores, sep='\n')

Cross validation scores
[1. 1. 1. 1. 1.]
