In [1]:
import numpy as np
import pandas as pd
import joblib as jb
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [2]:
df=pd.read_csv("Crop_recommendation.csv")
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [4]:
df.dropna(inplace=True)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 154.7+ KB


In [6]:
df['label'].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [8]:
maps = {}
for variable in ['label']:
    mappings = df[variable].factorize()[1]
    df[variable] = df[variable].factorize()[0]
    maps[variable] = {k: i for i, k in enumerate(mappings)}
df.head()


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,0
1,85,58,41,21.770462,80.319644,7.038096,226.655537,0
2,60,55,44,23.004459,82.320763,7.840207,263.964248,0
3,74,35,40,26.491096,80.158363,6.980401,242.864034,0
4,78,42,42,20.130175,81.604873,7.628473,262.71734,0


In [9]:
with open("map_crop.txt", "w") as file:
    file.write(str(maps))

In [10]:
df['label'].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21], dtype=int64)

In [11]:
x=df.iloc[:,:-1]
y=df.iloc[:,-1:]

In [13]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2)

In [14]:
clf_svc=SVC()
clf_dt=DecisionTreeClassifier()
clf_rf=RandomForestClassifier()

In [15]:
clf_svc.fit(xtrain,ytrain)
clf_dt.fit(xtrain,ytrain)
clf_rf.fit(xtrain,ytrain)

  return f(*args, **kwargs)
  This is separate from the ipykernel package so we can avoid doing imports until


RandomForestClassifier()

In [16]:
ypredict_svc=clf_svc.predict(xtest)
ypredict_dt=clf_dt.predict(xtest)
ypredict_rf=clf_rf.predict(xtest)

In [17]:
print(accuracy_score(ytest,ypredict_svc))
print(accuracy_score(ytest,ypredict_dt))
print(accuracy_score(ytest,ypredict_rf))

0.990909090909091
0.9931818181818182
0.9954545454545455


In [18]:
print(classification_report(ytest,ypredict_svc),"\n\n")
print(classification_report(ytest,ypredict_dt),"\n\n")
print(classification_report(ytest,ypredict_rf),"\n\n")

              precision    recall  f1-score   support

           0       1.00      0.95      0.97        19
           1       1.00      0.95      0.98        21
           2       1.00      1.00      1.00        28
           3       0.95      1.00      0.97        19
           4       1.00      0.95      0.97        19
           5       1.00      0.95      0.98        22
           6       1.00      1.00      1.00        23
           7       0.96      1.00      0.98        22
           8       1.00      1.00      1.00        16
           9       1.00      1.00      1.00        20
          10       1.00      1.00      1.00        27
          11       1.00      1.00      1.00        19
          12       1.00      1.00      1.00        15
          13       1.00      1.00      1.00        18
          14       1.00      1.00      1.00        29
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        26
          17       1.00    

In [19]:
print(confusion_matrix(ytest,ypredict_svc),"\n\n")
print(confusion_matrix(ytest,ypredict_dt),"\n\n")
print(confusion_matrix(ytest,ypredict_rf),"\n\n")

[[18  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0]
 [ 0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0]
 [ 0  0 28  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  1 18  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 21  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 22  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 16  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 27  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0 18  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0

In [20]:
jb.dump(clf_svc,'crop_svc.pkl')
jb.dump(clf_dt,'crop_dt.pkl')
jb.dump(clf_rf,'crop_rf.pkl')

['crop_rf.pkl']