In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings

In [2]:
data=pd.read_csv('Crop_recommendation.csv')
data=data.sample(frac=1) 
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
2045,75,41,35,24.970426,78.626977,6.856833,166.641525,jute
376,27,69,22,17.916523,24.908147,5.932323,69.14681,kidneybeans
285,37,78,79,19.952648,14.826331,7.786366,88.681031,chickpea
861,19,79,19,20.06004,67.762526,6.677263,42.895091,lentil
2176,86,40,33,26.138787,52.263117,7.432322,136.302777,coffee


In [3]:
data.shape

(2200, 8)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2200 entries, 2045 to 1055
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 154.7+ KB


In [5]:
data.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [6]:
data.duplicated().sum()

0

In [7]:
data.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [8]:
data['label'].value_counts()

label
jute           100
kidneybeans    100
watermelon     100
coconut        100
maize          100
muskmelon      100
banana         100
mango          100
mungbean       100
orange         100
pigeonpeas     100
mothbeans      100
pomegranate    100
blackgram      100
apple          100
cotton         100
papaya         100
rice           100
coffee         100
lentil         100
chickpea       100
grapes         100
Name: count, dtype: int64

In [9]:
label_map={}
crops=data['label'].unique()
val=1
for crop in crops:
    label_map[crop]=val
    val+=1
data['label_num']=data['label'].map(label_map)
label_map

{'jute': 1,
 'kidneybeans': 2,
 'chickpea': 3,
 'lentil': 4,
 'coffee': 5,
 'rice': 6,
 'papaya': 7,
 'cotton': 8,
 'apple': 9,
 'blackgram': 10,
 'pomegranate': 11,
 'mothbeans': 12,
 'pigeonpeas': 13,
 'orange': 14,
 'mungbean': 15,
 'mango': 16,
 'banana': 17,
 'muskmelon': 18,
 'maize': 19,
 'coconut': 20,
 'watermelon': 21,
 'grapes': 22}

In [10]:
data.drop(columns=['label'],axis=1,inplace=True)
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_num
2045,75,41,35,24.970426,78.626977,6.856833,166.641525,1
376,27,69,22,17.916523,24.908147,5.932323,69.14681,2
285,37,78,79,19.952648,14.826331,7.786366,88.681031,3
861,19,79,19,20.06004,67.762526,6.677263,42.895091,4
2176,86,40,33,26.138787,52.263117,7.432322,136.302777,5


# Train Test Split

In [11]:
X=data.drop(columns=['label_num'],axis=1)
Y=data['label_num']

In [12]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaled_X=scaler.fit_transform(X)

In [13]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(scaled_X,Y,train_size=0.70,random_state=42)

In [14]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((1540, 7), (660, 7), (1540,), (660,))

# Implementing models

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

In [16]:
models={
    'Logistic Regression': LogisticRegression(class_weight='balanced'),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Bagging': BaggingClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
}

In [27]:
 for name,model in models.items():
     model.fit(X_train,Y_train)
     predictions=model.predict(X_test)
     #print(f"{name} : Classification report")
     #print(classification_report(Y_test,predictions),'\n')
     print(f"{name} Accuracy score : ", accuracy_score(Y_test,predictions),'\n')
     print(f"{name} Precision score : ", precision_score(Y_test,predictions,average="macro"),'\n')
     print(f"{name} Recall score : ", recall_score(Y_test,predictions,average="macro"),'\n')
     print("*******************************************")

Logistic Regression Accuracy score :  0.9590909090909091 

Logistic Regression Precision score :  0.9624819661524279 

Logistic Regression Recall score :  0.958950456109547 

*******************************************
Naive Bayes Accuracy score :  0.9863636363636363 

Naive Bayes Precision score :  0.9864893171344785 

Naive Bayes Recall score :  0.9857954545454546 

*******************************************
Support Vector Machine Accuracy score :  0.9712121212121212 

Support Vector Machine Precision score :  0.9744353347801623 

Support Vector Machine Recall score :  0.9718614718614718 

*******************************************
K-Nearest Neighbors Accuracy score :  0.9636363636363636 

K-Nearest Neighbors Precision score :  0.9649072067570154 

K-Nearest Neighbors Recall score :  0.9647530499803227 

*******************************************
Decision Tree Accuracy score :  0.9803030303030303 

Decision Tree Precision score :  0.9803988595195997 

Decision Tree Recall score : 

In [18]:
classifier=models['Logistic Regression']
classifier.fit(X_train,Y_train)
predictions=classifier.predict(X_test)

# Predictive system

In [19]:
def recommendation(N,P,k,temperature,humidity,ph,rainfal):
    features=np.array([[N,P,k,temperature,humidity,ph,rainfal]])
    transformed_features=scaler.fit_transform(features)
    prediction=classifier.predict(transformed_features)
    print(prediction)
    return prediction[0]

In [20]:
crop_map={}
for crop in crops:
    crop_map[label_map[crop]]=crop
crop_map

{1: 'jute',
 2: 'kidneybeans',
 3: 'chickpea',
 4: 'lentil',
 5: 'coffee',
 6: 'rice',
 7: 'papaya',
 8: 'cotton',
 9: 'apple',
 10: 'blackgram',
 11: 'pomegranate',
 12: 'mothbeans',
 13: 'pigeonpeas',
 14: 'orange',
 15: 'mungbean',
 16: 'mango',
 17: 'banana',
 18: 'muskmelon',
 19: 'maize',
 20: 'coconut',
 21: 'watermelon',
 22: 'grapes'}

In [21]:
# input 1

N = 49
P = 55
k = 51
temperature = 24.87
humidity = 93.9
ph = 6.67
rainfall = 135

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(predict))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[1]
1 is a best crop to be cultivated 


In [22]:
N = 9
P = 35
k = 20
temperature = 27.4
humidity = 80.9
ph = 6.9
rainfall = 40.53

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(predict))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[1]
1 is a best crop to be cultivated 


In [23]:
data.head(15)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_num
2045,75,41,35,24.970426,78.626977,6.856833,166.641525,1
376,27,69,22,17.916523,24.908147,5.932323,69.14681,2
285,37,78,79,19.952648,14.826331,7.786366,88.681031,3
861,19,79,19,20.06004,67.762526,6.677263,42.895091,4
2176,86,40,33,26.138787,52.263117,7.432322,136.302777,5
37,95,39,36,23.863305,83.152508,5.561399,285.249365,6
2020,85,53,38,24.900757,73.841864,6.588017,153.899098,1
33,98,53,38,20.267076,81.638952,5.014507,270.441727,6
1733,39,70,52,26.265595,90.796681,6.651491,59.493734,7
2172,111,29,31,26.059684,52.310985,6.136287,161.343254,5


In [24]:
N = 49
P = 70
k = 76
temperature = 19.7
humidity = 17.6
ph = 6.61
rainfall = 85.57

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(predict))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[1]
1 is a best crop to be cultivated 


In [25]:
model=LogisticRegression()
model.fit(X_train,Y_train)
p=model.predict(X_train)
f1=f1_score(Y_train,p,average="macro")
p=model.predict(X_test)
f2=f1_score(Y_test,p,average="macro")
f1,f2

(0.9778794142726795, 0.9579221773206545)