In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings

In [2]:
data=pd.read_csv('Crop_recommendation.csv')
data=data.sample(frac=1) 
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
1181,2,36,31,30.902252,49.959555,5.731719,91.775226,mango
669,9,35,20,27.415035,80.980047,6.913809,40.531732,mungbean
1661,12,6,8,30.84835,92.867737,6.388617,107.414268,orange
1961,107,43,18,22.426733,81.534808,6.745104,65.544758,cotton
207,59,70,84,17.334868,18.74927,7.550808,82.617347,chickpea


In [3]:
data.shape

(2200, 8)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2200 entries, 1181 to 1289
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 154.7+ KB


In [5]:
data.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [6]:
data.duplicated().sum()

0

In [7]:
data.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [8]:
data['label'].value_counts()

label
mango          100
mungbean       100
banana         100
coconut        100
watermelon     100
lentil         100
apple          100
pigeonpeas     100
muskmelon      100
papaya         100
maize          100
coffee         100
jute           100
mothbeans      100
kidneybeans    100
pomegranate    100
blackgram      100
grapes         100
chickpea       100
cotton         100
orange         100
rice           100
Name: count, dtype: int64

In [9]:
label_map={}
crops=data['label'].unique()
val=1
for crop in crops:
    label_map[crop]=val
    val+=1
data['label_num']=data['label'].map(label_map)
label_map

{'mango': 1,
 'mungbean': 2,
 'orange': 3,
 'cotton': 4,
 'chickpea': 5,
 'grapes': 6,
 'blackgram': 7,
 'pomegranate': 8,
 'kidneybeans': 9,
 'mothbeans': 10,
 'jute': 11,
 'coffee': 12,
 'maize': 13,
 'papaya': 14,
 'muskmelon': 15,
 'pigeonpeas': 16,
 'apple': 17,
 'lentil': 18,
 'watermelon': 19,
 'coconut': 20,
 'banana': 21,
 'rice': 22}

In [10]:
data.drop(columns=['label'],axis=1,inplace=True)
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_num
1181,2,36,31,30.902252,49.959555,5.731719,91.775226,1
669,9,35,20,27.415035,80.980047,6.913809,40.531732,2
1661,12,6,8,30.84835,92.867737,6.388617,107.414268,3
1961,107,43,18,22.426733,81.534808,6.745104,65.544758,4
207,59,70,84,17.334868,18.74927,7.550808,82.617347,5


# Train Test Split

In [11]:
X=data.drop(columns=['label_num'],axis=1)
Y=data['label_num']

In [12]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
scaled_X=scaler.fit_transform(X)

In [13]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(scaled_X,Y,train_size=0.8,random_state=42)

In [14]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((1760, 7), (440, 7), (1760,), (440,))

# Implementing models

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

In [16]:
models={
    'Logistic Regression': LogisticRegression(),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Bagging': BaggingClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
}

In [17]:
for name,model in models.items():
    model.fit(X_train,Y_train)
    predictions=model.predict(X_test)
    print(f"{name} : Classification report")
    print(classification_report(Y_test,predictions),'\n')
    print(f"{name} Precision score : ", precision_score(Y_test,predictions,average="macro"),'\n')
    print(f"{name} Recall score : ", recall_score(Y_test,predictions,average="macro"),'\n')
    print("*******************************************")

Logistic Regression : Classification report
              precision    recall  f1-score   support

           1       0.96      1.00      0.98        22
           2       0.87      1.00      0.93        20
           3       1.00      0.89      0.94        18
           4       0.96      1.00      0.98        26
           5       1.00      1.00      1.00        25
           6       1.00      1.00      1.00        17
           7       0.78      1.00      0.88        18
           8       1.00      1.00      1.00        17
           9       0.95      1.00      0.98        21
          10       1.00      0.88      0.93        16
          11       1.00      0.88      0.94        17
          12       1.00      1.00      1.00        29
          13       1.00      0.95      0.97        20
          14       0.90      0.75      0.82        12
          15       0.95      1.00      0.98        20
          16       1.00      0.89      0.94        18
          17       1.00      1.00    

In [24]:
classifier=models['Gradient Boosting']
classifier.fit(X_train,Y_train)
predictions=classifier.predict(X_test)

# Predictive system

In [25]:
def recommendation(N,P,k,temperature,humidity,ph,rainfal):
    features=np.array([[N,P,k,temperature,humidity,ph,rainfal]])
    transformed_features=scaler.fit_transform(features)
    prediction=classifier.predict(transformed_features)
    print(prediction)
    return prediction[0] 

In [26]:
crop_map={}
for crop in crops:
    crop_map[label_map[crop]]=crop
crop_map

{1: 'mango',
 2: 'mungbean',
 3: 'orange',
 4: 'cotton',
 5: 'chickpea',
 6: 'grapes',
 7: 'blackgram',
 8: 'pomegranate',
 9: 'kidneybeans',
 10: 'mothbeans',
 11: 'jute',
 12: 'coffee',
 13: 'maize',
 14: 'papaya',
 15: 'muskmelon',
 16: 'pigeonpeas',
 17: 'apple',
 18: 'lentil',
 19: 'watermelon',
 20: 'coconut',
 21: 'banana',
 22: 'rice'}

In [35]:
data[data['label_num'] == 9]

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_num
310,28,58,24,19.727025,18.281730,5.748190,143.763089,9
342,28,66,23,21.539892,24.253862,5.996161,120.691304,9
346,0,65,15,23.461683,23.221976,5.645436,95.842534,9
334,1,62,23,15.435461,18.374779,5.607808,139.030203,9
324,11,71,17,19.919179,21.473242,5.746448,82.685544,9
...,...,...,...,...,...,...,...,...
398,27,63,19,20.934099,21.189301,5.562202,133.191442,9
385,32,68,19,24.628350,18.183252,5.514234,149.744103,9
337,6,62,22,20.530527,18.092240,5.824091,120.450929,9
396,30,63,16,23.605066,21.905396,5.525905,100.597873,9


In [30]:
# input 1

N = 80
P = 10
k = 50
temperature = 40.0
humidity = 20
ph = 100
rainfall = 100

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(crop_map[22]))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[9]
rice is a best crop to be cultivated 


In [34]:
N = 9
P = 35
k = 20
temperature = 27.4
humidity = 80.9
ph = 6.9
rainfall = 40.53

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(crop_map[predict]))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[9]
kidneybeans is a best crop to be cultivated 
