In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings

In [2]:
data=pd.read_csv('Crop_recommendation.csv')
data=data.sample(frac=1) 
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
1058,86,79,45,27.812515,82.692854,5.807664,99.209615,banana
575,27,59,20,28.009374,52.6095,4.397699,36.01203,mothbeans
972,18,21,35,23.280123,94.943305,6.368561,111.13821,pomegranate
347,13,72,21,24.321166,21.027867,5.821194,60.275525,kidneybeans
1497,106,21,52,28.895786,94.78993,6.286515,23.03625,muskmelon


In [3]:
data.shape

(2200, 8)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2200 entries, 1058 to 921
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 154.7+ KB


In [5]:
data.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [6]:
data.duplicated().sum()

0

In [7]:
data.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [8]:
data['label'].value_counts()

label
banana         100
mothbeans      100
watermelon     100
mango          100
chickpea       100
pigeonpeas     100
coffee         100
grapes         100
cotton         100
lentil         100
papaya         100
blackgram      100
rice           100
orange         100
mungbean       100
apple          100
maize          100
jute           100
muskmelon      100
kidneybeans    100
pomegranate    100
coconut        100
Name: count, dtype: int64

In [9]:
label_map={}
crops=data['label'].unique()
val=1
for crop in crops:
    label_map[crop]=val
    val+=1
data['label_num']=data['label'].map(label_map)
label_map

{'banana': 1,
 'mothbeans': 2,
 'pomegranate': 3,
 'kidneybeans': 4,
 'muskmelon': 5,
 'jute': 6,
 'maize': 7,
 'apple': 8,
 'mungbean': 9,
 'orange': 10,
 'rice': 11,
 'blackgram': 12,
 'papaya': 13,
 'lentil': 14,
 'cotton': 15,
 'grapes': 16,
 'coffee': 17,
 'pigeonpeas': 18,
 'chickpea': 19,
 'mango': 20,
 'watermelon': 21,
 'coconut': 22}

In [10]:
data.drop(columns=['label'],axis=1,inplace=True)
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_num
1058,86,79,45,27.812515,82.692854,5.807664,99.209615,1
575,27,59,20,28.009374,52.6095,4.397699,36.01203,2
972,18,21,35,23.280123,94.943305,6.368561,111.13821,3
347,13,72,21,24.321166,21.027867,5.821194,60.275525,4
1497,106,21,52,28.895786,94.78993,6.286515,23.03625,5


# Train Test Split

In [11]:
X=data.drop(columns=['label_num'],axis=1)
Y=data['label_num']

In [12]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaled_X=scaler.fit_transform(X)

In [13]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(scaled_X,Y,train_size=0.70,random_state=42)

In [14]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((1540, 7), (660, 7), (1540,), (660,))

# Implementing models

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

In [16]:
models={
    'Logistic Regression': LogisticRegression(class_weight='balanced'),
    'Naive Bayes': GaussianNB(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Bagging': BaggingClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
}

In [17]:
# for name,model in models.items():
#     model.fit(X_train,Y_train)
#     predictions=model.predict(X_test)
#     print(f"{name} : Classification report")
#     print(classification_report(Y_test,predictions),'\n')
#     print(f"{name} Precision score : ", precision_score(Y_test,predictions,average="macro"),'\n')
#     print(f"{name} Recall score : ", recall_score(Y_test,predictions,average="macro"),'\n')
#     print("*******************************************")

In [18]:
classifier=models['Logistic Regression']
classifier.fit(X_train,Y_train)
predictions=classifier.predict(X_test)

# Predictive system

In [19]:
def recommendation(N,P,k,temperature,humidity,ph,rainfal):
    features=np.array([[N,P,k,temperature,humidity,ph,rainfal]])
    transformed_features=scaler.fit_transform(features)
    prediction=classifier.predict(transformed_features)
    print(prediction)
    return prediction[0]

In [20]:
crop_map={}
for crop in crops:
    crop_map[label_map[crop]]=crop
crop_map

{1: 'banana',
 2: 'mothbeans',
 3: 'pomegranate',
 4: 'kidneybeans',
 5: 'muskmelon',
 6: 'jute',
 7: 'maize',
 8: 'apple',
 9: 'mungbean',
 10: 'orange',
 11: 'rice',
 12: 'blackgram',
 13: 'papaya',
 14: 'lentil',
 15: 'cotton',
 16: 'grapes',
 17: 'coffee',
 18: 'pigeonpeas',
 19: 'chickpea',
 20: 'mango',
 21: 'watermelon',
 22: 'coconut'}

In [21]:
# input 1

N = 49
P = 55
k = 51
temperature = 24.87
humidity = 93.9
ph = 6.67
rainfall = 135

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(predict))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[6]
6 is a best crop to be cultivated 


In [22]:
N = 9
P = 35
k = 20
temperature = 27.4
humidity = 80.9
ph = 6.9
rainfall = 40.53

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(predict))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[6]
6 is a best crop to be cultivated 


In [23]:
data.head(15)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label_num
1058,86,79,45,27.812515,82.692854,5.807664,99.209615,1
575,27,59,20,28.009374,52.6095,4.397699,36.01203,2
972,18,21,35,23.280123,94.943305,6.368561,111.13821,3
347,13,72,21,24.321166,21.027867,5.821194,60.275525,4
1497,106,21,52,28.895786,94.78993,6.286515,23.03625,5
1400,115,17,55,27.578269,94.118782,6.776533,28.082532,5
338,25,63,20,15.786014,21.145441,5.502999,95.170281,4
2071,99,57,38,24.80625,82.092817,6.356296,156.361617,6
172,86,37,16,20.517168,59.212355,5.561511,67.610137,7
1557,30,127,204,22.500503,92.458783,6.126437,100.93439,8


In [24]:
N = 49
P = 70
k = 76
temperature = 19.7
humidity = 17.6
ph = 6.61
rainfall = 85.57

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)
if predict in crop_map:
    print("{} is a best crop to be cultivated ".format(predict))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

[6]
6 is a best crop to be cultivated 


In [29]:
model=LogisticRegression()
model.fit(X_train,Y_train)
p=model.predict(X_train)
f1=f1_score(Y_train,p,average="macro")
p=model.predict(X_test)
f2=f1_score(Y_test,p,average="macro")
f1,f2

(0.9704530642305707, 0.9774913937316764)