# Crop Recommendation Model

### Importing Modules

In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
import pickle
import warnings
warnings.filterwarnings('ignore')

### Importing Dataset

In [2]:
# Location of the crop dataset, relative to the notebook's working directory.
DATASET_PATH = './crop_recommendation.csv'
df = pd.read_csv(DATASET_PATH)

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [4]:
# Preview the last five rows of the loaded frame.
df.tail(n=5)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.92461,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee
2199,104,18,30,23.603016,60.396475,6.779833,140.937041,coffee


In [5]:
df.size # total number of elements in the frame (rows x columns)

17600

In [6]:
df.shape # (number of rows, number of columns)

(2200, 8)

In [7]:
df.columns # column labels of the loaded frame

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [8]:
df['label'].unique() # distinct values of the 'label' column, in order of first appearance

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [9]:
df.dtypes # dtype of each column

N                int64
P                int64
K                int64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
label           object
dtype: object

In [10]:
df['label'].value_counts() # number of rows per label, to check how balanced the classes are

rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: label, dtype: int64

### Defining Feature and Target labels

In [11]:
# Columns used as model inputs; 'label' is the value to predict.
FEATURE_COLUMNS = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

features = df[FEATURE_COLUMNS]
labels = df['label']
target = df['label']  # same column as `labels`; this is the name the later cells train on

### Splitting Dataset into training and testing
- Training data: 80%
- Testing data: 20%

In [12]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

### Naive Bayes

In [13]:
# Fit a Gaussian Naive Bayes classifier on the training split (fit returns the estimator itself).
NB = GaussianNB().fit(x_train, y_train)

# Score the fitted model on the held-out test split.
predicted_values = NB.predict(x_test)
x = metrics.accuracy_score(y_true=y_test, y_pred=predicted_values)

In [14]:
print("Naive Bayes's Accuracy: ", x) # x is the accuracy_score of the predictions on the test split

Naive Bayes's Accuracy:  0.9954545454545455


In [15]:
# Per-class precision, recall, F1 and support on the test split.
report = metrics.classification_report(y_test, predicted_values)
print(report)

              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      1.00      1.00        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1.00        14
      papaya       1.00    

In [16]:
# 5-fold cross-validation over the full dataset; one score per fold.
score = cross_val_score(estimator=NB, X=features, y=target, cv=5)
score

array([0.99772727, 0.99545455, 0.99545455, 0.99545455, 0.99090909])

### Saving the model using pickle

In [20]:
NB_filename = 'Crop_recommender.pkl'  # output file, relative to the notebook's working directory

# Serialize the fitted classifier. The context manager guarantees the file is
# flushed and closed even if pickling raises part-way through.
with open(NB_filename, 'wb') as NB_Model:
    pickle.dump(NB, NB_Model)

### Testing the model

In [21]:
# One hand-written sample; values follow the column order of `features`
# (N, P, K, temperature, humidity, ph, rainfall).
data = np.array([[99, 33, 31, 21.2, 55, 7.2, 130]])

# The model was fitted on a DataFrame, so pass the sample with the same column
# names; a bare ndarray makes scikit-learn skip its feature-name consistency check.
sample = pd.DataFrame(data, columns=features.columns)
result = NB.predict(sample)
print(result)

['coffee']


In [22]:
# Second hand-written sample; values follow the column order of `features`
# (N, P, K, temperature, humidity, ph, rainfall).
data = np.array([[66, 11, 70, 17, 76.7, 6.9, 123]])

# The model was fitted on a DataFrame, so pass the sample with the same column
# names; a bare ndarray makes scikit-learn skip its feature-name consistency check.
sample = pd.DataFrame(data, columns=features.columns)
result = NB.predict(sample)
print(result)

['jute']
