# CROP RECOMMENDATION SYSTEM 

![](https://images.unsplash.com/photo-1560493676-04071c5f467b?ixid=MXwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHw%3D&ixlib=rb-1.2.1&auto=format&fit=crop&w=968&q=80)

# Import data

In [1]:
# Importing libraries

from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn import metrics
from sklearn import tree
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides any warnings the modelling libraries may raise
# about the data or the metrics — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# Load the crop dataset from a CSV file in the current working directory.
# Alternative location kept for reference (presumably a mounted Google Drive path — confirm):
#   PATH = '/content/drive/MyDrive/Data/Tasks/new_crop_data.csv'
#   df_crop = pd.read_csv(PATH)
df_crop = pd.read_csv('new_crop_data.csv')

In [3]:
# Preview the first rows to sanity-check the load.
df_crop.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90.0,42.0,43.0,20.879744,82.002744,6.502985,202.935536,maize
1,85.0,58.0,41.0,21.770462,80.319644,7.038096,226.655537,maize
2,60.0,55.0,44.0,23.004459,82.320763,7.840207,263.964248,maize
3,74.0,35.0,40.0,26.491096,80.158363,6.980401,242.864034,maize
4,78.0,42.0,42.0,20.130175,81.604873,7.628473,262.71734,maize


In [26]:
# Inspect the last rows of the frame.
# NOTE(review): the saved output shows identical feature values on every one of these rows,
# and this cell's execution count (26) is later than the imputation cell's (11) — presumably
# these are mean-imputed values rather than raw measurements. TODO: confirm against the CSV
# and re-run the notebook top to bottom.
df_crop.tail()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
2205,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,tomatoes
2206,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,avocado
2207,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,napier grass
2208,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,pasture grass
2209,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655,cucumber


In [4]:
# List the distinct crop names found in the `label` column.
df_crop.label.unique()

array(['maize', 'coffee', 'beans', 'green peas', 'sugarcane', 'capsicum',
       'onions', 'tomatoes', 'avocado', 'napier grass', 'pasture grass',
       'cucumber'], dtype=object)

In [5]:
# Total number of cells in the frame (rows times columns), not the number of rows.
df_crop.size

17680

In [6]:
# (rows, columns) of the dataset.
df_crop.shape

(2210, 8)

In [7]:
# Show the column names of the frame.
df_crop.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [8]:
# NOTE(review): repeats the earlier `df_crop.label.unique()` cell; one of the two can be dropped.
df_crop['label'].unique()

array(['maize', 'coffee', 'beans', 'green peas', 'sugarcane', 'capsicum',
       'onions', 'tomatoes', 'avocado', 'napier grass', 'pasture grass',
       'cucumber'], dtype=object)

In [9]:
# Check the dtype of each column.
df_crop.dtypes

N              float64
P              float64
K              float64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
label           object
dtype: object

In [10]:
# Count the rows per crop to check class balance.
# NOTE(review): the saved output shows maize dominating, with ten crops having a single row
# each, so a train/test split cannot place those crops on both sides of the split.
df_crop['label'].value_counts()

label
maize            2100
coffee            100
beans               1
green peas          1
sugarcane           1
capsicum            1
onions              1
tomatoes            1
avocado             1
napier grass        1
pasture grass       1
cucumber            1
Name: count, dtype: int64

In [11]:
from sklearn.impute import SimpleImputer

# Fill missing numeric values with the mean of their column.
# NOTE(review): the imputer is fitted on the full dataset before the train/test split, so
# rows that later land in the test set contribute to the fill values.

# Only numeric columns can be mean-imputed; non-numeric columns are left untouched.
numeric_columns = df_crop.select_dtypes(include=np.number).columns

imputer = SimpleImputer(strategy='mean')

# Learn the column means and fill the NaNs in a single call; the result is written back
# into df_crop, so the frame is modified from this cell onwards.
df_crop[numeric_columns] = imputer.fit_transform(df_crop[numeric_columns])


### Separating features and target label

In [12]:
# Model inputs: the seven numeric columns, selected explicitly by name.
features = df_crop.loc[:, ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
# Prediction target: the crop name. `labels` holds the same column under a second name.
target = df_crop.loc[:, 'label']
labels = df_crop.loc[:, 'label']

In [13]:
# Running tallies filled in by each model cell: `model` collects the model names and
# `acc` the matching accuracy scores, in the same order.
acc, model = [], []

# Modelling

In [14]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.

from sklearn.model_selection import train_test_split

Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=2,
)

# Decision Tree

In [15]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-based tree capped at depth 5; the fixed seed makes the fit repeatable.
DecisionTree = DecisionTreeClassifier(criterion="entropy", random_state=2, max_depth=5)
DecisionTree.fit(Xtrain, Ytrain)

# Score on the held-out split and record the result in the model comparison lists.
# NOTE(review): re-running this cell appends a second entry to `acc` and `model`.
predicted_values = DecisionTree.predict(Xtest)
x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('Decision Tree')
print("DecisionTrees's Accuracy is: ", x*100)

# zero_division=0 states explicitly how a class with no predicted (or no true) samples is
# scored — as 0.0 — instead of relying on the globally silenced warning for that case.
print(classification_report(Ytest, predicted_values, zero_division=0))

DecisionTrees's Accuracy is:  99.3212669683258
              precision    recall  f1-score   support

     avocado       0.00      0.00      0.00         0
      coffee       1.00      1.00      1.00        19
    cucumber       0.00      0.00      0.00         1
       maize       1.00      1.00      1.00       420
   sugarcane       0.00      0.00      0.00         1
    tomatoes       0.00      0.00      0.00         1

    accuracy                           0.99       442
   macro avg       0.33      0.33      0.33       442
weighted avg       0.99      0.99      0.99       442



### Scoring the model

In [16]:
from sklearn.model_selection import cross_val_score

In [17]:
# 5-fold cross-validation scores for the decision tree, computed over the full dataset.
score = cross_val_score(estimator=DecisionTree, X=features, y=target, cv=5)

In [18]:
# Display the per-fold scores held in `score`.
score

array([0.99547511, 0.99547511, 0.99547511, 0.99547511, 0.97963801])

### Saving trained Decision Tree model

In [19]:
import joblib

# Persist the fitted decision tree to disk with joblib.
DT_pkl_filename = 'DecisionTree.pkl'
joblib.dump(value=DecisionTree, filename=DT_pkl_filename)

# Read the file straight back so later cells predict with the saved artifact.
saved_model = joblib.load(DT_pkl_filename)

## Making a prediction

In [20]:
# One sample to classify. Each value is keyed by the column it belongs to, so the input
# carries the same feature names, in the same order, as the frame the model was fitted on
# (a bare array relies on positional order alone and cannot be checked by the estimator).
data = pd.DataFrame([{
    'N': 10, 'P': 18, 'K': 30,
    'temperature': 23.603016, 'humidity': 60.3, 'ph': 6.7, 'rainfall': 140.91,
}])
prediction = saved_model.predict(data)
print(prediction)

['maize']


In [21]:
# A second sample to classify. Each value is keyed by the column it belongs to, so the
# input carries the same feature names, in the same order, as the frame the model was
# fitted on (a bare array relies on positional order alone).
data = pd.DataFrame([{
    'N': 83, 'P': 45, 'K': 60,
    'temperature': 28, 'humidity': 70.3, 'ph': 7.0, 'rainfall': 150.9,
}])
prediction = saved_model.predict(data)
print(prediction)

['coffee']


# Gaussian Naive Bayes

In [22]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings, fitted on the same training split.
NaiveBayes = GaussianNB()
NaiveBayes.fit(Xtrain, Ytrain)

# Score on the held-out split and record the result in the model comparison lists.
# NOTE(review): this prints the raw accuracy fraction, whereas the Decision Tree cell
# prints a percentage (x*100) — pick one convention when the outputs are regenerated.
predicted_values = NaiveBayes.predict(Xtest)
x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('Naive Bayes')
print("Naive Bayes's Accuracy is: ", x)

# zero_division=0 states explicitly how a class with no predicted (or no true) samples is
# scored — as 0.0 — instead of relying on the globally silenced warning for that case.
print(classification_report(Ytest, predicted_values, zero_division=0))

Naive Bayes's Accuracy is:  0.9886877828054299
              precision    recall  f1-score   support

     avocado       0.00      0.00      0.00         0
      coffee       0.90      1.00      0.95        19
    cucumber       0.00      0.00      0.00         1
       maize       1.00      1.00      1.00       420
   sugarcane       0.00      0.00      0.00         1
    tomatoes       0.00      0.00      0.00         1

    accuracy                           0.99       442
   macro avg       0.32      0.33      0.32       442
weighted avg       0.99      0.99      0.99       442



In [23]:
# 5-fold cross-validation scores for the Naive Bayes model; shown as the cell output.
score = cross_val_score(estimator=NaiveBayes, X=features, y=target, cv=5)
score

array([0.99547511, 0.99547511, 0.99547511, 0.99547511, 0.98868778])

# Saving Gaussian Naive Bayes Model

In [24]:
import pickle

# Serialize the fitted Naive Bayes classifier to disk with pickle.
NB_pkl_filename = 'NBClassifier.pkl'
# The context manager closes the file even if pickling raises, so no handle is leaked.
with open(NB_pkl_filename, 'wb') as NB_Model_pkl:
    pickle.dump(NaiveBayes, NB_Model_pkl)

# Exporting the Decision Tree model

# Export the fitted decision tree to a Graphviz .dot file (tree.dot) for visualisation.
from sklearn import tree

tree.export_graphviz(
    DecisionTree,
    out_file='tree.dot',
    feature_names=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'],
    # Take the class names from the fitted estimator itself so they always line up with
    # the class indices stored in the tree, rather than recomputing them from Ytrain.
    class_names=list(DecisionTree.classes_),
    label='all',
    rounded=True,
    filled=True,
)