# Crop Prediction using Naive Bayes

## Importing the required libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Reading the csv file

In [2]:
data=pd.read_csv('crop_prediction.csv')


In [3]:
data.head()

Unnamed: 0,Temperature,Humidity,pH,Rainfall,Label
0,20.879744,82.002744,6.502985,202.935536,Rice
1,21.770462,80.319644,7.038096,226.655537,Rice
2,23.004459,82.320763,7.840207,263.964248,Rice
3,26.491096,80.158363,6.980401,242.864034,Rice
4,20.130175,81.604873,7.628473,262.71734,Rice


In [4]:
X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values

In [5]:
X

array([[ 20.87974371,  82.00274423,   6.50298529, 202.9355362 ],
       [ 21.77046169,  80.31964408,   7.03809636, 226.6555374 ],
       [ 23.00445915,  82.3207629 ,   7.84020714, 263.9642476 ],
       ...,
       [ 30.41235793,  52.48100602,   6.62162355,  93.92375879],
       [ 32.17752026,  54.01352682,   6.20749581,  91.88766069],
       [ 32.6112614 ,  47.74916499,   5.41847526,  91.10190759]])

In [6]:
y

array(['Rice', 'Rice', 'Rice', ..., 'Mango', 'Mango', 'Mango'],
      dtype=object)

In [7]:
categorical_y = y

In [8]:
unique_crops = data['Label'].unique()
print(unique_crops)

['Rice' 'Wheat' 'Mung Beans' 'Millet' 'Maize' 'Lentil' 'Cotton'
 'Groundnut' 'Sugarcane' 'Black Gram' 'Pigeon Peas' 'Chick Peas' 'Banana'
 'Grape' 'Mango']


In [9]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

## Splitting the dataset into the Training set and Test set

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

## Feature Scaling

In [11]:
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

## Training the Naive Bayes model on the training set

In [12]:
from sklearn.naive_bayes import GaussianNB 
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

## Predicting the Test set result

In [13]:
y_pred = classifier.predict(X_test)

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
#print(cm)

In [15]:
a = accuracy_score(y_test, y_pred)
print(a)

0.9546666666666667


## Cross Validation


In [16]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(classifier, X_train, y_train, cv = 10, scoring='accuracy')

print('Cross-validation scores:{}'.format(scores))

Cross-validation scores:[0.96460177 0.97345133 0.9380531  0.9380531  0.97345133 0.91071429
 0.9375     0.96428571 0.94642857 0.95535714]


In [17]:
# compute Average cross-validation score

print('Average cross-validation score: {:.4f}'.format(scores.mean()))

Average cross-validation score: 0.9502


## Classification Report


In [20]:
from sklearn.metrics import classification_report
target_names=unique_crops
print(classification_report(y_test, y_pred, target_names=target_names))

              precision    recall  f1-score   support

        Rice       0.87      1.00      0.93        20
       Wheat       1.00      1.00      1.00        29
  Mung Beans       1.00      1.00      1.00        27
      Millet       0.89      1.00      0.94        25
       Maize       1.00      1.00      1.00        29
      Lentil       0.94      0.88      0.91        17
      Cotton       1.00      1.00      1.00        30
   Groundnut       0.93      0.84      0.88        31
   Sugarcane       0.93      1.00      0.96        27
  Black Gram       1.00      1.00      1.00        23
 Pigeon Peas       1.00      1.00      1.00        23
  Chick Peas       1.00      0.83      0.90        23
      Banana       0.97      1.00      0.98        32
       Grape       1.00      0.83      0.91        18
       Mango       0.78      0.86      0.82        21

    accuracy                           0.95       375
   macro avg       0.95      0.95      0.95       375
weighted avg       0.96   

In [22]:
from sklearn.metrics import cohen_kappa_score 

print(cohen_kappa_score(y_test, y_pred))

0.9513087445675834
