In [1]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt

## Loading Dataset

In [2]:
df = pd.read_csv(r"D:\\TRI-NIT\\Dataset\\Crop_recommendation.csv")
X = df.drop(['label'], axis=1)
y = df.label
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [3]:
# missing data
df.isna().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

## List of crops

In [4]:
df["label"].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

## Data Preprocessing

In [5]:
# data preprocessing
from sklearn.preprocessing import StandardScaler

X_standardized = X.copy()
for col in X_standardized.columns:
    X_standardized[col] = StandardScaler().fit_transform(X_standardized[col].values.reshape(-1, 1))
    
X_standardized.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,1.068797,-0.344551,-0.101688,-0.935587,0.472666,0.043302,1.810361
1,0.933329,0.140616,-0.141185,-0.759646,0.397051,0.734873,2.242058
2,0.255986,0.049647,-0.081939,-0.515898,0.486954,1.77151,2.921066
3,0.635298,-0.556811,-0.160933,0.172807,0.389805,0.660308,2.537048
4,0.743673,-0.344551,-0.121436,-1.083647,0.454792,1.497868,2.898373


## Splitting the data

In [6]:
# train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, train_size=0.8, random_state=42)

## SVM

In [7]:
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC(kernel='linear') # Linear Kernel
y_pred = svc.fit(X_train, y_train).predict(X_test)

svc_accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", svc_accuracy)

Accuracy: 0.9772727272727273


In [8]:
from sklearn import metrics
print(metrics.classification_report(y_pred, y_test),metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      0.95      0.98        21
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      0.94      0.97        18
      grapes       1.00      1.00      1.00        14
        jute       0.96      0.85      0.90        26
 kidneybeans       1.00      0.91      0.95        22
      lentil       1.00      0.92      0.96        12
       maize       0.95      1.00      0.98        20
       mango       1.00      1.00      1.00        19
   mothbeans       0.96      1.00      0.98        23
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      1.00        14
      papaya       1.00    

## KNN - Most suitable crop and List of Crops

In [9]:
dataset = df.to_numpy()
print(len(dataset))
test = [[45,24,32,45,52,5.9,95]]
test = test[0]
print(test)

2200
[45, 24, 32, 45, 52, 5.9, 95]


In [10]:
from math import sqrt
 
# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    distance = 0.0
    
    for i in range(len(row1)-1):
        distance += (row1[i] - row2[i])**2
    return sqrt(distance)
 
# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
    distances = list()
    for train_row in train:
        dist = euclidean_distance(test_row, train_row)
        distances.append((train_row, dist))
    distances.sort(key=lambda tup: tup[1])
    #print(distances)
    neighbors = list()
    for i in range(num_neighbors):
        neighbors.append(distances[i][0])
    return neighbors
 
neighbors = get_neighbors(dataset, test, 100)
pred=[]
for neighbor in neighbors:
    pred.append(neighbor[-1])

## Most suitable crop

In [11]:
# x = np.array(pred)
print(max(set(pred), key = pred.count))

mango


## List of crops

In [12]:
np.unique(pred).tolist()

['maize', 'mango', 'mothbeans']