SVC

In [53]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn import svm, datasets
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
%matplotlib inline


## Load data and preview data

In [54]:
# Loading data
crop_predictor_df = pd.read_csv("Resources/crop_recommendation_family_clean.csv")
crop_predictor_df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,crop_name,kingdom,order,family,genus,botanical_name
0,90,42,43,20.88,82.0,6.5,202.94,rice,plantae,poales,poaceae,oryza,oryza sativa
1,85,58,41,21.77,80.32,7.04,226.66,rice,plantae,poales,poaceae,oryza,oryza sativa
2,60,55,44,23.0,82.32,7.84,263.96,rice,plantae,poales,poaceae,oryza,oryza sativa
3,74,35,40,26.49,80.16,6.98,242.86,rice,plantae,poales,poaceae,oryza,oryza sativa
4,78,42,42,20.13,81.6,7.63,262.72,rice,plantae,poales,poaceae,oryza,oryza sativa


In [55]:
crop_predictor_df.drop(["kingdom", "botanical_name", "family", "genus", "order"], axis=1, inplace=True)

In [56]:
# pd.set_option('display.max_columns', None)
# crop_predictor_dummies_df = pd.get_dummies(crop_predictor_df, prefix=["family", "genus", "order"], columns= ["family", "genus", "order"])

In [57]:
crop_predictor_df.dtypes

N                int64
P                int64
K                int64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
crop_name       object
dtype: object

In [58]:
crop_predictor_df['crop_name'].unique()

array(['rice', 'maize', 'chickpea', 'kidney_bean', 'pigeon_pea',
       'moth_bean', 'mung_bean', 'urad_bean', 'lentil', 'pomegranate',
       'banana', 'mango', 'grape', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [59]:
crop_predictor_df['crop_name'].value_counts()

crop_name
rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grape          100
mango          100
banana         100
pomegranate    100
lentil         100
urad_bean      100
mung_bean      100
moth_bean      100
pigeon_pea     100
kidney_bean    100
chickpea       100
coffee         100
Name: count, dtype: int64

In [60]:
# Define features set
X = crop_predictor_df.copy()
X.drop(["crop_name"], axis=1, inplace=True)
X.head(10)

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,90,42,43,20.88,82.0,6.5,202.94
1,85,58,41,21.77,80.32,7.04,226.66
2,60,55,44,23.0,82.32,7.84,263.96
3,74,35,40,26.49,80.16,6.98,242.86
4,78,42,42,20.13,81.6,7.63,262.72
5,69,37,42,23.06,83.37,7.07,251.05
6,69,55,38,22.71,82.64,5.7,271.32
7,94,53,40,20.28,82.89,5.72,241.97
8,89,54,38,24.52,83.54,6.69,230.45
9,68,58,38,23.22,83.03,6.34,221.21


In [61]:
# Define target vector
y = crop_predictor_df["crop_name"].ravel()
y[:-1]

array(['rice', 'rice', 'rice', ..., 'coffee', 'coffee', 'coffee'],
      dtype=object)

In [62]:
X.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,90,42,43,20.88,82.0,6.5,202.94
1,85,58,41,21.77,80.32,7.04,226.66
2,60,55,44,23.0,82.32,7.84,263.96
3,74,35,40,26.49,80.16,6.98,242.86
4,78,42,42,20.13,81.6,7.63,262.72


In [63]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.80, test_size=0.20, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1760, 7), (440, 7), (1760,), (440,))

In [64]:
rbf = svm.SVC(kernel='rbf', gamma=0.5, C=0.1).fit(X_train, y_train)
poly = svm.SVC(kernel='poly', degree=3, C=1).fit(X_train, y_train)

In [65]:
poly_pred = poly.predict(X_test)
rbf_pred = rbf.predict(X_test)

In [66]:
poly_accuracy = accuracy_score(y_test, poly_pred)
poly_f1 = f1_score(y_test, poly_pred, average='weighted')
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy*100))
print('F1 (Polynomial Kernel): ', "%.2f" % (poly_f1*100))

Accuracy (Polynomial Kernel):  98.18
F1 (Polynomial Kernel):  98.16


In [67]:
rbf_accuracy = accuracy_score(y_test, rbf_pred)
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy*100))
print('F1 (RBF Kernel): ', "%.2f" % (rbf_f1*100))

Accuracy (RBF Kernel):  2.50
F1 (RBF Kernel):  0.12


In [68]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(1760, 7)
(1760,)
(440, 7)
(440,)
