# IMPORT LIBRARIES

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

np.random.seed(1)

# LOAD THE DATASET

In [2]:
df=pd.read_csv("C:/Users/18137/Box/DSP/SVM/RidingMowers.csv")
df.head()

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,Owner
1,85.5,16.8,Owner
2,64.8,21.6,Owner
3,61.5,20.8,Owner
4,87.0,23.6,Owner


# TARGET VARIABLE LABEL ENCODING

In [3]:
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
df['Ownership'] = labelencoder.fit_transform(df['Ownership'])
df

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,1
1,85.5,16.8,1
2,64.8,21.6,1
3,61.5,20.8,1
4,87.0,23.6,1
5,110.1,19.2,1
6,108.0,17.6,1
7,82.8,22.4,1
8,69.0,20.0,1
9,93.0,20.8,1


# SPLIT THE DATA INTO TRAINING AND TESTING SETS

In [4]:
from sklearn.model_selection import train_test_split
X= df[['Income', 'Lot_Size']]
y=df['Ownership']
X_train, X_test, y_train, y_test = train_test_split(X,y ,random_state=15, test_size=0.30)

# DATA MODELING

In [5]:
performance = pd.DataFrame({"model": [], "Accuracy": [], "Precision": [], "Recall": [], "F1": []})

## SVM Classification Model Using Linear Kernel

In [6]:
svm_lin_model = SVC(kernel="linear", probability=True)
_ = svm_lin_model.fit(X_train, np.ravel(y_train))

In [7]:
model_preds = svm_lin_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"svm with linear kernel", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

## SVM Classification Model Using RBF Kernel

In [8]:
svm_rbf_model = SVC(kernel="rbf", C=10, gamma='scale', probability=True)
_ = svm_rbf_model.fit(X_train, np.ravel(y_train))

In [9]:
model_preds = svm_rbf_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"svm with rbf kernel", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

## SVM Classification Model Using Polynomial Kernel

In [10]:
svm_poly_model = SVC(kernel="poly", degree=3, coef0=1, C=10, probability=True)
_ = svm_poly_model.fit(X_train, np.ravel(y_train))

In [11]:
model_preds = svm_poly_model.predict(X_test)
c_matrix = confusion_matrix(y_test, model_preds)
TP = c_matrix[1][1]
TN = c_matrix[0][0]
FP = c_matrix[0][1]
FN = c_matrix[1][0]
performance = pd.concat([performance, pd.DataFrame({'model':"svm with polynomial kernel", 
                                                    'Accuracy': [(TP+TN)/(TP+TN+FP+FN)], 
                                                    'Precision': [TP/(TP+FP)], 
                                                    'Recall': [TP/(TP+FN)], 
                                                    'F1': [2*TP/(2*TP+FP+FN)]
                                                     }, index=[0])])

# Summary

Sorted by accuracy, the best models are:

In [12]:
performance.sort_values(by=['Accuracy'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,svm with rbf kernel,0.75,1.0,0.5,0.666667
0,svm with polynomial kernel,0.875,1.0,0.75,0.857143
0,svm with linear kernel,1.0,1.0,1.0,1.0


Sorted by Precision, the best models are:

In [13]:
performance.sort_values(by=['Precision'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,svm with linear kernel,1.0,1.0,1.0,1.0
0,svm with rbf kernel,0.75,1.0,0.5,0.666667
0,svm with polynomial kernel,0.875,1.0,0.75,0.857143


Sorted by Recall, the best models are:

In [14]:
performance.sort_values(by=['Recall'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,svm with rbf kernel,0.75,1.0,0.5,0.666667
0,svm with polynomial kernel,0.875,1.0,0.75,0.857143
0,svm with linear kernel,1.0,1.0,1.0,1.0


Sorted by F1, the best models are:

In [15]:
performance.sort_values(by=['F1'])

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,svm with rbf kernel,0.75,1.0,0.5,0.666667
0,svm with polynomial kernel,0.875,1.0,0.75,0.857143
0,svm with linear kernel,1.0,1.0,1.0,1.0


Upon looking the values of Accuracy, Precision, F1, Recall, we can say that "SVM with Linear Kernel" is Overfitting.
So we will consider "svm with polynomial kernel" as the best performing one.

# Save the Best model as Pickle file

In [16]:
import pickle
pickle.dump(svm_poly_model,open("C:/Users/18137/Box/DSP/SVM/pickle.pkl",'wb'))