**1. SETUP**

Import Modules

In [63]:
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Seed NumPy's legacy global RNG up front so that later steps which are not
# given an explicit random_state are repeatable on a fresh Restart & Run All.
np.random.seed(1)

Loading Data 

In [64]:
# Read the riding-mower dataset (the same file used in the logistic regression
# example) and preview its first three rows.
ridingMowers = pd.read_csv("./RidingMowers.csv")
ridingMowers.head(n=3)

Unnamed: 0,Income,Lot_Size,Ownership
0,60.0,18.4,Owner
1,85.5,16.8,Owner
2,64.8,21.6,Owner


In [65]:
# Print a structural summary of the frame: row count, column names,
# non-null counts, dtypes and approximate memory usage.
ridingMowers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Income     24 non-null     float64
 1   Lot_Size   24 non-null     float64
 2   Ownership  24 non-null     object 
dtypes: float64(2), object(1)
memory usage: 704.0+ bytes


**2. Split Data (Train/Test)**

In [66]:
# Hold out 30% of the rows as a test set and train on the rest.
# random_state is passed explicitly so that re-running this cell on its own
# reproduces the same partition instead of depending on how far the global
# NumPy RNG has advanced since it was seeded.
train_df, test_df = train_test_split(ridingMowers, test_size=0.3, random_state=1)

# Name the target column and the predictor columns once so that later cells
# do not have to repeat them.
target = 'Ownership'
predictors = [column for column in ridingMowers.columns if column != target]

x_train, y_train = train_df[predictors], train_df[target]
x_test, y_test = test_df[predictors], test_df[target]

**3. Model the data** 

First, let's create a dataframe to load the model performance metrics into.

In [67]:
# Empty results table: one column for the model name plus one per metric.
# The evaluation cells further down add one row per fitted model.
metric_columns = ("model", "Accuracy", "Precision", "Recall", "F1")
performance = pd.DataFrame({column: [] for column in metric_columns})

**SVM model using linear kernel**  

In [68]:
# Linear-kernel SVM. probability=True is requested at construction time so the
# fitted estimator can also produce class-probability estimates.
svm_linear_model = SVC(
    probability=True,
    kernel="linear",
)
# fit() returns the estimator itself, so `linear` is the same fitted object.
linear = svm_linear_model.fit(x_train, y_train.to_numpy().ravel())

In [69]:
# Score the linear-kernel SVM on the held-out rows and record its metrics.
model_preds = svm_linear_model.predict(x_test)

# Pin the label order to the classifier's own classes_ so the matrix is always
# 2x2 (even if the small test split happens to contain only one class).
# The second entry of classes_ is treated as the positive class
# (presumably 'Owner' -- verify against svm_linear_model.classes_).
c_matrix = confusion_matrix(y_test, model_preds, labels=svm_linear_model.classes_)
# Rows are true labels and columns are predictions, so ravel() yields
# TN, FP, FN, TP in that order.
TN, FP, FN, TP = c_matrix.ravel()

# Note: a zero denominator (e.g. no positive predictions) yields NaN together
# with a NumPy runtime warning rather than an exception.
linear_row = pd.DataFrame({'model': ["linear svm"],
                           'Accuracy': [(TP+TN)/(TP+TN+FP+FN)],
                           'Precision': [TP/(TP+FP)],
                           'Recall': [TP/(TP+FN)],
                           'F1': [2*TP/(2*TP+FP+FN)]})

# Drop any earlier "linear svm" row so re-running this cell does not append a
# duplicate, and renumber the rows so every model gets a unique index label.
# The empty case is handled separately to avoid concatenating an empty frame.
other_rows = performance[performance['model'] != "linear svm"]
if other_rows.empty:
    performance = linear_row
else:
    performance = pd.concat([other_rows, linear_row], ignore_index=True)

**SVM model using rbf kernel** 

In [70]:
# RBF-kernel SVM with regularisation strength C=10 and gamma left at 'scale'.
svm_rbf_model = SVC(
    C=10,
    gamma='scale',
    kernel="rbf",
)
# fit() returns the estimator itself, so `rbf` is the same fitted object.
rbf = svm_rbf_model.fit(x_train, y_train.to_numpy().ravel())

In [71]:
# Score the RBF-kernel SVM on the held-out rows and record its metrics.
model_preds = svm_rbf_model.predict(x_test)

# Pin the label order to the classifier's own classes_ so the matrix is always
# 2x2 (even if the small test split happens to contain only one class).
# The second entry of classes_ is treated as the positive class
# (presumably 'Owner' -- verify against svm_rbf_model.classes_).
c_matrix = confusion_matrix(y_test, model_preds, labels=svm_rbf_model.classes_)
# Rows are true labels and columns are predictions, so ravel() yields
# TN, FP, FN, TP in that order.
TN, FP, FN, TP = c_matrix.ravel()

# Note: a zero denominator (e.g. no positive predictions) yields NaN together
# with a NumPy runtime warning rather than an exception.
rbf_row = pd.DataFrame({'model': ["rbf svm"],
                        'Accuracy': [(TP+TN)/(TP+TN+FP+FN)],
                        'Precision': [TP/(TP+FP)],
                        'Recall': [TP/(TP+FN)],
                        'F1': [2*TP/(2*TP+FP+FN)]})

# Drop any earlier "rbf svm" row so re-running this cell does not append a
# duplicate, and renumber the rows so every model gets a unique index label.
# The empty case is handled separately to avoid concatenating an empty frame.
other_rows = performance[performance['model'] != "rbf svm"]
if other_rows.empty:
    performance = rbf_row
else:
    performance = pd.concat([other_rows, rbf_row], ignore_index=True)

**SVM model using polynomial kernel**

In [72]:
# Degree-3 polynomial-kernel SVM with coef0=1 and regularisation strength C=10.
svm_poly_model = SVC(
    C=10,
    kernel="poly",
    degree=3,
    coef0=1,
)
# fit() returns the estimator itself, so `poly` is the same fitted object.
poly = svm_poly_model.fit(x_train, y_train.to_numpy().ravel())

In [73]:
# Score the polynomial-kernel SVM on the held-out rows and record its metrics.
model_preds = svm_poly_model.predict(x_test)

# Pin the label order to the classifier's own classes_ so the matrix is always
# 2x2 (even if the small test split happens to contain only one class).
# The second entry of classes_ is treated as the positive class
# (presumably 'Owner' -- verify against svm_poly_model.classes_).
c_matrix = confusion_matrix(y_test, model_preds, labels=svm_poly_model.classes_)
# Rows are true labels and columns are predictions, so ravel() yields
# TN, FP, FN, TP in that order.
TN, FP, FN, TP = c_matrix.ravel()

# Note: a zero denominator (e.g. no positive predictions) yields NaN together
# with a NumPy runtime warning rather than an exception.
poly_row = pd.DataFrame({'model': ["poly svm"],
                         'Accuracy': [(TP+TN)/(TP+TN+FP+FN)],
                         'Precision': [TP/(TP+FP)],
                         'Recall': [TP/(TP+FN)],
                         'F1': [2*TP/(2*TP+FP+FN)]})

# Drop any earlier "poly svm" row so re-running this cell does not append a
# duplicate, and renumber the rows so every model gets a unique index label.
# The empty case is handled separately to avoid concatenating an empty frame.
other_rows = performance[performance['model'] != "poly svm"]
if other_rows.empty:
    performance = poly_row
else:
    performance = pd.concat([other_rows, poly_row], ignore_index=True)

**4. Summary**

In [74]:
# Display the accumulated metrics table (one row per evaluated SVM kernel).
performance

Unnamed: 0,model,Accuracy,Precision,Recall,F1
0,linear svm,1.0,1.0,1.0,1.0
0,rbf svm,0.75,0.666667,0.666667,0.666667
0,poly svm,0.875,1.0,0.666667,0.8


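According to the performance table above, the SVM with the linear kernel is the best-performing model: it has the highest accuracy, precision, recall, and F1 score on the test set. Note that the test set contains only 8 of the 24 rows, so these estimates are noisy and the ranking should be treated with caution.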

**5. Save to disk**

In [75]:
# Persist the fitted linear-kernel SVM (the top scorer in the summary table).
# A relative path keeps the notebook runnable on any machine, matching how the
# input CSV is loaded; the with-block guarantees the file handle is closed even
# if pickling fails.
# NOTE: only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code.
with open('./model_ownership.pkl', "wb") as model_file:
    pickle.dump(linear, model_file)
