In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

In [3]:
X_train = pd.read_pickle('./data/X_train_pickle.pkl')
X_test = pd.read_pickle('./data/X_test_pickle.pkl')
y_train = pd.read_pickle('./data/y_train_pickle.pkl')
y_test = pd.read_pickle('./data/y_test_pickle.pkl')

# Linear SVM

In [4]:
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [5]:
y_pred = svclassifier.predict(X_test)

In [6]:
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[4201  326]
 [ 624  848]]
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      4527
           1       0.72      0.58      0.64      1472

    accuracy                           0.84      5999
   macro avg       0.80      0.75      0.77      5999
weighted avg       0.83      0.84      0.84      5999



# Polynomial Kernel

In [23]:
scores = {}
for num in range(1,8):
    svclassifier = SVC(kernel='poly', degree=num, gamma='scale')
    svclassifier.fit(X_train, y_train)
    y_pred = svclassifier.predict(X_test)
    scores[num] = accuracy_score(y_test, y_pred)

high = 0
for degree, accuracy in scores.items():
    if accuracy > high:
        high = accuracy
        deg = degree
print("A degree of {} results in the highest accuracy of {}".format(deg, round(high, 3)))

this is the result of 0 degrees
[[4527    0]
 [1472    0]]
              precision    recall  f1-score   support

           0       0.75      1.00      0.86      4527
           1       0.00      0.00      0.00      1472

    accuracy                           0.75      5999
   macro avg       0.38      0.50      0.43      5999
weighted avg       0.57      0.75      0.65      5999



  'precision', 'predicted', average, warn_for)


this is the result of 1 degrees
[[4208  319]
 [ 622  850]]
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      4527
           1       0.73      0.58      0.64      1472

    accuracy                           0.84      5999
   macro avg       0.80      0.75      0.77      5999
weighted avg       0.84      0.84      0.84      5999

this is the result of 2 degrees
[[4256  271]
 [ 692  780]]
              precision    recall  f1-score   support

           0       0.86      0.94      0.90      4527
           1       0.74      0.53      0.62      1472

    accuracy                           0.84      5999
   macro avg       0.80      0.74      0.76      5999
weighted avg       0.83      0.84      0.83      5999

this is the result of 3 degrees
[[4239  288]
 [ 704  768]]
              precision    recall  f1-score   support

           0       0.86      0.94      0.90      4527
           1       0.73      0.52      0.61      1472

    a

In [None]:
svclassifier = SVC(kernel='poly', degree=1, gamma='scale')
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Gaussian Kernel

In [24]:
svclassifier = SVC(kernel='rbf')
svclassifier.fit(X_train, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [25]:
y_pred = svclassifier.predict(X_test)

In [26]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[4209  318]
 [ 637  835]]
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      4527
           1       0.72      0.57      0.64      1472

    accuracy                           0.84      5999
   macro avg       0.80      0.75      0.77      5999
weighted avg       0.83      0.84      0.83      5999



# Sigmoid Kernel

In [27]:
svclassifier = SVC(kernel='sigmoid')
svclassifier.fit(X_train, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='sigmoid', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [28]:
y_pred = svclassifier.predict(X_test)

In [29]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[4092  435]
 [ 623  849]]
              precision    recall  f1-score   support

           0       0.87      0.90      0.89      4527
           1       0.66      0.58      0.62      1472

    accuracy                           0.82      5999
   macro avg       0.76      0.74      0.75      5999
weighted avg       0.82      0.82      0.82      5999



In [30]:
kernel_options = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernel_options:
    svclassifier = SVC(kernel=kernel)
    svclassifier.fit(X_train, y_train)
    y_pred = svclassifier.predict(X_test)
    print("This is the report for the {} kernel".format(kernel))
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

[[4201  326]
 [ 624  848]]
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      4527
           1       0.72      0.58      0.64      1472

    accuracy                           0.84      5999
   macro avg       0.80      0.75      0.77      5999
weighted avg       0.83      0.84      0.84      5999





[[4239  288]
 [ 705  767]]
              precision    recall  f1-score   support

           0       0.86      0.94      0.90      4527
           1       0.73      0.52      0.61      1472

    accuracy                           0.83      5999
   macro avg       0.79      0.73      0.75      5999
weighted avg       0.83      0.83      0.82      5999





[[4209  318]
 [ 637  835]]
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      4527
           1       0.72      0.57      0.64      1472

    accuracy                           0.84      5999
   macro avg       0.80      0.75      0.77      5999
weighted avg       0.83      0.84      0.83      5999





[[4092  435]
 [ 623  849]]
              precision    recall  f1-score   support

           0       0.87      0.90      0.89      4527
           1       0.66      0.58      0.62      1472

    accuracy                           0.82      5999
   macro avg       0.76      0.74      0.75      5999
weighted avg       0.82      0.82      0.82      5999



# Fill in the variables below with actual results

In [1]:
SVM_f1 = 0
SVM_accuracy = 0
svm = {
    'accuracy': SVM_accuracy,
    'f1': SVM_f1
}
%store svm

Stored 'svm' (dict)
