# Support Vector Machines

#### <font color="green">Import necessary libraries</font>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets  # library to download and use data from

### <font color="green">Import data</font>

In [2]:
# Load the Iris dataset through sklearn's `datasets` module and keep the
# returned object in `dataset`; later cells read `dataset.data` and
# `dataset.target` from it. The print is only a visible confirmation that
# the cell ran.
dataset = datasets.load_iris()
print("Dataset loaded.")

Dataset loaded.


In [3]:
# Prepare X and y with features and ground truth.
# X keeps every row but only the first two feature columns of dataset.data
# (check dataset.feature_names to see which features those are).
# y is the ground-truth label for each row.
X = dataset.data[:, :2]
y = dataset.target

In [4]:
# Hold out 20% of the samples for testing and train on the remaining 80%.
# random_state pins the random seed, so this call yields the same split on
# every run; drop the argument to get a different random split each time.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### <font color="green">For documentation (possible parameters, attributes and example code) on the SVC classifier, click on [SVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)</font>

In [5]:
from sklearn.svm import SVC

# Build the model with the default kernel ('rbf') and default C (1), spelled
# out explicitly, and train it on the training split. fit() returns the
# estimator itself, so construction and fitting can be chained.
clf = SVC(kernel='rbf', C=1).fit(X_train, y_train)

# Predict labels for the held-out test samples.
predicted = clf.predict(X_test)

### <font color="green">Performance analysis of the classifier</font>

In [6]:
from sklearn import metrics
from termcolor import colored

# Confusion matrix.
# Note: the ground truth is not binary (it has 3 labels), so the matrix is 3x3.
print(
    colored('confusion matrix:\n', 'green'),
    metrics.confusion_matrix(y_test, predicted),
)

# Overall classifier accuracy on the test split.
print(
    colored('\naccuracy:', 'blue'),
    metrics.accuracy_score(y_test, predicted),
)

# Classification report: precision, recall and F1 score for each label,
# plus the averaged figures.
print(
    colored('\nclassification report:\n', 'green'),
    metrics.classification_report(y_test, predicted),
)

[32mconfusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  3  3]]
[34m
accuracy:[0m 0.7333333333333333
[32m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.73      0.62      0.67        13
           2       0.38      0.50      0.43         6

    accuracy                           0.73        30
   macro avg       0.70      0.71      0.70        30
weighted avg       0.76      0.73      0.74        30



**<font color="green" size=3>Experiment with parameter values</font>**

**<font color="red" size=3>  Daniel Harnden's Code:</font>**

In [7]:
# Try out other kernels ('linear', 'poly', 'sigmoid') with C=5 and report how
# each one performs on the held-out test split.


def _evaluate_kernel(kernel, C=5, lead='', zero_division='warn'):
    """Fit an SVC with the given kernel and print its test-set metrics.

    Trains on the notebook's X_train / y_train, predicts X_test, then prints
    the confusion matrix, the accuracy and the classification report.

    Parameters
    ----------
    kernel : str
        Kernel name passed to SVC; its capitalised form labels the output.
    C : number, default 5
        Regularisation parameter passed to SVC.
    lead : str, default ''
        Text printed before the first heading (used to insert blank lines
        between consecutive reports).
    zero_division : default 'warn'
        Forwarded to metrics.classification_report; pass 0 to report 0.0
        instead of warning when a label is never predicted.

    Returns
    -------
    (model, predictions)
        The fitted SVC and its predictions for X_test.
    """
    model = SVC(kernel=kernel, C=C)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    label = kernel.capitalize()
    print(colored(f'{lead}{label} confusion matrix:\n', 'red'),
          metrics.confusion_matrix(y_test, predictions))
    # A bare expression in the middle of a cell is not displayed, so the
    # accuracy has to be printed explicitly to appear in the output.
    print(colored(f'\n{label} accuracy:', 'red'),
          metrics.accuracy_score(y_test, predictions))
    print(colored(f'\n{label} classification report:\n', 'red'),
          metrics.classification_report(y_test, predictions, zero_division=zero_division))
    return model, predictions


# linear
linearParams, linearPredicted = _evaluate_kernel('linear')

# poly
polyParams, polyPredicted = _evaluate_kernel('poly', lead='\n\n\n')

# sigmoid (zero_division=0: avoids the warning if some label is never predicted)
sigmoidParams, sigmoidPredicted = _evaluate_kernel('sigmoid', lead='\n\n\n', zero_division=0)

[31mLinear confusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  4  2]]
[31m
Linear classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.67      0.62      0.64        13
           2       0.29      0.33      0.31         6

    accuracy                           0.70        30
   macro avg       0.65      0.65      0.65        30
weighted avg       0.71      0.70      0.71        30

[31m


Poly confusion matrix:
[0m [[11  0  0]
 [ 0  5  8]
 [ 0  3  3]]
[31m
Poly classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.62      0.38      0.48        13
           2       0.27      0.50      0.35         6

    accuracy                           0.63        30
   macro avg       0.63      0.63      0.61        30
weighted avg       0.69      0.63      0.64        30

[31m


Sigmo

#### <font color="green">Hyperparameter tuning using GridSearchCV</font>

In [8]:
from sklearn.model_selection import GridSearchCV

# Candidate values to search: each kernel is tried with each value of C.
parameters = {
    'kernel': ('linear', 'rbf', 'poly'),
    'C': [1, 5, 10],
}

svc = SVC()

# n_jobs is the number of parallel jobs; -1 means whatever the architecture allows.
clf = GridSearchCV(svc, parameters, n_jobs=-1)
clf.fit(X_train, y_train)

# Show which parameter combination the search selected.
print(colored('Best parameters:', 'red'), clf.best_params_, "\n")

# Predict on the test data using the fitted search object.
predicted = clf.predict(X_test)

# Accuracy
print(
    colored('\naccuracy:', 'red'),
    metrics.accuracy_score(y_test, predicted),
)

# Precision and recall statistics
print(
    colored('\nclassification report:\n', 'red'),
    metrics.classification_report(y_test, predicted),
)

# Confusion matrix
print(
    colored('confusion matrix:\n', 'red'),
    metrics.confusion_matrix(y_test, predicted),
)

[31mBest parameters:[0m {'C': 1, 'kernel': 'rbf'} 

[31m
accuracy:[0m 0.7333333333333333
[31m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.73      0.62      0.67        13
           2       0.38      0.50      0.43         6

    accuracy                           0.73        30
   macro avg       0.70      0.71      0.70        30
weighted avg       0.76      0.73      0.74        30

[31mconfusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  3  3]]


**<font color="green" size=4>Now follow the code above and implement LinearSVC on the same data. Fit the classifier and predict, then calculate the accuracy and print the confusion matrix and classification report.</font>**

**<font color="red" size=3>  Daniel Harnden's Code:</font>**

### <font color="green">Documentation for [LinearSVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC)</font>

In [9]:
# Import LinearSVC (everything else has been imported earlier).
from sklearn.svm import LinearSVC

# Reuse the existing train/test split: only the classifier changes here.
# random_state=0 fixes the estimator's seed.
# NOTE(review): tol=1e-10 is a very tight stopping tolerance; the solver may
# hit its iteration limit before reaching it — confirm this is intended.
lSVC = LinearSVC(random_state=0, tol=1e-10)
lSVC.fit(X_train, y_train)
lSVCpred = lSVC.predict(X_test)

print(colored('Linear SVC confusion matrix:\n', 'red'), metrics.confusion_matrix(y_test, lSVCpred))
# A bare expression in the middle of a cell is not displayed, so the accuracy
# has to be printed explicitly to appear in the output.
print(colored('\nLinear SVC accuracy:', 'red'), metrics.accuracy_score(y_test, lSVCpred))
print(colored('\nLinear SVC classification report:\n', 'red'), metrics.classification_report(y_test, lSVCpred))

[31mLinear SVC confusion matrix:
[0m [[11  0  0]
 [ 0  4  9]
 [ 0  2  4]]
[31m
Linear SVC classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.67      0.31      0.42        13
           2       0.31      0.67      0.42         6

    accuracy                           0.63        30
   macro avg       0.66      0.66      0.61        30
weighted avg       0.72      0.63      0.63        30





**<font color="green" size=4>Now find the best parameters for your classifier using GridSearchCV (as shown above). Calculate the accuracy, and print the best parameters, confusion matrix and classification report.</font>**

**<font color="red" size=3>  Daniel Harnden's Code:</font>**

In [10]:
# Grid search over SVC kernels and C values.
# GridSearchCV evaluates every combination of the listed values on its own,
# so each kernel and each C value is listed exactly once
# (4 kernels x 3 C values = 12 candidates). Repeating entries would only
# re-fit identical models without changing the selected parameters.
GSCVparams = {
    'kernel': ('linear', 'rbf', 'poly', 'sigmoid'),
    'C': [1, 5, 10],
}
svc = SVC()
GSCVclf = GridSearchCV(svc, GSCVparams, n_jobs=-1)  # n_jobs=-1: as many parallel jobs as the architecture allows
GSCVclf.fit(X_train, y_train)

# Predict on the test data using the fitted search object.
GSCVpred = GSCVclf.predict(X_test)


print(colored('\nAccuracy:', 'red'), metrics.accuracy_score(y_test, GSCVpred))
print(colored('\nBest parameters:', 'red'), GSCVclf.best_params_,"\n")
print(colored('\nGridSearchCV confusion matrix:\n', 'red'),metrics.confusion_matrix(y_test, GSCVpred))
print(colored('\nGridSearchCV classification report:\n', 'red'),metrics.classification_report(y_test, GSCVpred))

[31m
Accuracy:[0m 0.7333333333333333
[31m
Best parameters:[0m {'C': 1, 'kernel': 'rbf'} 

[31m
GridSearchCV confusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  3  3]]
[31m
GridSearchCV classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.73      0.62      0.67        13
           2       0.38      0.50      0.43         6

    accuracy                           0.73        30
   macro avg       0.70      0.71      0.70        30
weighted avg       0.76      0.73      0.74        30

