# Support Vector Machines

#### Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets  # library to download and use data from

### Import data

In [2]:
dataset = datasets.load_iris()
print("Dataset loaded.")

Dataset loaded.


In [3]:
# Prepare X and y with features and ground truth
X = dataset.data[:, :2]
y = dataset.target

In [4]:
# now split the data into training data and test data (80/20 split)
# random_state = k sets a specific random seed
# since random_state is fixed, this call will always produce the same split
# if you leave out the random_state value, you will get a different random split when you run it again
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

### For documentation (possible parameters, attributes and example code) on the SVC classifier, click on [SVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC) 

In [5]:
from sklearn.svm import SVC

# create our model and fit it to our training data
clf = SVC(kernel='rbf',C=1) # default values for kernel and C 
clf.fit(X_train, y_train) 

# make predictions on test data
predicted = clf.predict(X_test)

### Performance analysis of the classifier

In [6]:
from sklearn import metrics
from termcolor import colored

# print confusion matrix. 
# **Note** since the ground truth is not binary (has 3 labels), the confusion matrix is a 3x3 matrix
print(colored('confusion matrix:\n', 'green'), metrics.confusion_matrix(y_test, predicted))

# print classifier accuracy
print(colored('\naccuracy:', 'blue'), metrics.accuracy_score(y_test, predicted))

# print classification report (Precision, reall, and F1 score for each label, and average)
print(colored('\nclassification report:\n', 'green'),metrics.classification_report(y_test, predicted))

[32mconfusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  3  3]]
[34m
accuracy:[0m 0.7333333333333333
[32m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.73      0.62      0.67        13
           2       0.38      0.50      0.43         6

    accuracy                           0.73        30
   macro avg       0.70      0.71      0.70        30
weighted avg       0.76      0.73      0.74        30



**<font color="red" size=3>Experiment with parameter values</font>**

In [7]:
# try out other kernels ('linear' 'poly' 'sigmoid') and other values for C
clf = SVC(kernel='linear',C=5) # default values for kernel and C 
clf.fit(X_train, y_train) 

predicted = clf.predict(X_test)
print(colored('confusion matrix:\n', 'green'), metrics.confusion_matrix(y_test, predicted))
print(colored('\naccuracy:', 'blue'), metrics.accuracy_score(y_test, predicted))
print(colored('\nclassification report:\n', 'green'),metrics.classification_report(y_test, predicted))

[32mconfusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  4  2]]
[34m
accuracy:[0m 0.7
[32m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.67      0.62      0.64        13
           2       0.29      0.33      0.31         6

    accuracy                           0.70        30
   macro avg       0.65      0.65      0.65        30
weighted avg       0.71      0.70      0.71        30



#### Hyperparameter tuning using GridSearchCV 

In [14]:
from sklearn.model_selection import GridSearchCV

parameters = {'kernel':('linear', 'rbf', 'poly'), 'C':[1, 5, 10]}

svc = SVC()

clf = GridSearchCV(svc, parameters, n_jobs=-1) # n_jobs -> number of parallel jobs
                                               # -1 -> whatever the architecture allows

clf.fit(X_train, y_train)

print(colored('Best parameters:', 'green'), clf.best_params_,"\n") # print best parameters

# make predictions on test data
predicted = clf.predict(X_test)

# print accuracy
print(colored('\naccuracy:', 'green'), metrics.accuracy_score(y_test, predicted))

# print precision and recall statistics
print(colored('\nclassification report:\n', 'green'),metrics.classification_report(y_test, predicted))

# print confusion matrix
print(colored('confusion matrix:\n', 'green'),metrics.confusion_matrix(y_test, predicted))

[32mBest parameters:[0m {'C': 1, 'kernel': 'rbf'} 

[32m
accuracy:[0m 0.7333333333333333
[32m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.73      0.62      0.67        13
           2       0.38      0.50      0.43         6

    accuracy                           0.73        30
   macro avg       0.70      0.71      0.70        30
weighted avg       0.76      0.73      0.74        30

[32mconfusion matrix:
[0m [[11  0  0]
 [ 0  8  5]
 [ 0  3  3]]


**<font color="red" size=4>Now follow the code above and implement LinearSVC on the same data. Fit classifier and predict, then calculate Accuracy, print confusion matrix and classification report.</font>**

### Documentation for [LinearSVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC) 

In [16]:
# Import linear SVC (Everything else has been imported earlier)
from sklearn.svm import LinearSVC
# Only implement the classifier and calculate performance. No need to do train_test_split.

clf = LinearSVC(verbose=0)
clf.fit(X_train, y_train)
# print(colored('Best parameters:', 'green'), clf.best_params_,"\n") # print best parameters

# make predictions on test data
predicted = clf.predict(X_test)

# print accuracy
print(colored('\naccuracy:', 'green'), metrics.accuracy_score(y_test, predicted))

# print precision and recall statistics
print(colored('\nclassification report:\n', 'green'),metrics.classification_report(y_test, predicted))

# print confusion matrix
print(colored('confusion matrix:\n', 'green'),metrics.confusion_matrix(y_test, predicted))

[1 1 0 2 0 2 0 2 2 2 2 2 2 2 2 0 2 1 0 0 1 1 0 0 2 0 0 2 1 0]
[32m
accuracy:[0m 0.6333333333333333
[32m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.67      0.31      0.42        13
           2       0.31      0.67      0.42         6

    accuracy                           0.63        30
   macro avg       0.66      0.66      0.61        30
weighted avg       0.72      0.63      0.63        30

[32mconfusion matrix:
[0m [[11  0  0]
 [ 0  4  9]
 [ 0  2  4]]




**<font color="red" size=4>Now find the best parameter for your classifier using GridSearchCV (like shown above). Calculate Accuracy, print best parameters, confusion matrix and classification report.</font>**

In [29]:
# your code here
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

SVCpipe = Pipeline([('scale', StandardScaler()),
                   ('SVC',LinearSVC())])
param_grid = {'SVC__C':np.arange(0.01,100,10)}
clf = GridSearchCV(SVCpipe,param_grid,cv=4,return_train_score=True)
clf.fit(X_train,y_train)

print(colored('Best parameters:', 'green'), clf.best_params_,"\n") # print best parameters

# make predictions on test data
predicted = clf.predict(X_test)

# print accuracy
print(colored('\naccuracy:', 'green'), metrics.accuracy_score(y_test, predicted))

# print precision and recall statistics
print(colored('\nclassification report:\n', 'green'),metrics.classification_report(y_test, predicted))

# print confusion matrix
print(colored('confusion matrix:\n', 'green'),metrics.confusion_matrix(y_test, predicted))



[32mBest parameters:[0m {'SVC__C': 80.01} 

[32m
accuracy:[0m 0.6
[32m
classification report:
[0m               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.57      0.31      0.40        13
           2       0.25      0.50      0.33         6

    accuracy                           0.60        30
   macro avg       0.61      0.60      0.58        30
weighted avg       0.66      0.60      0.61        30

[32mconfusion matrix:
[0m [[11  0  0]
 [ 0  4  9]
 [ 0  3  3]]


