In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report

In [2]:
# scikit learn has an iris dataset similiar to the one used in the previous assignment, so I will
# be using this instead of downloading the data and getting it from its csv file.
iris = datasets.load_iris()
# iris.data will have the measurements of the iris
X = iris.data
# iris.target will have the classification of the iris
y = iris.target

In [3]:
# Source for the kernels and running SVC: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

# 70% Training and 30% Testing

In [4]:
# trying 70% training and 30% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30)

## Linear Kernel

In [5]:
l_svc = SVC(kernel = 'linear')
l_svc.fit(X_train, y_train)
y_pred = l_svc.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.95      0.98        21
           2       0.93      1.00      0.97        14

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

Accuracy: 0.9777777777777777


## Poly Kernel

In [6]:
poly_svc = SVC(kernel = 'poly')
poly_svc.fit(X_train, y_train)
y_pred = poly_svc.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        21
           2       0.88      1.00      0.93        14

    accuracy                           0.96        45
   macro avg       0.96      0.97      0.96        45
weighted avg       0.96      0.96      0.96        45

Accuracy: 0.9555555555555556


## RBF Kernel

In [7]:
rbf_svc = SVC(kernel = 'rbf')
rbf_svc.fit(X_train, y_train)
y_pred = rbf_svc.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        21
           2       0.88      1.00      0.93        14

    accuracy                           0.96        45
   macro avg       0.96      0.97      0.96        45
weighted avg       0.96      0.96      0.96        45

Accuracy: 0.9555555555555556


## Precomputed Kernel

In [8]:
# Source to get this kernel working:
# https://stats.stackexchange.com/questions/92101/prediction-with-scikit-and-an-precomputed-kernel-svm

precomputed_svc = SVC(kernel = 'precomputed')
kernel_train = np.dot(X_train, X_train.T)
kernel_test = np.dot(X_test, X_train.T)
precomputed_svc.fit(kernel_train, y_train)
y_pred = precomputed_svc.predict(kernel_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.95      0.98        21
           2       0.93      1.00      0.97        14

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

Accuracy: 0.9777777777777777


# 60% Training and 40% Testing

In [9]:
# for experimenting, trying 60% training and 40% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.40)

## Linear Kernel

In [10]:
l_svc = SVC(kernel = 'linear')
l_svc.fit(X_train, y_train)
y_pred = l_svc.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.96      0.98        25
           2       0.94      1.00      0.97        16

    accuracy                           0.98        60
   macro avg       0.98      0.99      0.98        60
weighted avg       0.98      0.98      0.98        60

Accuracy: 0.9833333333333333


## Poly Kernel

In [11]:
poly_svc = SVC(kernel = 'poly')
poly_svc.fit(X_train, y_train)
y_pred = poly_svc.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.96      0.98        25
           2       0.94      1.00      0.97        16

    accuracy                           0.98        60
   macro avg       0.98      0.99      0.98        60
weighted avg       0.98      0.98      0.98        60

Accuracy: 0.9833333333333333


## RBF Kernel

In [12]:
rbf_svc = SVC(kernel = 'rbf')
rbf_svc.fit(X_train, y_train)
y_pred = rbf_svc.predict(X_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.96      0.92      0.94        25
           2       0.88      0.94      0.91        16

    accuracy                           0.95        60
   macro avg       0.95      0.95      0.95        60
weighted avg       0.95      0.95      0.95        60

Accuracy: 0.95


## Precomputed Kernel

In [13]:
precomputed_svc = SVC(kernel = 'precomputed')
kernel_train = np.dot(X_train, X_train.T)
kernel_test = np.dot(X_test, X_train.T)
precomputed_svc.fit(kernel_train, y_train)
y_pred = precomputed_svc.predict(kernel_test)
print(classification_report(y_test, y_pred))
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.96      0.98        25
           2       0.94      1.00      0.97        16

    accuracy                           0.98        60
   macro avg       0.98      0.99      0.98        60
weighted avg       0.98      0.98      0.98        60

Accuracy: 0.9833333333333333


# Results and Discussion 

Based on the results and some experimenting, the accuracy for each kernel differs everytime the code is run. To try and see some sort of different result, I did two tests: one test with 70% training and 30% testing, and another test with 60% training and 40% testing. With 70% training and 30% testing, the linear and precomputed kernels seems to be within the same accuracy, and the poly and rbf kernel seems to be within the same accuracy. But with 60% training and 40% testing, we have totally different results, with rfb kernel being the only kernel with a different accuracy than the other 3 kernels being used. In the end, different kernels are used to help us map out and classify the randomness of data points which helps make training more efficiently. In the case of the Iris dataset, any of these kernels can be used as they give us a really good accuracy.