In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# SIDE NOTES
# My work may or may not be correct, please double check before using

# 0 = Iris-Setosa
# 1 = Iris-Versicolor
# 2 = Iris-Virginica

In [2]:
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30)

# Linear Kernel

In [4]:
linear_svclassifier = SVC(kernel='linear', gamma = 'auto', degree = 8) 
linear_svclassifier.fit(X_train, y_train)
y_pred = linear_svclassifier.predict(X_test)
print(classification_report(y_test, y_pred))
data = linear_svclassifier.score(X_test, y_test)
print(data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.93      0.97        15
           2       0.94      1.00      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

0.9777777777777777


# RBF kernel

In [5]:
RBF_svclassifier = SVC(kernel='rbf', gamma = 'auto', random_state = 0, degree = 8)
RBF_svclassifier.fit(X_train, y_train)
y_pred = RBF_svclassifier.predict(X_test)
print(classification_report(y_test, y_pred))
data = RBF_svclassifier.score(X_test, y_test)
print(data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00        15

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

1.0


# Poly kernel

In [6]:
poly_svclassifier = SVC(kernel='poly', gamma = 'auto', random_state = 0, degree = 8)
poly_svclassifier.fit(X_train, y_train)
y_pred = poly_svclassifier.predict(X_test)
print(classification_report(y_test, y_pred))
data = poly_svclassifier.score(X_test, y_test)
print("accuracy of:", data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.79      0.73      0.76        15
           2       0.75      0.80      0.77        15

    accuracy                           0.84        45
   macro avg       0.85      0.84      0.84        45
weighted avg       0.85      0.84      0.84        45

accuracy of: 0.8444444444444444


# Precomputed kernel

In [7]:
PRE_svclassifier = SVC(kernel='precomputed', gamma = 'auto', random_state = 0, degree = 8)
kernal_train = np.dot(X_train, X_train.T)
kernel_test = np.dot(X_test, X_train.T)
PRE_svclassifier.fit(kernal_train, y_train)
y_pred = PRE_svclassifier.predict(kernel_test)
print(classification_report(y_test, y_pred))
data = PRE_svclassifier.score(kernel_test, y_test)
print("accuracy of:", data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      0.93      0.97        15
           2       0.94      1.00      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

accuracy of: 0.9777777777777777


# Discussion

In this assignment, we apply SKlearn's support vector machine onto the iris dataset. I used 4 different types of kernels, "linear kernel", "gaussian kernel", "polynomial kernel" and the "precomputed kernel". Running the iris dataset into each kernel, i discovered that the accuracy for for each kernel differed and could change upon rerunning the program.

I discovered that the purpose of using different kernels is so that we can accomodate for the randomness in the dataset, so that we can map out the data-points more easily so training can be done more efficiently. In our data shown above, we can notice that precomputed and linear model has 0.97/0.98 accuracy which shows that the classification of the data set could be linear. Furthermore, in our gaussian kernel, we have a 1 accuracy which means that the trend of the dataset follows a gaussian model compleely and not a polynomial model which has the lowest accuracy, 0.84.