In [1]:
import scipy.io
from sklearn import svm
import os
import pandas as pd
import numpy as np
from mat4py import loadmat
from sklearn import svm 
import time 
from sklearn.metrics import confusion_matrix

In [2]:
# Directory (relative to the working directory) holding the S*_code/x/y CSV files read below.
mydir = 'dataset'

In [3]:
# Read the three headerless S1 CSV files (code, x, y) from `mydir` into DataFrames.
code, x, y = (
    pd.read_csv(os.path.join(mydir, filename), header=None)
    for filename in ("S1_code.csv", "S1_x.csv", "S1_y.csv")
)

In [38]:
# NumPy views of the S1 frames, used where rows are selected by integer index arrays.
xValues, yValues, codeValues = (frame.to_numpy() for frame in (x, y, code))

In [4]:
# Read the three headerless S19 CSV files (code, x, y); these are only used for prediction below.
codeNew, xNew, yNew = (
    pd.read_csv(os.path.join(mydir, filename), header=None)
    for filename in ("S19_code.csv", "S19_x.csv", "S19_y.csv")
)

# SVM
## RBF default kernel

In [5]:
# Time the fit of an SVC with default settings on the full S1 data.
# `fit` returns the estimator itself, so construction and fitting are chained.
start = time.time()
mysvm = svm.SVC().fit(x, y.values.ravel())
stop = time.time()
print(f"Training time: {stop - start}s")

Training time: 0.6559848785400391s


In [6]:
# Training-set check: score the default SVC on the same S1 rows it was fitted on.
predictedSVM = mysvm.predict(x)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(y.values.ravel(), predictedSVM).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySVM = (tp + tn) / (tn + fp + fn + tp)
accuracySVM

tp: 3
fp: 0
tn: 2400
fn: 477


0.834375

In [7]:
# Score the default SVC (fitted on S1) on the S19 data.
predictedSVM = mysvm.predict(xNew)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(yNew.values.ravel(), predictedSVM).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmRbf = (tp + tn) / (tn + fp + fn + tp)
accuracySvmRbf

tp: 2
fp: 0
tn: 2400
fn: 478


0.8340277777777778

In [8]:
# Inspect the raw decision-function values of the default SVC on the S1 rows
# (for a binary SVC the sign of each value determines the predicted class).
mysvm.decision_function(x)

array([-1.04249994, -0.97663928, -1.01059643, ..., -1.05329069,
       -1.0336933 , -0.99264511])


### Polynomial SVM kernel, degree = 2

In [9]:
# Time the fit of a degree-2 polynomial-kernel SVC on the full S1 data.
start = time.time()
svmPoly2 = svm.SVC(kernel='poly', degree=2).fit(x, y.values.ravel())
stop = time.time()
print(f"Training time: {stop - start}s")

Training time: 1.0357699394226074s


In [10]:
# Training-set check: score the degree-2 polynomial SVC on the S1 rows it was fitted on.
predictedSvmPoly2 = svmPoly2.predict(x)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(y.values.ravel(), predictedSvmPoly2).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmPoly2 = (tp + tn) / (tn + fp + fn + tp)
accuracySvmPoly2

tp: 4
fp: 0
tn: 2400
fn: 476


0.8347222222222223

In [11]:
 predictedSvmPoly2 = svmPoly2.predict(xNew)
[[tn, fp],[fn, tp]] = confusion_matrix(yNew.values.ravel(), predictedSvmPoly2)
print(f"tp: {tp}") 
print(f"fp: {fp}")
print(f"tn: {tn}") 
print(f"fn: {fn}") 
accuracySvmPoly2 = (tp+tn)/(tp+tn+fp+fn)
accuracySvmPoly2

tp: 3
fp: 0
tn: 2400
fn: 477


0.834375

### Polynomial SVM kernel, degree = 5

In [12]:
# Time the fit of a degree-5 polynomial-kernel SVC on the full S1 data.
start = time.time()
svmPoly5 = svm.SVC(kernel='poly', degree=5).fit(x, y.values.ravel())
stop = time.time()
print(f"Training time: {stop - start}s")

Training time: 1.9832391738891602s


In [13]:
# Training-set check: score the degree-5 polynomial SVC on the S1 rows it was fitted on.
predictedSvmPoly5 = svmPoly5.predict(x)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(y.values.ravel(), predictedSvmPoly5).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmPoly5 = (tp + tn) / (tn + fp + fn + tp)
accuracySvmPoly5

tp: 30
fp: 0
tn: 2400
fn: 450


0.84375

In [49]:
# Split the S1 arrays into two equal row blocks and fit the degree-5 polynomial SVC
# on the second block only; the first block is kept for evaluation.
# Note: this rebinds svmPoly5, replacing the model fitted on the full S1 data.
halfX, halfY = np.split(xValues, 2), np.split(yValues, 2)
start = time.time()
svmPoly5 = svm.SVC(kernel='poly', degree=5).fit(halfX[1], halfY[1].ravel())
stop = time.time()
print(f"Training time: {stop - start}s")

Training time: 0.49340319633483887s


In [51]:
# Score the half-trained degree-5 SVC on the first S1 block, which it was not fitted on.
predictedSvmPoly5 = svmPoly5.predict(halfX[0])
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(halfY[0].ravel(), predictedSvmPoly5).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmPoly5 = (tp + tn) / (tn + fp + fn + tp)
accuracySvmPoly5

tp: 0
fp: 0
tn: 1200
fn: 240


0.8333333333333334

In [14]:
# Score the current svmPoly5 model on the S19 data.
# NOTE(review): in file order svmPoly5 was last rebound by the half-split cell (fitted on
# halfX[1] only), so a top-to-bottom run scores that model here, not the full-S1 fit.
# The execution counts suggest this cell originally ran right after the full fit -
# confirm which model is intended.
predictedSvmPoly5 = svmPoly5.predict(xNew)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(yNew.values.ravel(), predictedSvmPoly5).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmPoly5 = (tp + tn) / (tn + fp + fn + tp)
accuracySvmPoly5

tp: 16
fp: 22
tn: 2378
fn: 464


0.83125

## K-fold cross validation

In [6]:
 from sklearn.model_selection import KFold


In [29]:
# Label array derived from `y` (the same conversion also appears in an earlier cell);
# the K-fold cells index it with integer position arrays.
yValues = y.to_numpy()

In [82]:
# 3-fold cross-validation of the degree-5 polynomial SVC on the S1 arrays.
# Fitting and scoring happen inside the loop: the split variables are overwritten on
# every iteration, so anything done after the loop only ever sees the last fold.
kf = KFold(n_splits=3, shuffle=False)  # shuffle=False: folds are consecutive row blocks
foldAccuracies = []
for train_index, test_index in kf.split(xValues):
    xTrain, xTest = xValues[train_index], xValues[test_index]
    yTrain, yTest = yValues[train_index], yValues[test_index]
    foldModel = svm.SVC(kernel='poly', degree=5).fit(xTrain, yTrain.ravel())
    # SVC.score returns the mean accuracy on the given data.
    foldAccuracies.append(float(foldModel.score(xTest, yTest.ravel())))
print(f"Per-fold accuracy: {foldAccuracies}")
print(f"Mean accuracy: {np.mean(foldAccuracies)}")
# xTrain/xTest/yTrain/yTest still hold the last fold, which the following cells reuse.

In [83]:
# Time the fit of a degree-5 polynomial SVC on xTrain/yTrain as left by the K-fold loop
# (i.e. the training part of the last fold).
start = time.time()
svmPoly5 = svm.SVC(kernel='poly', degree=5).fit(xTrain, yTrain.ravel())
stop = time.time()
print(f"Training time: {stop - start}s")

Training time: 0.41872692108154297s


In [84]:
# Score the current svmPoly5 on xTest/yTest as left by the K-fold loop (last fold's test part).
predictedSvmPoly5 = svmPoly5.predict(xTest)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(yTest.ravel(), predictedSvmPoly5).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmPoly5 = (tp + tn) / (tn + fp + fn + tp)
accuracySvmPoly5

tp: 15
fp: 54
tn: 746
fn: 145


0.7927083333333333

### k = 2

In [52]:
# 2-fold cross-validation of the degree-5 polynomial SVC on the S1 arrays.
# Fitting and scoring happen inside the loop: the split variables are overwritten on
# every iteration, so anything done after the loop only ever sees the last fold.
kf = KFold(n_splits=2, shuffle=False)  # shuffle=False: folds are consecutive row blocks
foldAccuracies = []
for train_index, test_index in kf.split(xValues):
    xTrain, xTest = xValues[train_index], xValues[test_index]
    yTrain, yTest = yValues[train_index], yValues[test_index]
    foldModel = svm.SVC(kernel='poly', degree=5).fit(xTrain, yTrain.ravel())
    # SVC.score returns the mean accuracy on the given data.
    foldAccuracies.append(float(foldModel.score(xTest, yTest.ravel())))
print(f"Per-fold accuracy: {foldAccuracies}")
print(f"Mean accuracy: {np.mean(foldAccuracies)}")
# xTrain/xTest/yTrain/yTest still hold the last fold, which the following cells reuse.

In [53]:
# Time the fit of a degree-5 polynomial SVC on xTrain/yTrain as left by the K-fold loop
# (i.e. the training part of the last fold).
start = time.time()
svmPoly5 = svm.SVC(kernel='poly', degree=5).fit(xTrain, yTrain.ravel())
stop = time.time()
print(f"Training time: {stop - start}s")

Training time: 0.22530698776245117s


In [54]:
# Score the current svmPoly5 on xTest/yTest as left by the K-fold loop (last fold's test part).
predictedSvmPoly5 = svmPoly5.predict(xTest)
# Flatten the 2x2 confusion matrix (rows: true label, columns: predicted label).
tn, fp, fn, tp = confusion_matrix(yTest.ravel(), predictedSvmPoly5).ravel()
print(f"tp: {tp}", f"fp: {fp}", f"tn: {tn}", f"fn: {fn}", sep="\n")
accuracySvmPoly5 = (tp + tn) / (tn + fp + fn + tp)
accuracySvmPoly5

tp: 16
fp: 50
tn: 1150
fn: 224


0.8097222222222222