In [65]:
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns

SVM algorithm offers a choice of kernel functions for performing its processing. Basically, mapping data into a higher dimensional space is called "kernelling". The mathematical function used for the transformation is known as the kernel function and can be of different types, such as:

1.Linear
2.Polynomial
3.Radial Basis function
4.Sigmoid

Each of these functions has its characteristics, its pros and cons and its equation, but there's no easy way of knowing which function performs best with any given dataset. We usually choose different functions in turn and compare the results. We will do that with 2 functions : Linear and Radial basis.

In [66]:
file_path="C:/Users/kleop/Documents/repos/Exercises/Machine_Learning/Coursework_2/heart.csv"
data=pd.read_csv(file_path, sep=',', decimal=".")
#data.head()
data.dtypes #to check. if there is any non-numeric variable

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [67]:
X=data.iloc[:,:-1].values
y=data.iloc[:,-1].values

In [68]:
scale=StandardScaler()
X=scale.fit_transform(X)
print(X)

[[ 0.9521966   0.68100522  1.97312292 ... -2.27457861 -0.71442887
  -2.14887271]
 [-1.91531289  0.68100522  1.00257707 ... -2.27457861 -0.71442887
  -0.51292188]
 [-1.47415758 -1.46841752  0.03203122 ...  0.97635214 -0.71442887
  -0.51292188]
 ...
 [ 1.50364073  0.68100522 -0.93851463 ... -0.64911323  1.24459328
   1.12302895]
 [ 0.29046364  0.68100522 -0.93851463 ... -0.64911323  0.26508221
   1.12302895]
 [ 0.29046364 -1.46841752  0.03203122 ... -0.64911323  0.26508221
  -0.51292188]]


In [69]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [70]:
pca =PCA(n_components =10)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
print(pca.explained_variance_ratio_.cumsum())

[0.20879927 0.32995097 0.42369281 0.51514961 0.59630549 0.67101175
 0.73647104 0.79594704 0.85092807 0.90117182]


# SVM model

In [83]:
model_svm1 = SVC(kernel='linear')    
model_svm1.fit(X_train_pca, y_train)
accuracy_train = model_svm1.score(X_train_pca, y_train)
print(f"Accuracy: {accuracy_train}")

Accuracy: 0.8429752066115702


In [84]:
model_svm2 = SVC(kernel='rbf', gamma='auto')
model_svm2.fit(X_train_pca, y_train)
accuracy_train = model_svm2.score(X_train_pca, y_train)
print(f"Accuracy: {accuracy_train}")

Accuracy: 0.9090909090909091


Radial basis kernel seems to be performing better on the training set of this dataset.

# Performing 5-Fold Grid-Search Cross Validation on  SVM Classifier on the Training Set

1. Specify  gamma(=the corresponding kernel coefficients, which will be used for calculating the hyperplane with respect to a new set of dimensions)
2. Specify C(=the penalty we want to impose on the incorrectly placed datapoints with respect to the new hyperplane)

In [85]:
params = {'C':[1, 10, 20]}
folds = 5

model_cv1 = GridSearchCV(estimator = model_svm1, 
                        param_grid = params, 
                        scoring= 'accuracy', 
                        cv = folds,
                        return_train_score=True,
                        verbose = 3)

model_cv1.fit(X_train_pca, y_train)
print()
model_cv2 = GridSearchCV(estimator = model_svm2, 
                        param_grid = params, 
                        scoring= 'accuracy', 
                        cv = folds,
                        return_train_score=True,
                        verbose = 3)

model_cv2.fit(X_train_pca, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END ...........C=1;, score=(train=0.850, test=0.776) total time=   0.0s
[CV 2/5] END ...........C=1;, score=(train=0.850, test=0.755) total time=   0.0s
[CV 3/5] END ...........C=1;, score=(train=0.825, test=0.875) total time=   0.0s
[CV 4/5] END ...........C=1;, score=(train=0.840, test=0.875) total time=   0.0s
[CV 5/5] END ...........C=1;, score=(train=0.835, test=0.812) total time=   0.0s
[CV 1/5] END ..........C=10;, score=(train=0.850, test=0.755) total time=   0.0s
[CV 2/5] END ..........C=10;, score=(train=0.865, test=0.755) total time=   0.0s
[CV 3/5] END ..........C=10;, score=(train=0.825, test=0.875) total time=   0.0s
[CV 4/5] END ..........C=10;, score=(train=0.840, test=0.875) total time=   0.0s
[CV 5/5] END ..........C=10;, score=(train=0.835, test=0.812) total time=   0.0s
[CV 1/5] END ..........C=20;, score=(train=0.850, test=0.755) total time=   0.0s
[CV 2/5] END ..........C=20;, score=(train=0.865,

In [79]:
print("For linear:",model_cv1.best_params_)
print("For RBF:", model_cv2.best_params_)

For linear: {'C': 1}
For RBF: {'C': 10}


# Re-training the SVM Classifier with the best hyper-parameter, C=1 for linear function and C = 10 for RBF (obtained above)

In [80]:
model_svm1 = SVC(kernel='linear', C=1)
model_svm1.fit(X_train_pca, y_train)
y_pred_train1=model_svm1.predict(X_train_pca)
y_pred_test1=model_svm1.predict(X_test_pca)

In [81]:
model_svm2 = SVC(kernel='rbf',gamma='auto', C=10)
model_svm2.fit(X_train_pca, y_train)
y_pred_train2=model_svm2.predict(X_train_pca)
y_pred_test2=model_svm2.predict(X_test_pca)

# Evaluation

In [82]:
accuracy_train = model_svm1.score(X_train_pca, y_train)
accuracy_test = model_svm1.score(X_test_pca, y_test)
print(f"Accuracy for C=1: {accuracy_train},{accuracy_test}")
print("Confusion matrix for train: \n",metrics.confusion_matrix(y_train, y_pred_train1))
print("Confusion matrix for test: \n",metrics.confusion_matrix(y_test,y_pred_test1))
print()
accuracy_train = model_svm2.score(X_train_pca, y_train)
accuracy_test = model_svm2.score(X_test_pca, y_test)
print(f"Accuracy for C=10 {accuracy_train},{accuracy_test}")
print("Confusion matrix for train: \n",metrics.confusion_matrix(y_train, y_pred_train2))
print("Confusion matrix for test: \n",metrics.confusion_matrix(y_test,y_pred_test2))


Accuracy for C=1: 0.8429752066115702,0.8032786885245902
Confusion matrix for train: 
 [[ 86  25]
 [ 13 118]]
Confusion matrix for test: 
 [[19  8]
 [ 4 30]]

Accuracy for C=10 0.987603305785124,0.819672131147541
Confusion matrix for train: 
 [[108   3]
 [  0 131]]
Confusion matrix for test: 
 [[21  6]
 [ 5 29]]


From the results above we observe that for this specific dataset and this specific classification task the radial basis function with C=10 hyperparameter gives us the optimal results compared to the linear function with C=1.
We get an extremely high accuracy on the training set (98%) and a relatively smaller accuracy on the test set, which is still a high accuracy (81%)