#<center>Classification with SVM<center>

### Load drive

### Load libraries

In [1]:
# Libraries
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns

# Style for chart
sns.set_style('darkgrid')
plt.rc('axes', titlesize=18)
plt.rc('axes', labelsize=14)
plt.rc('xtick', labelsize=13)
plt.rc('ytick', labelsize=13)
plt.rc('legend', fontsize=13)
plt.rc('font', size=13)

### Load raw data

In [None]:
ecg_info = loadmat('./Data/ECGData.mat')['ECGData'][0][0]
ecg_data = ecg_info[0]
labels = np.array([lab[0][0] for lab in ecg_info[1]])

#### Take a look at the data

Size of the data:

In [None]:
print('ecg data size: ',ecg_data.shape)
print('ecg labels size: ',labels.shape)

Data:

In [None]:
ecg_data

In [None]:
list_lab = np.unique(labels)
for i in range(len(list_lab)):
  print(list_lab[i],': ',len(labels[labels == list_lab[i]]))

NSR: normal subject<br>
ARR: cardiac arrhythmia subject<br>
CHF: heart failure subject<br>

Convert labels from string to integer (0,1,2):

In [None]:
labels[labels == 'NSR'] = 0
labels[labels == 'ARR'] = 1
labels[labels == 'CHF'] = 2

labels = labels.astype(int)

In [None]:
labels

Plot one ECG for each group:

In [None]:
len_samples = 3000

fig = plt.figure(figsize=(21, 10))
columns = 1
rows = 3
idx = [0,100,140]
for i in range(1, columns*rows +1):
    fig.add_subplot(rows, columns, i)
    plt.plot(list(range(len_samples)),ecg_data[idx[i-1],0:len_samples])
    plt.title('ECG signal ({0})'.format(list_lab[i-1]))
    plt.xlabel('Samples')
    plt.ylabel('Volts')
plt.tight_layout()
plt.show()


### Split the dataset in train and test sets

In [None]:
X_train, X_test, y_train, y_test = train_test_split(ecg_data,labels,test_size=0.2,stratify=labels)

Check the train and test sets:

In [None]:
# Train
print('Train set:')
list_lab_tr = np.unique(y_train)
for i in range(len(list_lab_tr)):
  print('  * ',list_lab_tr[i],': ',len(y_train[y_train == list_lab_tr[i]]))
# Test
print('\nTest set:')
list_lab_te = np.unique(y_test)
for i in range(len(list_lab_te)):
  print('  *',list_lab_te[i],': ',len(y_test[y_test == list_lab_te[i]]))

##<center>Support Vectors Machines<center>

### Train SVM classifier

In [None]:
#Create a svm Classifier
clf = SVC(kernel='linear',C=200) # Linear Kernel

#Train the model using the training sets
clf.fit(X_train, y_train)


### Test SVM classifier

In [None]:
#Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy
print("Accuracy: ",accuracy_score(y_test, y_pred))

##<center>Support Vectors Machines for wavelet features<center>

### Load wavelet features

In [None]:
features = loadmat('../Lez4/Data/features.mat')['data'][0][0]
features_data = features[0]

labels = np.array([lab[0] for lab in features[1]])

data_wl_features = features_data
data_wl_labels = labels

# get data in numpy.array

### Split the data in train and test sets

In [None]:
X_train, X_test, y_train, y_test = train_test_split(data_wl_features,data_wl_labels,test_size=0.2,stratify=labels)

### Train SVM classifier on the wavelet features

In [None]:
#Create a svm Classifier
clf = SVC(kernel='linear',C=200) # Linear Kernel

#Train the model using the training sets
clf.fit(X_train, y_train)



### Test SVM classifier on the wavelet featuers

In [None]:
#Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy
print("Accuracy: ",accuracy_score(y_test, y_pred))


##<center>Support Vectors Machines for wavelet features + Fourier<center>

### Load wavelet features

In [None]:
features = loadmat('../Lez4/Data/features_fourier.mat')['data'][0][0]


labels = np.array([lab[0] for lab in features[1]])

data_wl_features_f = features[0]
data_wl_labels_f = labels

# get data in numpy.array

### Split the data in train and test sets

In [None]:
X_train, X_test, y_train, y_test = train_test_split(data_wl_features_f,data_wl_labels_f,test_size=0.2,stratify=labels)

### Train SVM classifier on the wavelet features

In [None]:
#Create a svm Classifier
clf = SVC(kernel='linear', C=200)

#Train the model using the training sets
clf.fit(X_train, y_train)


### Test SVM classifier on the wavelet featuers + Fourier

In [None]:
#Predict the response for test dataset
y_pred = clf.predict(X_test)
# Model Accuracy
print("Accuracy: ", accuracy_score(y_test, y_pred))

##<center>Cross-Validation<center>

![]()

<center><div>
<img src="https://scikit-learn.org/stable/_images/grid_search_cross_validation.png" width="400"/>
</div><center><br>

Procedure for k-fold Cross-Validation

In [None]:
import sklearn
sklearn.metrics.get_scorer_names()


In [2]:
ecg_info = loadmat('../Lez4/Data/features.mat')['data'][0][0]
ecg_data = ecg_info[0]
labels = np.array([lab[0] for lab in ecg_info[1]])

In [3]:
X_train, X_test, y_train, y_test = train_test_split(ecg_data,labels,test_size=0.2,stratify=labels)

In [6]:
# Create a svm Classifier and run it over the raw data
new_clf = SVC(kernel='linear',C=10) # Linear Kernel


In [7]:
from sklearn.model_selection import cross_val_score, cross_validate
from statistics import *
import numpy

scores = cross_validate(new_clf, X_train, y_train, cv=5, scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'])

print(f"""Accuracy: \t{mean(scores["test_accuracy"])}\t{numpy.std(scores["test_accuracy"])}\n
Precision: \t{mean(scores["test_precision_macro"])}\t{numpy.std(scores["test_precision_macro"])}\n
Recall: \t{mean(scores["test_recall_macro"])}\t{numpy.std(scores["test_recall_macro"])}\n
F1: \t{mean(scores["test_f1_macro"])}\t{numpy.std(scores["test_f1_macro"])}\n
""")

KeyboardInterrupt: 

##<center>Grid-Search CV<center>

In [None]:
# TO DO
from sklearn.model_selection import GridSearchCV

grid = {'C':[.01, .1, 1, 5, 20, 200], 'kernel':['linear', 'poly'], 'degree':[3, 5]}

grid = GridSearchCV(SVC(), param_grid=grid, refit=True, verbose=3)
grid.fit(X_train, y_train)



Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ......C=1, degree=3, kernel=linear;, score=0.639 total time=   5.0s
[CV 2/5] END ......C=1, degree=3, kernel=linear;, score=0.644 total time=   4.3s
[CV 3/5] END ......C=1, degree=3, kernel=linear;, score=0.639 total time=   4.4s
[CV 4/5] END ......C=1, degree=3, kernel=linear;, score=0.630 total time=   4.9s
[CV 5/5] END ......C=1, degree=3, kernel=linear;, score=0.653 total time=   4.1s
[CV 1/5] END ........C=1, degree=3, kernel=poly;, score=0.602 total time=   0.1s
[CV 2/5] END ........C=1, degree=3, kernel=poly;, score=0.606 total time=   0.2s
[CV 3/5] END ........C=1, degree=3, kernel=poly;, score=0.599 total time=   0.1s
[CV 4/5] END ........C=1, degree=3, kernel=poly;, score=0.611 total time=   0.1s
[CV 5/5] END ........C=1, degree=3, kernel=poly;, score=0.610 total time=   0.3s
[CV 1/5] END ......C=1, degree=5, kernel=linear;, score=0.639 total time=   4.8s
[CV 2/5] END ......C=1, degree=5, kernel=linear;

In [None]:

y_pred = clf.predict(X_test)

scores = [
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred),
    recall_score(y_test, y_pred),
    f1_score(y_test, y_pred)
]
#scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']



print(f"""Accuracy: \t{mean(scores[0])}\t{numpy.std(scores[0])}\n
Precision: \t{mean(scores[1])}\t{numpy.std(scores[1])}\n
Recall: \t{mean(scores[2])}\t{numpy.std(scores[2])}\n
F1: \t{mean(scores[3])}\t{numpy.std(scores[4])}\n
""")