In [8]:
import pandas as pd
import seaborn
import numpy as np
from pylab import *
import glob, os

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, make_scorer

In [9]:
basedir = os.getcwd()
os.chdir(basedir)

In [10]:
def get_best_channels(channelsdf, nchannels=2):
    
    list_of_rows = [list(row) for row in channelsdf.values]
    
    channel_score_dict = {}
    for i, elem in enumerate(list_of_rows[0]):
        channel_score_dict[elem] = i

    for row, rowlist in enumerate(list_of_rows[1:]):
        for i, elem in enumerate(rowlist):
            channel_score_dict[elem] = channel_score_dict[elem] + i

    sorted_channels = sorted(channel_score_dict, key=channel_score_dict.get)
    best_channels = sorted_channels[:nchannels]
    
    return best_channels

def get_expression(channels):
    expression = 'df['
    for channel in best_channels:
        expression +='(df[\'channel\'] == \'' + channel + '\')|'
    return expression[:-1] + ']'

def split_train_test(data, test_ratio):
    test_set_size = int(len(data) * test_ratio)
    test_set = data.iloc[:test_set_size]
    train_set = data.iloc[test_set_size:]
    return train_set, test_set

def split_proportional(dataframe, test_ratio, target):
    
    df_seizures = dataframe[dataframe[target] == True]
    df_normal = dataframe[dataframe[target] == False]
    
    train_seizures, test_seizures = split_train_test(df_seizures, test_ratio)
    train_normal, test_normal = split_train_test(df_normal, test_ratio)
    
    df_train = pd.concat([train_normal, train_seizures], axis=0).reset_index()
    x_train, y_train = df_train[df_train.columns.difference([target])], df_train[target]
    df_test = pd.concat([test_normal, test_seizures], axis=0).reset_index()
    x_test, y_test = df_test[df_test.columns.difference([target])], df_test[target]
    
    return x_train, y_train, x_test, y_test


def read_last_results(file, ncolumns):
    if os.path.isfile('svm_rbf_' + str(nchannels) + 'channels.csv'):
        past_df = pd.read_csv('svm_rbf_' + str(nchannels) + 'channels.csv', delimiter=',')
    else:
        zero_data = np.zeros([len(patients), ncolumns])
        past_df = pd.DataFrame(zero_data, columns = column_names)
    return past_df

In [11]:
patient = 'chb07'
nchannels = 1
column_names = ['patient', 'model', 'hyperparameters', 'sensitivity', 'specificity', 'roc_auc', 'precision', 'accuracy']


In [12]:
# Calculate most significant channels for patient
channelsdf = pd.read_csv('..\DataSetCreation\Datasets\\' + patient + '_channel_order.csv', delimiter=',')
best_channels = get_best_channels(channelsdf, nchannels)
best_channels

['F3-C3']

In [13]:
# Read the results last dataframe and extracts the row
past_df = read_last_results(basedir + '\\results_normal\\svm_rbf_' + str(nchannels) + 'channels.csv', len(column_names))
past_patient_data = past_df[past_df['patient'] == patient]
pos = past_patient_data.index[0]

# Calculate most significant channels for patient
channelsdf = pd.read_csv('..\DataSetCreation\Datasets\\' + patient + '_channel_order.csv', delimiter=',')
best_channels = get_best_channels(channelsdf, nchannels)
    
# Read the patient dataframe
df = pd.read_hdf('..\DataSetCreation\Datasets\\' + patient + 'features.h5', key = 'fullpatient')

# Extract only the best channels
df = eval(get_expression(best_channels))
df = df.drop(['channel'], axis=1)

In [44]:
# Split the dataframe into train and test
x_train, y_train, x_test, y_test = split_proportional(df, 0.2, 'seizure')

C = 5
gamma = 0.01
kernel = 'rbf'
max_iter = 100000000
#params = '(C: {}, gamma: {})'.format(C, gamma)
params = {'clf__C': C, 'clf__gamma': gamma, 'clf__kernel': kernel}

# Train models
svm_clf = Pipeline((
    ("scaler", StandardScaler()),
    ("linear_svc", SVC(kernel = kernel, C=C, gamma = gamma, max_iter = max_iter))
))
svm_clf.fit(x_train, y_train)

#y_scores = svm_clf.decision_function()

y_train_pred = cross_val_predict(svm_clf, x_train, y_train, cv=5, n_jobs = 12)

# Predict
y_train_pred = svm_clf.predict(x_train)
y_true, y_pred = y_test, svm_clf.predict(x_test)

cm = confusion_matrix(y_true, y_pred)
sensitivity = cm[1][1]/(cm[1][1] + cm[1][0])
specificity = cm[0][0]/(cm[0][0] + cm[0][1])
precision = cm[1][1]/(cm[1][1] + cm[0][1])
roc_score = roc_auc_score(y_true, y_pred)
accuracy = (cm[0][0]+cm[1][1])/(sum(cm))

In [45]:
print('Precision score: ' + str(precision_score(y_train, y_train_pred)))
print('Recall score: ' + str(recall_score(y_train, y_train_pred)))
print('F1 score: ' + str(f1_score(y_train, y_train_pred)))
print('Roc_auc score: ' + str(roc_auc_score(y_train, y_train_pred)))

Precision score: 1.0
Recall score: 1.0
F1 score: 1.0
Roc_auc score: 1.0


In [46]:
print('Sensitivity score: ' + str(sensitivity))
print('Specificity score: ' + str(specificity))
print('Precision score: ' + str(precision))
print('Roc_auc score: ' + str(roc_score))
print('Accuracy: ' + str(accuracy))

Sensitivity score: 0.2727272727272727
Specificity score: 0.999244142101285
Precision score: 0.75
Roc_auc score: 0.6359857074142788
Accuracy: 0.9932533733133433


In [47]:
# Replace if better results
if (sensitivity + specificity )/2 > (past_patient_data['sensitivity'].values[0] + past_patient_data['specificity'].values[0])/2:
    print('Better results than before')
    data = np.array([patient, 'svm_rbf', params, sensitivity, specificity, roc_score, precision, accuracy])
    
    aux_array = past_df.to_numpy()
    aux_array[pos] = data
    new_df = pd.DataFrame(aux_array, columns = column_names)
    # Save new results
    new_df.to_csv(basedir + '\\results_normal\\svm_rbf_' + str(nchannels) + 'channels.csv', index=False)

Better results than before
