# Dependencies

In [153]:
import numpy as np
import pandas as pd
import scipy.io
import matplotlib.pyplot as plt
from scipy import signal
import librosa as lr
import librosa.feature as lrf
from scipy.signal import welch
import pywt
from pywt import *
from scipy.signal import periodogram
#from pyemd import emd
from scipy.signal import hilbert
from scipy.stats import linregress, skew, kurtosis
from scipy.fft import fft, fftfreq

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

import nolds
from pyentrp import entropy as ent
from scipy.signal import detrend
from nolds import dfa

In [154]:
from utils import *
from training import *
# Target column order for the feature table: all feature columns first, then
# 'subject', and 'label' as the very last column (the label column should be
# the last one, so it is swapped with 'subject'). The numbered feature families
# (mfcc_*, chr_*, mel_*, ton_*) are generated rather than typed out.
new_cols = (
    ['channels', 'spc_cnt', 'spc_roff', 'zc']
    + ['mfcc_' + str(i) for i in range(10)]
    + ['chr_' + str(i) for i in range(20)]
    + ['mel_' + str(i) for i in range(15)]
    + ['delta_power', 'theta_power', 'alpha_power', 'beta_power', 'gamma_power']
    + ['gamma_beta', 'gamma_alpha', 'gamma_theta', 'gamma_delta',
       'beta_alpha', 'beta_theta', 'beta_delta',
       'alpha_theta', 'alpha_delta', 'theta_delta']
    + ['mean_abs_sec_dif', 'dfa', 'slope', 'skew', 'kurtosis',
       'activity', 'mobility', 'complexity', 'rms', 'tempo']
    + ['ton_' + str(i) for i in range(6)]
    + ['subject', 'label']
)

# Load the table and put its columns in the order above.
# Original author's note on the reindex: "this is fixed in the extraction".
dataset = data_loader("leaveout.csv").reindex(columns=new_cols)

In [155]:
# Maps subject id (1..11) to a [first, last] pair of indices.
# NOTE(review): the pairs are contiguous (each `first` is the previous `last` + 1),
# so they look like inclusive row ranges per subject -- confirm against the
# extraction step that produced them.
subject_idx = {
    subject_id: [first, last]
    for subject_id, (first, last) in enumerate(
        [
            (0, 187),
            (188, 319),
            (320, 470),
            (471, 617),
            (618, 841),
            (842, 1007),
            (1008, 1109),
            (1110, 1373),
            (1374, 1687),
            (1688, 1795),
            (1796, 2021),
        ],
        start=1,
    )
}

In [156]:
# Channel names handed to channel_selection (defined outside this notebook
# section) to derive the reduced dataset used by the evaluation cells below.
selected_channels = [
    "Cz",
    "CP3",
    "CPz",
    "P3",
]
reduced_dataset = channel_selection(dataset, selected_channels)

In [160]:
from tqdm import tqdm

# Leave-one-subject-out evaluation.
# For every model name, each subject in `subject_idx` is held out in turn: the
# model is trained on all other subjects via `model_training` and whatever it
# returns is stored under the held-out subject's id.
models = ['K-NN', 'K-NN1', 'K-NN2', 'K-NN3', 'SVM', 'DTC', 'RFC', 'Logistic Regression', 'NN', 'GBC']
model_dfs = []  # exactly one results frame per model, in the order of `models`
for model in tqdm(models):
    one_out_results = {}
    for sub in subject_idx:
        # Split with a boolean mask instead of `drop(one_out.index)`: the mask
        # is computed once per fold and cannot accidentally remove training
        # rows that merely share an index label with a held-out row.
        is_held_out = reduced_dataset["subject"] == sub
        one_out = reduced_dataset[is_held_out].drop('subject', axis=1)
        train = reduced_dataset[~is_held_out].drop('subject', axis=1)

        X_train = train.drop('label', axis=1)
        y_train = train['label']
        X_test = one_out.drop('label', axis=1)
        y_test = one_out['label']
        data = [X_train, X_test, y_train, y_test]

        one_out_results[sub] = model_training(data, model, stats=False, cm=False)

        # Checkpoint after every fold so a crash during this long run keeps
        # the folds already finished; the file is overwritten each time and
        # ends up holding all subjects for this model.
        one_out_results_df = pd.DataFrame.from_dict(one_out_results).T
        one_out_results_df.to_csv("outs/" + model + "_one_out.csv")

    # Append once per model, after all folds. Appending inside the subject loop
    # stored a partial frame for every fold (11 entries per model).
    model_dfs.append(one_out_results_df)


100%|██████████| 10/10 [28:47<00:00, 172.75s/it]


In [167]:
# Leave-one-subject-out evaluation of an SVC with default hyper-parameters.
# Each subject in `subject_idx` is held out in turn; the classifier is refit on
# the remaining subjects and a dict of metrics is stored per held-out subject.
test_model = SVC()
model = "SVM-def"  # name used for the output CSV
one_out_results = {}
for sub in subject_idx:
    # Boolean-mask split: computed once per fold, and unlike
    # `drop(one_out.index)` it cannot remove training rows that merely share
    # an index label with a held-out row.
    is_held_out = reduced_dataset["subject"] == sub
    one_out = reduced_dataset[is_held_out].drop('subject', axis=1)
    train = reduced_dataset[~is_held_out].drop('subject', axis=1)

    X_train = train.drop('label', axis=1)
    y_train = train['label']
    X_test = one_out.drop('label', axis=1)
    y_test = one_out['label']

    test_model.fit(X_train, y_train)
    # Predict once and reuse; the original re-ran predict for every metric.
    y_pred = test_model.predict(X_test)

    stats_dict = {}
    stats_dict['training_acc'] = test_model.score(X_train, y_train)
    stats_dict['test_acc'] = test_model.score(X_test, y_test)

    stats_dict['sensitivity'] = recall_score(y_test, y_pred)
    stats_dict['precision'] = precision_score(y_test, y_pred)
    stats_dict['f1'] = f1_score(y_test, y_pred)

    # NOTE(review): the ROC curve, AUC and log loss below are computed from
    # hard class predictions, exactly as before. Continuous scores (e.g.
    # `test_model.decision_function(X_test)`) would give a proper ROC/AUC,
    # and log loss normally expects probabilities -- confirm what
    # `model_training` does before changing this, so results stay comparable.
    fpr, tpr, thresholds = roc_curve(y_test, y_pred)
    stats_dict['auc'] = roc_auc_score(y_test, y_pred)
    stats_dict['logloss'] = log_loss(y_test, y_pred)
    one_out_results[sub] = stats_dict

    # Checkpoint after every fold; the file is overwritten each time and ends
    # up holding one row per subject.
    one_out_results_df = pd.DataFrame.from_dict(one_out_results).T
    one_out_results_df.to_csv("outs/" + model + "_one_out.csv")

# Append the completed frame once. Appending inside the subject loop stored a
# partial frame for every fold.
model_dfs.append(one_out_results_df)