## Dependencies

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ReliefF import ReliefF
from sklearn.feature_selection import chi2, f_classif
from sklearn.preprocessing import MinMaxScaler

import utils, selection, training, constants

## I/O - Initialization

In [4]:
# Load the feature dataset and derive the channel-restricted views used by the
# sections below. Everything that later cells reference by name is defined here
# so the notebook runs top to bottom on a fresh kernel.

# Channel labels passed to utils.channel_selection for the all-channel view.
channel_names =  ["Fp1", "Fp2", "F7", "F3", "Fz", "F4", "F8", "FT7", "FC3", "FCZ", "FC4", "FT8", "T3", "C3", "Cz", "C4", "T4", "TP7", "CP3", "CPz", "CP4", "TP8", "T5", "P3", "PZ", "P4", "T6", "O1", "Oz" , "O2"]

# Channel subsets compared in the P-Value Thresholding section.
test_channels_1 = ["Cz","CP3","CPz","P3"]
test_channels_2 = list(channel_names)  # identical to the full channel list
test_channels_3 = ["Cz","CP3","CPz","P3","FT7","FC3","FCZ","FC4","C4","CP4","TP8","T5","PZ","T6","O1","Oz","O2"]

# Path used by later cells that reload the features from disk.
# NOTE(review): the load below reads constants.MAIN_CSV_FILE while later cells
# read csv_file -- confirm both point at the same file.
csv_file = 'eeg_features.csv'

dataset = utils.data_loader(constants.MAIN_CSV_FILE)
reduced_dataset_all = utils.channel_selection(dataset, channel_names)
reduced_dataset_target = utils.channel_selection(dataset, constants.SELECTED_CHANNELS)
# Input of the Chi-Square and ANOVA sections.
reduced_dataset_1 = utils.channel_selection(dataset, test_channels_1)

# Every column except the last one.
# NOTE(review): presumably the last column holds the class label -- confirm.
all_features = reduced_dataset_all.columns[:-1]

## Per-Channel Training+Incremental Training

In [None]:
# Train and evaluate each model on one channel at a time.
# Requires `csv_file`, `channel_names` and `all_features` to be defined by
# earlier cells.
models = ['K-NN', 'GBC']
# Load once; each single-channel view is derived from the same frame.
dataset = utils.data_loader(csv_file)
for channel in channel_names:
    print(channel)
    reduced_dataset = utils.channel_selection(dataset, [channel])
    data = training.data_preparation(dataset=reduced_dataset, feature_subset=all_features)
    for model in models:
        # model_training lives in the `training` module; the bare name is not imported.
        training.model_training(data, model, stats=False, cm=False)

## Chi-Square

In [6]:
# Named groups of feature columns. Base groups are declared first; composite
# groups are then assembled by concatenating them.
feature_subsets = {
    'bands': [
        'delta_power', 'theta_power', 'alpha_power', 'beta_power', 'gamma_power',
        'gamma_beta', 'gamma_alpha', 'gamma_theta', 'gamma_delta',
        'beta_alpha', 'beta_theta', 'beta_delta',
        'alpha_theta', 'alpha_delta',
        'theta_delta',
    ],
    'time_features': [
        'skew', 'kurtosis', 'rms', 'activity', 'mobility', 'complexity',
        'dfa', 'mean_abs_sec_dif',
    ],
    'freq_features': ['spc_cnt', 'spc_roff', 'zc', 'slope'],
    # Numbered coefficient columns: <prefix>_0 ... <prefix>_(count - 1).
    'mfcc_features': ['mfcc_{}'.format(n) for n in range(10)],
    'mel_features': ['mel_{}'.format(n) for n in range(15)],
    'chr_features': ['chr_{}'.format(n) for n in range(20)],
    'ton_features': ['ton_{}'.format(n) for n in range(6)],
}

# Composite groups (later ones build on earlier ones, so order matters).
feature_subsets['spectral_features'] = (
    feature_subsets['mfcc_features'] + feature_subsets['mel_features']
)
feature_subsets['music'] = (
    feature_subsets['chr_features'] + feature_subsets['ton_features']
)
feature_subsets['coeffs'] = (
    feature_subsets['spectral_features'] + feature_subsets['music']
)
feature_subsets['comb_domain'] = (
    feature_subsets['time_features']
    + feature_subsets['freq_features']
    + feature_subsets['bands']
)
feature_subsets['no_music'] = (
    feature_subsets['spectral_features'] + feature_subsets['comb_domain']
)

In [7]:
# Chi-square score of every 'no_music' feature against the class label.
# numpy, chi2 and MinMaxScaler come from the Dependencies cell at the top.
scaler = MinMaxScaler()

X_train, X_test, y_train, y_test = training.data_preparation(dataset=reduced_dataset_1, feature_subset=feature_subsets['no_music'])
# The train/test split is merged back: the statistic is computed over all samples.
X = np.concatenate([X_train, X_test])
# Rescale each feature to [0, 1]; sklearn's chi2 only accepts non-negative values.
X = scaler.fit_transform(X)
y = np.concatenate([y_train, y_test]).flatten()

# One statistic and one p-value per column of X.
chi2_stats, p_values = chi2(X, y)

In [14]:
# Map each 'no_music' feature name to the chi-square statistic at the same position.
chi2_dict = {
    name: chi2_stats[position]
    for position, name in enumerate(feature_subsets['no_music'])
}
# Same mapping, re-ordered from the highest statistic to the lowest.
chi_sorted = dict(
    sorted(chi2_dict.items(), key=lambda pair: pair[1], reverse=True)
)

In [15]:
# Display the features ranked by chi-square statistic, highest first.
chi_sorted

{'complexity': 94.28211862279322,
 'alpha_power': 61.61758765555503,
 'spc_roff': 45.335745653110465,
 'mel_10': 34.48564761491927,
 'spc_cnt': 32.83364136602729,
 'gamma_beta': 32.70304573869532,
 'theta_power': 30.496223978846103,
 'mel_7': 29.23799434939319,
 'mel_8': 28.2197282588603,
 'mel_11': 28.00341549280623,
 'mel_14': 27.895614759311346,
 'mel_6': 27.74221524291422,
 'mel_12': 26.42683801615235,
 'mel_9': 24.928265360565987,
 'rms': 24.313244174897456,
 'beta_alpha': 23.220564048041908,
 'gamma_alpha': 22.58373946911418,
 'mel_13': 21.773202922383213,
 'alpha_delta': 20.175210830953596,
 'mfcc_8': 19.043161553563458,
 'mobility': 17.02425805895643,
 'mel_5': 16.826754580813095,
 'mfcc_2': 16.639397142777653,
 'mfcc_3': 14.875414600070165,
 'mel_4': 12.497447896982823,
 'mfcc_6': 11.937535938453392,
 'alpha_theta': 11.095610187638918,
 'mfcc_0': 10.90100267891954,
 'mel_2': 9.799091830858487,
 'mel_3': 9.642782891854505,
 'mel_1': 9.174846537131351,
 'gamma_theta': 9.11242206

## ANOVA

In [8]:
# Run the project's variance-thresholding helper on reduced_dataset_1 with a
# threshold of 0.2; it returns two objects, kept here as a frame and a dict.
# NOTE(review): neither result is referenced again in the visible cells.
variance_df, variance_dict = selection.variance_thresholding(reduced_dataset_1, threshold=0.2)

In [9]:
# ANOVA F-statistic of every 'no_music' feature against the class label.
# numpy, f_classif and MinMaxScaler come from the Dependencies cell at the top,
# so this cell no longer depends on the Chi-Square cell having been run first.
scaler = MinMaxScaler()

X_train, X_test, y_train, y_test = training.data_preparation(dataset=reduced_dataset_1, feature_subset=feature_subsets['no_music'])
# The train/test split is merged back: the statistic is computed over all samples.
X = np.concatenate([X_train, X_test])
X = scaler.fit_transform(X)
y = np.concatenate([y_train, y_test]).flatten()

# One F-statistic and one p-value per column of X.
f_statistic, p_values = f_classif(X, y)

In [10]:
# Map each 'no_music' feature name to the F-statistic at the same position.
f_dict = {
    name: f_statistic[position]
    for position, name in enumerate(feature_subsets['no_music'])
}
# Same mapping, re-ordered from the highest F-statistic to the lowest.
f_sorted = dict(
    sorted(f_dict.items(), key=lambda pair: pair[1], reverse=True)
)

In [16]:
# Display the features ranked by ANOVA F-statistic, highest first.
f_sorted

{'spc_roff': 1179.7978846086457,
 'complexity': 1171.7471438323246,
 'mel_11': 1160.4100113966963,
 'mel_10': 1144.9544212153526,
 'mel_6': 1075.6277307751693,
 'mel_7': 952.5670299279793,
 'mel_12': 942.540337366972,
 'mel_9': 936.5967676403434,
 'mel_8': 891.0888782245993,
 'mel_14': 820.7423233308908,
 'mel_13': 764.8618977509211,
 'rms': 724.8313528466728,
 'spc_cnt': 717.5656346396459,
 'mel_5': 707.6203337209621,
 'alpha_power': 674.0351116859397,
 'theta_power': 659.5643477476898,
 'mfcc_8': 541.380384782955,
 'mfcc_2': 457.90180619717114,
 'gamma_beta': 427.4471517208908,
 'mel_4': 384.0438940190917,
 'mfcc_3': 360.71091534674434,
 'mel_3': 349.0845613165568,
 'mobility': 347.5485006669642,
 'mfcc_0': 332.5889617865878,
 'mfcc_6': 326.9010075256861,
 'mel_2': 303.7056483738949,
 'mel_1': 281.4245060104526,
 'zc': 265.22149867835975,
 'mean_abs_sec_dif': 256.20641049970425,
 'dfa': 254.07184268421352,
 'beta_alpha': 249.93641970469574,
 'alpha_delta': 223.3189549940299,
 'mfcc_1

## Manual Feature Selection

In [5]:
# Hand-written feature list; its order appears to follow the ANOVA ranking
# displayed in the previous section.
anova_selected = [
    'spc_roff', 'complexity',
    'mel_11', 'mel_10', 'mel_6', 'mel_7', 'mel_12', 'mel_9', 'mel_8', 'mel_14', 'mel_13',
    'rms', 'spc_cnt', 'mel_5',
    'alpha_power', 'theta_power',
    'mfcc_8', 'mfcc_2', 'gamma_beta', 'mel_4', 'mfcc_3', 'mel_3',
    'mobility', 'mfcc_0', 'mfcc_6', 'mel_2', 'mel_1',
    'zc', 'mean_abs_sec_dif', 'dfa',
    'beta_alpha', 'alpha_delta', 'mfcc_1', 'gamma_alpha', 'mel_0',
    'delta_power', 'alpha_theta', 'gamma_theta', 'activity', 'mfcc_5',
    'gamma_delta', 'mfcc_4', 'beta_theta', 'skew', 'beta_power',
    'theta_delta', 'mfcc_7', 'slope',
]

# Prepare the target-channel data restricted to those features, then train
# every model listed in constants.ALL_MODELS and print its statistics.
data = training.data_preparation(
    dataset=reduced_dataset_target,
    feature_subset=anova_selected,
)
for model in constants.ALL_MODELS:
    training.model_training(data, model, stats=True, cm=False)


==== Stats_dict for the K-NN model ====
Training Accuracy:  0.7527047913446677
Test Accuracy:  0.7435105067985167
Sensitivity (Recall): 0.7391304347826086
Precision: 0.74375
F1_score: 0.7414330218068534
AUC: 0.7434889566286967
Logloss: 9.244818390904586


==== Stats_dict for the SVM model ====
Training Accuracy:  1.0
Test Accuracy:  0.927070457354759
Sensitivity (Recall): 0.9465838509316771
Precision: 0.910394265232975
F1_score: 0.9281364190012181
AUC: 0.9271664642112261
Logloss: 2.6286471569319065


==== Stats_dict for the DTC model ====
Training Accuracy:  0.8360123647604327
Test Accuracy:  0.7571075401730532
Sensitivity (Recall): 0.7751552795031056
Precision: 0.7464114832535885
F1_score: 0.7605118829981717
AUC: 0.7571963359385147
Logloss: 8.754731632832534


==== Stats_dict for the RFC model ====
Training Accuracy:  1.0
Test Accuracy:  0.8331273176761433
Sensitivity (Recall): 0.8596273291925466
Precision: 0.8150765606595995
F1_score: 0.8367593712212817
AUC: 0.8332576990366177
Loglo

## P-Value Thresholding

In [45]:
# Collect the p-values returned by selection.p_value_thresholding for each
# channel subset, one column per subset, and write them to a CSV file.
# Requires `csv_file`, `all_features` and the channel lists to be defined by
# earlier cells.
channel_subsets = [channel_names, test_channels_1, test_channels_2, test_channels_3]
subset_names = ['chn_all', 'chn_1', 'chn_2', 'chn_3']
p_val_df = pd.DataFrame(columns=subset_names)

# Load once; every channel subset is derived from the same frame.
dataset = utils.data_loader(csv_file)
for subset_name, subset_channels in zip(subset_names, channel_subsets):
    reduced_dataset_i = utils.channel_selection(dataset, subset_channels)
    # p_value_thresholding returns two values; only the second is stored.
    p_i, p_i_val = selection.p_value_thresholding(reduced_dataset_i, feature_subset=all_features)
    p_val_df[subset_name] = p_i_val

# NOTE(review): the 'outs' directory must already exist; to_csv does not create it.
p_val_df.to_csv('outs/p_values_by_channels.csv')

  t_stat, p_value = stats.ttest_ind(
  t_stat, p_value = stats.ttest_ind(
  t_stat, p_value = stats.ttest_ind(
  t_stat, p_value = stats.ttest_ind(


## Yurteri's Method

In [None]:
# Keep the entries of `p_all` at the steps where accuracy increased relative to
# the previous step, then retrain every model on that reduced feature list.
# NOTE(review): `accuracies`, `p_all`, `selected_channels`, `selected_labels`
# and `models` are not defined in this cell -- they must exist in the kernel.
# NOTE(review): p_all[i] is paired with the change from step i to i + 1;
# confirm that i (rather than i + 1) is the intended index.
p_better = []
for i in range(len(accuracies) - 1):
    delta = accuracies[i+1] - accuracies[i]
    if delta <= 0:
        continue
    p_better.append(p_all[i])

# NOTE(review): these keyword arguments differ from the other data_preparation
# calls in this notebook (dataset=..., feature_subset=...) -- verify the signature.
data = training.data_preparation(selected_channels=selected_channels, selected_labels=selected_labels, feature_subset=p_better)
for model in models:
    # Do not unpack into a name called `training`: that would rebind the
    # imported module and break every later `training.*` call.
    train_result, test_result = training.model_training(data, model, stats=False, cm=False, verbose=True)

In [None]:
# Plot `accuracies` against the 1-based step number and save the figure.
# matplotlib.pyplot is imported as plt in the Dependencies cell at the top.
fig, ax = plt.subplots()
ax.plot(np.linspace(1,len(accuracies),len(accuracies)), accuracies)
# Legend and grid are added before saving and showing so that both the saved
# image and the inline output contain them.
ax.legend(['GBC', 'K-NN', 'SVM', 'DTC', 'NN'])
ax.grid()
fig.savefig('foo.png', bbox_inches='tight')
plt.show()

## ReliefF

In [37]:
# Fit ReliefF on the channels listed in best_channel_list.
# ReliefF is imported in the Dependencies cell at the top.
dataset = utils.data_loader(csv_file)
best_channel_list = ['CP3', 'Cz', 'CPz', 'P3']
reduced_dataset = utils.channel_selection(dataset, best_channel_list)
X_train, X_test, y_train, y_test = training.data_preparation(dataset=reduced_dataset, feature_subset=all_features)
# The train/test split is merged back so that all samples are used.
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test]).flatten()
# NOTE(review): 79 is presumably the total number of feature columns, i.e. no
# feature is actually dropped -- confirm against len(all_features).
fs = ReliefF(n_neighbors=1, n_features_to_keep=79)
rf = fs.fit_transform(X, y)

In [64]:
# Export the ReliefF output, one CSV column per dataset column 1..79.
# rf is indexed column-wise (rf[:, idx]); the previous rf[:][idx] selected
# *row* idx, so every CSV column held a single sample instead of one feature.
# NOTE(review): fit_transform presumably returns the reduced feature matrix with
# columns ordered by ReliefF rank, not the scores themselves, so these labels
# may not line up with the data. If the scores are what should be saved, the
# fitted object's `feature_scores` attribute is probably the right source --
# verify against the ReliefF package.
rf_scores = pd.DataFrame(
    {col: rf[:, idx] for idx, col in enumerate(dataset.columns[1:80])}
)
rf_scores.to_csv('rf scores.csv')