In [1]:
import os

import numpy as np
import pandas as pd

import utils, training, constants

In [2]:
# Load the dataset from the path given by constants.MAIN_CSV_FILE, then
# select the channels to be processed (here: every entry of ALL_CHANNELS).
dataset = utils.data_loader(path=constants.MAIN_CSV_FILE)
reduced_dataset = utils.channel_selection(
    dataset,
    constants.ALL_CHANNELS,
)

## Individual Channel Performances

In [5]:
# Evaluate every model on each channel in isolation.
# `all_channels_perf` maps "<channel>-<model>" to whatever
# training.model_training returns when called with stats=False.
import time, tqdm

all_channels_perf = {}

# perf_counter is a monotonic clock, so the elapsed time cannot be skewed by
# system clock adjustments during this long-running loop.
start = time.perf_counter()

for channel in tqdm.tqdm(constants.ALL_CHANNELS):
    # Use a dedicated name for the single-channel selection so the
    # notebook-wide `reduced_dataset` is not overwritten with only the last
    # channel (which would silently change the input of later cells).
    single_channel_dataset = utils.channel_selection(dataset, [channel])
    # Prepare the data once per channel and reuse it for every model.
    data = training.data_preparation(dataset=single_channel_dataset, feature_subset=constants.ALL_FEATURES)
    for model in constants.ALL_MODELS:
        all_channels_perf[channel + '-' + model] = training.model_training(data, model, stats=False)

end = time.perf_counter()
# Elapsed time of the whole sweep, in seconds.
print(end-start)



In [11]:
# Turn the "<channel>-<model>" results dict into a DataFrame (one column per
# key) and save it as CSV in the current working directory.
all_channels_perf_df = pd.DataFrame.from_dict(all_channels_perf, orient='columns')
all_channels_perf_df.to_csv('indv_channel_perfs.csv')

## Feature Performances

In [3]:
# Reload the dataset and rebuild the all-channel selection so the feature
# experiments below start from a known state.
dataset = utils.data_loader(path=constants.MAIN_CSV_FILE)
reduced_dataset = utils.channel_selection(
    dataset,
    constants.ALL_CHANNELS,
)

In [4]:
# Named groups of feature columns. Insertion order is significant: the cells
# that iterate over `feature_subsets` process the groups in this order.
feature_subsets = {
    # Band powers followed by the pairwise band-power ratios.
    'bands': [
        'delta_power', 'theta_power', 'alpha_power', 'beta_power', 'gamma_power',
        'gamma_beta', 'gamma_alpha', 'gamma_theta', 'gamma_delta',
        'beta_alpha', 'beta_theta', 'beta_delta',
        'alpha_theta', 'alpha_delta',
        'theta_delta',
    ],
    'time_features': [
        'skew', 'kurtosis', 'rms', 'activity', 'mobility', 'complexity',
        'dfa', 'mean_abs_sec_dif',
    ],
    'freq_features': ['spc_cnt', 'spc_roff', 'zc', 'slope'],
    # Numbered coefficient families: "<prefix>_0" .. "<prefix>_<count - 1>".
    'mfcc_features': ['mfcc_{}'.format(idx) for idx in range(10)],
    'mel_features': ['mel_{}'.format(idx) for idx in range(15)],
    'chr_features': ['chr_{}'.format(idx) for idx in range(20)],
    'ton_features': ['ton_{}'.format(idx) for idx in range(6)],
}

# Composite groups, each built by concatenating previously defined groups.
feature_subsets['spectral_features'] = [
    *feature_subsets['mfcc_features'],
    *feature_subsets['mel_features'],
]
feature_subsets['music'] = [
    *feature_subsets['chr_features'],
    *feature_subsets['ton_features'],
]
feature_subsets['coeffs'] = [
    *feature_subsets['spectral_features'],
    *feature_subsets['music'],
]
feature_subsets['comb_domain'] = [
    *feature_subsets['time_features'],
    *feature_subsets['freq_features'],
    *feature_subsets['bands'],
]
feature_subsets['no_music'] = [
    *feature_subsets['spectral_features'],
    *feature_subsets['comb_domain'],
]
feature_subsets['all'] = [
    *feature_subsets['coeffs'],
    *feature_subsets['comb_domain'],
]

In [None]:
# For every feature subset, prepare the data once and then train each model,
# letting model_training report its statistics (stats=True, cm=False).
for subset_name, subset_columns in feature_subsets.items():
    print("-------Feature Subset: {} -------".format(subset_name))
    data = training.data_preparation(
        dataset=reduced_dataset,
        feature_subset=subset_columns,
    )
    for model in constants.ALL_MODELS:
        training.model_training(data, model, stats=True, cm=False)


In [37]:
# For each model, evaluate every feature subset on the all-channel dataset
# and write one CSV per model to outs/<model>_feature_perf.csv.

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the (long) training loop starts.
os.makedirs('outs', exist_ok=True)

# Iterate over constants.ALL_MODELS like the other cells do; the bare name
# `models` is not defined anywhere in this notebook.
for model in constants.ALL_MODELS:
    # Fresh dict per model so one model's results can never leak into
    # another model's CSV.
    feature_results = {}
    for k in feature_subsets:
        data = training.data_preparation(dataset=reduced_dataset, feature_subset=feature_subsets[k])
        feature_results[k] = training.model_training(data, model, stats=False, cm=False)

    # One column per feature subset.
    features_perf_df = pd.DataFrame.from_dict(feature_results)
    features_perf_df.to_csv('outs/{}_feature_perf.csv'.format(model))

In [38]:
# Repeat the per-model feature-subset evaluation on a fixed five-channel
# selection and write one CSV per model to <model>_feature_perfO1.csv.

# The helpers live in the imported `utils` / `training` modules; the
# unqualified names are not defined in this notebook.
reduced_dataset1 = utils.channel_selection(dataset, ["Cz","CP3","CPz","P3", 'O1'])

# Iterate over constants.ALL_MODELS like the other cells do; the bare name
# `models` is not defined anywhere in this notebook.
for model in constants.ALL_MODELS:
    # Fresh dict per model so one model's results can never leak into
    # another model's CSV.
    feature_results = {}
    for k in feature_subsets:
        data = training.data_preparation(dataset=reduced_dataset1, feature_subset=feature_subsets[k])
        feature_results[k] = training.model_training(data, model, stats=False, cm=False)

    # One column per feature subset.
    features_perf_df = pd.DataFrame.from_dict(feature_results)
    features_perf_df.to_csv('{}_feature_perfO1.csv'.format(model))

## Model Performances

In [6]:
# Build the train/test split used for the model comparison: the current
# `reduced_dataset` restricted to constants.ALL_FEATURES.
X_train, X_test, y_train, y_test = training.data_preparation(
    dataset=reduced_dataset,
    feature_subset=constants.ALL_FEATURES,
)

In [None]:
from scipy import stats
from tqdm import tqdm

# For every model: train on the prepared split, then run an independent
# two-sample t-test (scipy.stats.ttest_ind) between the model's predictions
# and y_test. Results are stored under "<model>_t" and "<model>_pval".
results_dict = {}
for model in tqdm(constants.ALL_MODELS):
    model_results = training.model_training(
        data=[X_train, X_test, y_train, y_test],
        model_family=model,
    )
    t_stat, p_value = stats.ttest_ind(model_results['predictions'], y_test)
    results_dict.update({
        model + '_t': t_stat,
        model + '_pval': p_value,
    })

# One column per "<model>_t" / "<model>_pval" entry.
results_df = pd.DataFrame.from_dict(results_dict, orient='columns')

In [40]:
# Display the per-model t statistics ("<model>_t") and p-values
# ("<model>_pval") collected in `results_dict`.
results_dict

{'K-NN_t': array([-10.55984707]),
 'K-NN_pval': array([5.20917787e-26]),
 'K-NN1_t': array([-3.21081517]),
 'K-NN1_pval': array([0.00132531]),
 'K-NN2_t': array([-16.55796351]),
 'K-NN2_pval': array([3.04255371e-61]),
 'K-NN3_t': array([-5.8102629]),
 'K-NN3_pval': array([6.31543069e-09]),
 'SVM_t': array([-3.22366805]),
 'SVM_pval': array([0.00126727]),
 'DTC_t': array([0.17974952]),
 'DTC_pval': array([0.85735071]),
 'RFC_t': array([-3.91791683]),
 'RFC_pval': array([8.95622585e-05]),
 'Logistic Regression_t': array([-1.83614421]),
 'Logistic Regression_pval': array([0.06634856]),
 'NN_t': array([-1.14270379]),
 'NN_pval': array([0.25317287]),
 'GBC_t': array([-3.21081517]),
 'GBC_pval': array([0.00132531])}