## Dependencies

In [2]:
import numpy as np
import pandas as pd

import utils, selection, training, constants

## I/O - Initialization

In [4]:
# Load the feature dataset as a dataframe and build one reduced dataframe per channel subset.
channel_names =  ["Fp1", "Fp2", "F7", "F3", "Fz", "F4", "F8", "FT7", "FC3", "FCZ", "FC4", "FT8", "T3", "C3", "Cz", "C4", "T4", "TP7", "CP3", "CPz", "CP4", "TP8", "T5", "P3", "PZ", "P4", "T6", "O1", "Oz" , "O2"]

# Channel subsets compared against the full channel list.
test_channels_1 = ["Cz","CP3","CPz","P3"]
# Same 30 channels as channel_names; copied so the two lists remain independent objects.
test_channels_2 = list(channel_names)
test_channels_3 = ["Cz","CP3","CPz","P3","FT7","FC3","FCZ","FC4","C4","CP4","TP8","T5","PZ","T6","O1","Oz","O2"]

# Path used by the later cells that reload the data themselves.
# NOTE(review): `dataset` below is read from constants.MAIN_CSV_FILE instead —
# confirm both refer to the same file.
csv_file = 'eeg_features.csv'

dataset = utils.data_loader(constants.MAIN_CSV_FILE)
reduced_dataset_all = utils.channel_selection(dataset, channel_names)
reduced_dataset_1 = utils.channel_selection(dataset, test_channels_1)  # was computed twice; once is enough
reduced_dataset_2 = utils.channel_selection(dataset, test_channels_2)
reduced_dataset_3 = utils.channel_selection(dataset, test_channels_3)

# Every column except the last one (presumably the last column is the label — TODO confirm).
all_features = reduced_dataset_all.columns[:-1]

## Per-Channel Training+Incremental Training

In [None]:
# Train every model on one channel at a time and report the results channel by channel.
# Disabled earlier experiment, kept for reference:
#result = incremental_training(dataset=dataset, channel_list=channel_list, feature_subset=all_features, models=['K-NN'], mode='feature', save=True)
models = ['K-NN', 'GBC']

# The CSV does not depend on the channel, so it is read once instead of once per iteration.
# NOTE(review): this assumes utils.channel_selection does not modify `dataset` in place,
# which the initialization cell already relies on — confirm.
dataset = utils.data_loader(csv_file)

for channel in channel_names:
    print(channel)
    reduced_dataset = utils.channel_selection(dataset, [channel])
    data = training.data_preparation(dataset=reduced_dataset, feature_subset=all_features)
    for model in models:
        # model_training lives in the `training` module; the bare name is never imported.
        training.model_training(data, model, stats=False, cm=False)

## Chi-Square

In [8]:
# Named groups of feature columns. The base groups are declared first; the
# composite groups below are concatenations of them (order matters, because the
# scoring cells pair these names with statistics by position).
feature_subsets = {
    'bands': [
        'delta_power', 'theta_power', 'alpha_power', 'beta_power', 'gamma_power',
        'gamma_beta', 'gamma_alpha', 'gamma_theta', 'gamma_delta',
        'beta_alpha', 'beta_theta', 'beta_delta',
        'alpha_theta', 'alpha_delta',
        'theta_delta',
    ],
    'time_features': [
        'skew', 'kurtosis', 'rms', 'activity',
        'mobility', 'complexity', 'dfa', 'mean_abs_sec_dif',
    ],
    'freq_features': ['spc_cnt', 'spc_roff', 'zc', 'slope'],
    # Indexed coefficient families: <prefix>_0 ... <prefix>_(n-1).
    'mfcc_features': [f'mfcc_{k}' for k in range(10)],
    'mel_features': [f'mel_{k}' for k in range(15)],
    'chr_features': [f'chr_{k}' for k in range(20)],
    'ton_features': [f'ton_{k}' for k in range(6)],
}

# Composite groups, each built from the groups defined before it.
feature_subsets['spectral_features'] = [
    *feature_subsets['mfcc_features'],
    *feature_subsets['mel_features'],
]
feature_subsets['music'] = [
    *feature_subsets['chr_features'],
    *feature_subsets['ton_features'],
]
feature_subsets['coeffs'] = [
    *feature_subsets['spectral_features'],
    *feature_subsets['music'],
]
feature_subsets['comb_domain'] = [
    *feature_subsets['time_features'],
    *feature_subsets['freq_features'],
    *feature_subsets['bands'],
]
feature_subsets['no_music'] = [
    *feature_subsets['spectral_features'],
    *feature_subsets['comb_domain'],
]

In [9]:
import numpy as np
from sklearn.feature_selection import chi2
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Prepare the 'no_music' features of the test_channels_1 subset, then undo the
# train/test split: the statistic is computed over all samples at once.
X_train, X_test, y_train, y_test = training.data_preparation(
    dataset=reduced_dataset_1,
    feature_subset=feature_subsets['no_music'],
)

# chi2 only accepts non-negative inputs, hence the min-max rescaling first.
X = scaler.fit_transform(np.concatenate((X_train, X_test)))
y = np.concatenate((y_train, y_test)).flatten()

# One chi-square statistic and one p-value per feature column of X.
chi2_stats, p_values = chi2(X, y)

In [10]:
# Pair every 'no_music' feature name with the chi-square statistic at the same position.
chi2_dict = {
    feature: chi2_stats[idx]
    for idx, feature in enumerate(feature_subsets['no_music'])
}

# Same mapping, ordered from the highest statistic to the lowest.
chi_sorted = dict(
    sorted(chi2_dict.items(), key=lambda pair: pair[1], reverse=True)
)

## ANOVA

In [11]:
# Run the project's variance-threshold helper on the test_channels_1 subset with threshold=0.2.
# NOTE(review): neither return value is used by any other cell visible here — confirm they are still needed.
variance_df, variance_dict = selection.variance_thresholding(reduced_dataset_1, threshold=0.2)

In [12]:
from sklearn.feature_selection import f_classif
scaler = MinMaxScaler()

# Prepare the 'no_music' features of the test_channels_1 subset, then merge the
# train and test parts back together so the F-test sees every sample.
X_train, X_test, y_train, y_test = training.data_preparation(
    dataset=reduced_dataset_1,
    feature_subset=feature_subsets['no_music'],
)

X = scaler.fit_transform(np.concatenate((X_train, X_test)))
y = np.concatenate((y_train, y_test)).flatten()

# One ANOVA F-statistic and one p-value per feature column of X.
f_statistic, p_values = f_classif(X, y)

In [13]:
# Pair every 'no_music' feature name with the F-statistic at the same position.
f_dict = {
    feature: f_statistic[idx]
    for idx, feature in enumerate(feature_subsets['no_music'])
}

# Same mapping, ordered from the highest F-statistic to the lowest.
f_sorted = dict(
    sorted(f_dict.items(), key=lambda pair: pair[1], reverse=True)
)

## P-Value Thresholding

In [45]:
# Run p-value thresholding once per channel subset and store the returned
# values side by side, one column per subset.
channel_subsets = [channel_names, test_channels_1, test_channels_2, test_channels_3]
subset_names = ['chn_all', 'chn_1', 'chn_2', 'chn_3']
p_val_df = pd.DataFrame(columns=subset_names)

# The CSV is the same for every subset, so it is read once rather than per iteration.
# NOTE(review): assumes utils.channel_selection leaves `dataset` unmodified — confirm.
dataset = utils.data_loader(csv_file)

for subset_name, subset_channels in zip(subset_names, channel_subsets):
    reduced_dataset_i = utils.channel_selection(dataset, subset_channels)
    # `selection` is the imported module (previously misspelled as `selcetion`).
    p_i, p_i_val = selection.p_value_thresholding(reduced_dataset_i, feature_subset=all_features)
    p_val_df[subset_name] = p_i_val

# The 'outs' directory must already exist; to_csv does not create it.
p_val_df.to_csv('outs/p_values_by_channels.csv')

# Disabled code parked inside a string literal: nothing below is executed, the
# text is only bound to `a`.
# NOTE(review): the "Yurteri's Method" cell reads `p_all`, which is only
# assigned inside this disabled text — confirm where `p_all` should come from.
a='''
p_all, p_all_val = p_value_thresholding(reduced_dataset_all, feature_subset=all_features)
    p_1, p_1_val = p_value_thresholding(reduced_dataset_1, feature_subset=all_features)
    p_2, p_2_val = p_value_thresholding(reduced_dataset_2, feature_subset=all_features)
    p_3, p_3_val = p_value_thresholding(reduced_dataset_3, feature_subset=all_features)
'''

  t_stat, p_value = stats.ttest_ind(
  t_stat, p_value = stats.ttest_ind(
  t_stat, p_value = stats.ttest_ind(
  t_stat, p_value = stats.ttest_ind(


## Manual Feature Selection

In [6]:
# Train on a hand-picked feature list; currently the complete list with pca=True.
models = ['K-NN']  # earlier variant: ['K-NN', 'GBC']
subset_1 = all_features

# NOTE(review): `reduced_dataset` is not assigned in this cell; it is whatever
# another cell last left in the kernel — confirm which channel subset is intended.
data = training.data_preparation(
    dataset=reduced_dataset,
    feature_subset=subset_1,
    pca=True,
)
for model in models:
    training.model_training(
        data, model, stats=False, cm=False, verbose=True
    )

Accuracy of K-NN classifier on training set: 0.79962894
Accuracy of K-NN classifier on test set: 0.66913580


## Yurteri's Method

In [None]:
# Keep p_all[i] for every step i where the accuracy rose from accuracies[i] to
# accuracies[i + 1], then retrain using only the kept features.
# NOTE(review): `accuracies`, `p_all`, `selected_channels` and `selected_labels`
# are not defined in any cell visible here — confirm where they come from.
# NOTE(review): whether an improvement at step i belongs to p_all[i] or
# p_all[i + 1] depends on how `accuracies` was recorded — confirm the indexing.
p_better = [
    p_all[i]
    for i in range(len(accuracies) - 1)
    if accuracies[i + 1] - accuracies[i] > 0
]

# Both helpers live in the `training` module; the bare names are never imported.
# NOTE(review): the other cells call data_preparation(dataset=..., feature_subset=...);
# confirm it still accepts selected_channels / selected_labels.
data = training.data_preparation(selected_channels=selected_channels, selected_labels=selected_labels, feature_subset=p_better)
for model in models:
    # Do not unpack into a variable called `training`: that would rebind the
    # imported module and break every later `training.*` call.
    train_acc, test_acc = training.model_training(data, model, stats=False, cm=False, verbose=True)

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded accuracies against a 1-based step index.
# The grid and legend are added BEFORE saving and showing, so they appear both
# in the saved file and in the displayed figure.
fig, ax = plt.subplots()
ax.plot(np.linspace(1, len(accuracies), len(accuracies)), accuracies)
ax.grid()
# NOTE(review): five labels only line up with the plotted lines if `accuracies`
# holds one column per model — confirm its shape.
ax.legend(['GBC', 'K-NN', 'SVM', 'DTC', 'NN'])
fig.savefig('foo.png', bbox_inches='tight')
plt.show()

## ReliefF

In [37]:
from ReliefF import ReliefF
# Fit ReliefF on the four selected channels, using every feature and every
# sample (the train and test parts are merged back together).
best_channel_list = ['CP3', 'Cz', 'CPz', 'P3']
dataset = utils.data_loader(csv_file)
reduced_dataset = utils.channel_selection(dataset, best_channel_list)

X_train, X_test, y_train, y_test = training.data_preparation(
    dataset=reduced_dataset,
    feature_subset=all_features,
)
X = np.concatenate((X_train, X_test))
y = np.concatenate((y_train, y_test)).flatten()

fs = ReliefF(n_neighbors=1, n_features_to_keep=79)
rf = fs.fit_transform(X, y)

In [64]:
# Export the ReliefF score of every feature as a single-row table (one column per feature).
# `rf` (the output of fit_transform) is the transformed data matrix, not the
# scores, and `rf[:][idx]` is row `idx` of it; the per-feature scores are held
# by the fitted selector in `fs.feature_scores`.
# NOTE(review): the labels assume columns 1..79 of `dataset` are the features,
# in the same order as the columns of X passed to ReliefF — confirm.
rf_scores = pd.DataFrame([fs.feature_scores], columns=dataset.columns[1:80])
rf_scores.to_csv('rf scores.csv')