## Dependencies

In [1]:
import numpy as np
import pandas as pd

from utils import *

## I/O - Initialization

In [2]:
# Read the EEG feature table into a dataframe. The 'round_trip' float parser
# is requested explicitly, and the 'Unnamed: 0' column (presumably the index
# written by an earlier export -- confirm) is dropped right after loading.
csv_file = 'eeg_features.csv'
df = pd.read_csv(csv_file, float_precision='round_trip').drop(columns='Unnamed: 0')

In [3]:
# Split the dataset into features and labels. Both sides are selected by
# column name so they stay consistent wherever 'label' sits in the frame.
features = df.drop(columns='label')
labels = df[['label']]  # kept as a single-column DataFrame, as before

# Human-readable class names: label value 1 maps to 'drowsy', anything else to
# 'alert'. The unique values are sorted because Series.unique() returns them in
# order of first appearance, which would swap the names whenever the first row
# of the file happens to be a drowsy sample.
# NOTE(review): assumes the consumer (model_training) expects class names in
# ascending label order, as sklearn-style reports do -- confirm in utils.
display_labels = ['drowsy' if label == 1 else 'alert' for label in sorted(labels['label'].unique())]

In [4]:
# Channels to be processed
channel_list = [
    'F3',
    'F4',
    'C3',
    'Cz',
    'Oz',
]

# Identifiers of the models to be trained
models = [
    'GBC',
    'K-NN',
    'SVM',
    'DTC',
    'RFC',
    'Logistic Regression',
    'NN',
]

# Pass the chosen channels through channel_selection, then build the dataset
# with data_preparation using every column of the selected channels.
selected_channels, selected_labels = channel_selection(
    features=features,
    labels=labels,
    channel_list=channel_list,
)
data = data_preparation(
    selected_channels=selected_channels,
    selected_labels=selected_labels,
    feature_subset=selected_channels.columns,
)

## P-Value Thresholding

In [None]:
# Run p-value thresholding with every column of the selected channels as the
# feature subset. The intermediate result is named `all_features` (not `all`)
# so the built-in all() is not shadowed for the rest of the kernel session.
all_features = feature_selection(selected_channels=selected_channels, feature_subset=selected_channels.columns) # select every feature
p_all, p_dict = p_value_thresholding(selected_features=all_features, selected_labels=selected_labels)

spc_roff 1.6147525601572645e-241
slope 2.1961591318448354e-218
mel_9 3.780620351407701e-197
mel_6 1.1018938972585085e-190
mel_8 7.18754692852844e-179
mel_7 4.889762489252905e-178
spc_cnt 2.887779599226387e-170
mel_5 5.517438027558447e-134
mfcc_2 3.901885014029416e-112
zc 8.621303483438252e-98
mel_1 7.514574188776398e-96
mel_2 4.0251470438104993e-85
mel_0 7.866398310131055e-85
mel_3 7.917307252001706e-84
dfa 5.4570769344308075e-80
mel_4 4.070881376943928e-78
gamma_beta 1.9635186392167987e-75
chr_9 1.0813929454262362e-62
chr_8 7.157621087120184e-59
gamma_alpha 1.8360025005449308e-58
mfcc_1 5.293632675036034e-58
mfcc_0 8.189398043909328e-58
mfcc_3 9.75984225924415e-57
alpha_delta 2.827046503995087e-54
alpha_theta 7.560080511524009e-42
chr_10 5.609345536289729e-37
peak_freq 1.2178319684208423e-34
beta_alpha 1.825251593945194e-32
mfcc_4 5.3463501550803495e-24
chr_7 3.0341498587256606e-21
gamma_delta 6.846410819985025e-20
gamma_theta 1.0310558268088689e-17
beta_theta 2.5202117308661734e-14
c

## High-Rank Feature Testing

In [None]:
# Slice the p-value ranking (p_all) with 'peak_freq' as the stop feature to
# obtain the high-rank feature subset.
# NOTE(review): whether the stop feature itself is included depends on
# p_value_slicing -- confirm in utils.
high_rank_subset = p_value_slicing(
    p_values=p_all,
    stop_feature='peak_freq',
)

# Training on this subset is currently disabled.
# NOTE(review): the disabled call passes `data` (not `selected_channels`) as
# selected_channels -- confirm that is intended before re-enabling it.
#data = data_preparation(selected_channels=data, selected_labels=selected_labels, feature_subset=high_rank_subset)

#for model in models:
#    model_training(data, model, display_labels, stats=True, cm=False)

## Using Combinations for Feature Selection

In [15]:
# The 26 highest-ranked features, in the same order as the p-value listing
# printed by the thresholding cell (spc_roff through chr_10).
subset = ['spc_roff',
 'slope',
 'mel_9',
 'mel_6',
 'mel_8',
 'mel_7',
 'spc_cnt',
 'mel_5',
 'mfcc_2',
 'zc',
 'mel_1',
 'mel_2',
 'mel_0',
 'mel_3',
 'dfa',
 'mel_4',
 'gamma_beta',
 'chr_9',
 'chr_8',
 'gamma_alpha',
 'mfcc_1',
 'mfcc_0',
 'mfcc_3',
 'alpha_delta',
 'alpha_theta',
 'chr_10']

# Downsample the full dataframe before the combination search.
# NOTE(review): sr=0.04 is presumably a sampling ratio -- confirm in utils.downsampling.
ds_df = downsampling(df, sr=0.04)

# Features and labels are both selected by column name so the split stays
# consistent even if downsampling() returns the columns in a different order.
ds_features = ds_df.drop(columns='label')
ds_labels = ds_df[['label']]  # single-column DataFrame, as before

# NOTE: this rebinds selected_channels / selected_labels to the downsampled
# data; any earlier cell re-run after this one will see the downsampled version.
selected_channels, selected_labels = channel_selection(features=ds_features, labels=ds_labels, channel_list=channel_list)

In [17]:
# Run feature_combination on the ranked subset with training enabled.
# NOTE(review): min_n / max_n presumably bound the combination size and
# stop_feature bounds the part of `subset` that is used -- confirm in utils.
feature_combination(
    feature_subset=subset,
    selected_channels=selected_channels,
    selected_labels=selected_labels,
    stop_feature='mel_7',
    min_n=2,
    max_n=5,
    training=True,
)