## Dependencies

In [1]:
import numpy as np
import pandas as pd

from utils import *

## I/O - Initialization

In [2]:
# Load the feature dataset as a dataframe. float_precision='round_trip'
# selects pandas' round-trip float converter so values are parsed exactly
# as they were written to the file.
csv_file = 'eeg_features.csv'
df = pd.read_csv(csv_file, float_precision='round_trip')

# Discard the stray 'Unnamed: 0' column (presumably the index saved by an
# earlier to_csv call -- TODO confirm). errors='ignore' keeps this cell
# working when the CSV was exported without that column.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [3]:
# Split the dataset into features and labels.
# Both sides are selected by column name so they stay consistent even if
# 'label' is not the last column of the CSV.
features = df.drop(columns='label')
labels = df[['label']]  # kept as a single-column DataFrame

# Human-readable class names: 1 -> 'drowsy', anything else -> 'alert'.
# NOTE(review): unique() returns values in order of first appearance, so the
# order of display_labels follows the row order of the data -- confirm that
# this matches the class order expected by model_training.
display_labels = ['drowsy' if label == 1 else 'alert' for label in labels['label'].unique()]

In [4]:
# models to be trained
models = ['GBC', 'K-NN', 'SVM', 'DTC', 'RFC', 'Logistic Regression', 'NN']

# channels to be processed
channel_list = ['F3', 'F4', 'C3', 'Cz', 'Oz']

# restrict features/labels to the chosen channels
selected_channels, selected_labels = channel_selection(
    features=features,
    labels=labels,
    channel_list=channel_list,
)

# prepare the data using every column of the selected channels as the feature subset
data = data_preparation(
    selected_channels=selected_channels,
    selected_labels=selected_labels,
    feature_subset=selected_channels.columns,
)

## P-Value Thresholding

In [None]:
# Select every feature of the chosen channels (the feature subset is all of
# their columns), then run p-value thresholding of those features against
# the labels.
# The result is named `all_features` rather than `all` so the builtin all()
# is not shadowed for the rest of the notebook session.
all_features = feature_selection(selected_channels=selected_channels, feature_subset=selected_channels.columns)
p_all, p_dict = p_value_thresholding(selected_features=all_features, selected_labels=selected_labels)

## Training

In [None]:
# Train each selected model on the prepared data, using the same reporting
# options for every run.
training_options = {'stats': True, 'cm': False, 'verbose': False}
for model in models:
    model_training(data, model, display_labels, **training_options)

## High-Rank Feature Testing

In [None]:
# Retrain using only the highest-ranked features: slice the p-value ranking
# (p_all) with 'peak_freq' as the stop feature.
high_rank_subset = p_value_slicing(p_values=p_all, stop_feature='peak_freq')

# Re-prepare the data from the original channel dataframe, as in the initial
# data_preparation call. Passing the already-prepared `data` back in as
# `selected_channels` was inconsistent with that call and made this cell
# depend on how many times it had been run.
data = data_preparation(selected_channels=selected_channels, selected_labels=selected_labels, feature_subset=high_rank_subset)

# NOTE: `data` now holds the reduced feature set; re-run the initial
# data_preparation cell before training on the full feature set again.
for model in models:
    model_training(data, model, display_labels, stats=True, cm=False)