# Setup

Imports the standard-library, third-party, and project modules used by this notebook.

In [None]:
import pathlib
import re
import mne.io
import pandas as pd
import typing
import mne
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from eeg_auth_defense_utilities import data_retrieval, filtration, features, formatting

# Constants

In [None]:
# Sampling frequency of the dataset in Hz; handed to MNE when building recording metadata.
DATASET_SAMPLE_FREQ_HZ = 200

# Names of the EEG channels; used both as MNE channel names and as the channels to filter.
DATA_CHANNEL_NAMES = ['T7', 'F8', 'Cz', 'P4']

# Frequency bands given to the band-pass filter, one entry per (lower, upper, label) row.
# NOTE(review): limits are presumably in Hz and None presumably means "no bound on that side"
# (so 'Raw' would be unfiltered) - confirm against filtration.FrequencyBand.
FREQUENCIES = [
    filtration.FrequencyBand(lower=band_lower, upper=band_upper, label=band_label)
    for band_lower, band_upper, band_label in (
        (8.0, 12.0, 'Alpha'),
        (12.0, 35.0, 'Beta'),
        (4.0, 8.0, 'Theta'),
        (35.0, None, 'Gamma'),
        (None, None, 'Raw'),
    )
]

# Utilities

## Types

In [None]:
# Generic type variable; lets a helper declare that it returns the same type as the values of
# the mapping it is given.
T = typing.TypeVar('T')

## Functions

In [None]:
def filter_subject_data(subject_data: formatting.SubjectDataMap) -> formatting.SubjectDataMap:
    """
    Runs the band-pass filter over every subject's data in the given data map.

    A single filter is built from ``FREQUENCIES`` and reused for all subjects. Each subject's
    data is converted to Python-MNE format before being handed to the filter together with
    ``DATA_CHANNEL_NAMES``.

    :param subject_data: the subject data to filter, keyed by subject identifier.
    :return: a new data map with the same subject identifiers as keys, whose values are the
             filter output for the corresponding subject.
    """
    eeg_filter = filtration.EEGBandpassFilter(FREQUENCIES)

    return {
        subject_id: eeg_filter.apply_filter(
            convert_dataframe_to_mne(subject_frame),
            DATA_CHANNEL_NAMES,
        )
        for subject_id, subject_frame in subject_data.items()
    }


def convert_dataframe_to_mne(dataframe: pd.DataFrame) -> mne.io.RawArray:
    """
    Builds a Python-MNE raw array from the given dataframe.

    The dataframe is transposed (as a copy) so that each original column becomes one row of the
    array handed to MNE. The recording metadata declares ``DATA_CHANNEL_NAMES`` as EEG channels
    sampled at ``DATASET_SAMPLE_FREQ_HZ``.

    NOTE(review): this assumes the dataframe's columns correspond, in order, to
    ``DATA_CHANNEL_NAMES`` - confirm against the dataset formatter.

    :param dataframe: The dataframe to convert.
    :return: A Python-MNE data array.
    """
    channel_rows = dataframe.transpose(copy=True).to_numpy()
    recording_info = mne.create_info(
        ch_names=DATA_CHANNEL_NAMES,
        sfreq=DATASET_SAMPLE_FREQ_HZ,
        ch_types='eeg',
    )
    return mne.io.RawArray(channel_rows, recording_info)


def get_sample_value_from_map(map_to_sample: typing.Dict[str, T]) -> T:
    """
    Helper function which retrieves a sample value from the given map of data.

    The sample is the first value in the map's iteration order; the map is not modified.

    :param map_to_sample: The data map to get a sample from.
    :return: The first value stored in the map.
    :raises ValueError: If the map is empty, so there is nothing to sample.
    """
    for sample in map_to_sample.values():
        return sample

    # Raise an explicit error rather than leaking StopIteration, which can silently end an
    # enclosing iteration (or surface as RuntimeError inside a generator).
    raise ValueError('Cannot take a sample from an empty map.')

# Setup Dataset

In [None]:
# Fetch the auditory dataset; the value returned by retrieve() is what the formatter reads from.
dataset_downloader = data_retrieval.AuditoryDataDownloader()
dataset_path = dataset_downloader.retrieve()
# Format the dataset into a mapping with one entry per subject.
# NOTE(review): values are presumably pandas DataFrames (they are later passed to
# convert_dataframe_to_mne and .head() is called on one) - confirm against the formatter.
dataset_formatter = formatting.AuditoryDataFormatter()
data = dataset_formatter.format_data(dataset_path)
print(f'{len(data.keys())} subjects loaded from auditory dataset...')
print('Sample:')
# Preview one subject's data; the trailing expression is what the notebook cell displays.
sample_value = get_sample_value_from_map(data)
sample_value.head()

# Pre-process Data

In [None]:
# Replace the loaded subject data with the output of the band-pass filter.
# NOTE(review): `data` is overwritten, so re-running this cell passes the already-filtered
# output back into the filter - re-run the dataset setup cell first.
data = filter_subject_data(data)
print('Filtered data...')
print('Sample:')
# Preview one subject's filtered data; the trailing expression is what the cell displays.
# NOTE(review): .head() assumes the filter output is DataFrame-like - confirm against
# filtration.EEGBandpassFilter.apply_filter.
sample_value = get_sample_value_from_map(data)
sample_value.head()

# Feature Extraction

In [None]:
# TODO: implement feature extraction.