# Feature Extraction

This notebook attempts to follow the steps for extracting features from the Keirn dataset described by *Suman Dutta, Mandeep Singh, and Amod Kumar* in [this paper](https://www.sciencedirect.com/science/article/pii/S1746809418300478?casa_token=O4FQDvOwuqEAAAAA:0KLqq4okPDih95kJeWcOxLSZawKwH9OgsLjCxOS2joy_1cR5T28s6M0IVV17-DslhU9C-yWWDb0). Suitable assumptions were made wherever the paper did not give sufficient information about the feature extraction process. The extracted features are then passed to an LSSVM model, as shown in [this notebook](https://github.com/Parthiv-M/eeg-classification/blob/main/dutta_2018/LSSVM.ipynb).

In [None]:
import pandas as pd
from PyEMD import EMD
from statsmodels.regression.linear_model import burg
from statsmodels.tsa.stattools import levinson_durbin

In [None]:
def read_dataset(file_name):
    """
    Load the EEG csv file and keep only the rows and columns used later on

    Parameters
    ----------
    file_name : str
        Name of the file to be read

    Returns
    -------
    pandas.DataFrame
        Rows of subjects 1, 3 and 5 whose task is 'baseline' or
        'letter-composing', with the 'subject', 'trial' and 'Unnamed: 0'
        columns removed. The original row index is kept.
    """
    raw = pd.read_csv(file_name)

    # Build both row filters up front and apply them together.
    wanted_subject = raw['subject'].isin([1, 3, 5])
    wanted_task = raw['task'].isin(['baseline', 'letter-composing'])
    selected = raw[wanted_subject & wanted_task]

    # Bookkeeping columns are dropped if present; missing ones are ignored.
    return selected.drop(columns=['subject', 'trial', 'Unnamed: 0'], errors='ignore')

In [None]:
def calculate_row_imfs(eeg_row, task):
    """
    Decompose one signal with EMD and pick the IMF used for the given task

    Parameters
    ----------
    eeg_row : array
        The initial signal to compute the IMF
    task : str
        Task label of the signal; it decides which IMF is returned

    Returns
    -------
    The IMF at index 8 for 'baseline' / 'letter-composing', the IMF at
    index 7 for 'multiplication', or None when the decomposition yields
    fewer than 9 IMFs or the task is none of these
    """
    decomposer = EMD()
    decomposer.FIXE = 10
    components = decomposer.emd(eeg_row)

    # Too few components: nothing to select for any task.
    if len(components) < 9:
        return None

    if task in ("baseline", "letter-composing"):
        return components[8]
    if task == "multiplication":
        return components[7]
    return None

In [None]:
def find_levinson_coeffs(imf, order=6):
    """
    Method to find levinson coefficients for the input

    Parameters
    ----------
    imf : array or None
        The input for the levinson computation. None marks a signal for
        which no IMF was selected.
    order : int, optional
        Order handed to ``levinson_durbin`` as its second argument.
        Defaults to 6, the value that used to be hard-coded here.

    Returns
    -------
    The element at index 1 of the ``levinson_durbin`` result (the AR
    coefficients according to the statsmodels documentation), or None
    when ``imf`` is None
    """
    # Nothing to compute for a missing IMF; callers check for None.
    if imf is None:
        return None
    # The former debug print of the whole IMF was removed: it flooded the
    # output with one full array (or "None") per dataset row.
    return levinson_durbin(imf, order)[1]

In [None]:
def find_burgs_coeffs(imf, order=6):
    """
    Method to find Burgs coefficients for the input

    Parameters
    ----------
    imf : array or None
        The input for the burgs computation. None marks a signal for
        which no IMF was selected.
    order : int, optional
        Order passed to ``burg``. Defaults to 6, the value that used to be
        hard-coded here.

    Returns
    -------
    The first element of the ``burg`` result (the AR coefficients
    according to the statsmodels documentation), or None when ``imf`` is
    None
    """
    # Mirror find_levinson_coeffs: a missing IMF yields None instead of an
    # error raised from inside statsmodels.
    if imf is None:
        return None
    return burg(imf, order=order)[0]

In [None]:
def export_to_csv(data, output_file_name):
    """
    Write the given columns to '<output_file_name>.csv' and report it

    Parameters
    ----------
    data : dict
        Dictionary containing key-value pairs, with the values being equal length arrays
    output_file_name : str
        The file name of the output file, without the '.csv' extension
    """
    csv_path = output_file_name + '.csv'
    # Build the frame and write it in one go (utf-8, index column included).
    pd.DataFrame(data).to_csv(csv_path, encoding='utf-8')
    print(csv_path + ' created')

In [None]:
def main():
    """
    The driver function for the code

    Reads 'eegdataset.csv', selects one IMF per row, computes the levinson
    coefficients of that IMF and exports them, together with the task
    label, to 'letter_base.csv'. Rows for which no coefficients are
    obtained (no IMF selected) are left out of the export.
    """
    data_csv = read_dataset('eegdataset.csv')

    # 'task' holds the labels; the coefficient columns are keyed 1, 2, ...
    # and are created on first use.
    data_dict = {
        'task': []
    }

    # Pull the labels and the signals out of the frame once, by column
    # name, instead of slicing the DataFrame row by row and relying on
    # 'task' being the first column.
    tasks = data_csv['task'].to_numpy()
    signals = data_csv.drop(columns='task').to_numpy()

    # for every row in the dataset, calculate the IMF and its levinson
    # coefficients in a single pass
    for task, eeg_row in zip(tasks, signals):
        row_imf = calculate_row_imfs(eeg_row, task)
        lev_coeffs = find_levinson_coeffs(row_imf)
        if lev_coeffs is None:
            continue
        data_dict['task'].append(task)
        for position, coeff in enumerate(lev_coeffs, start=1):
            data_dict.setdefault(position, []).append(coeff)

    # export to csv for better usability
    export_to_csv(data_dict, 'letter_base')

In [None]:
# Run the pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()