# Classification of EEG Signals

This notebook is a guide to the classification of EEG signals obtained from the [Keirn EEG Database](https://www.cs.colostate.edu/eeg/main/data/1989_Keirn_and_Aunon). This implementation is an attempt at reproducing the classification described by _Charles W. Anderson and Zlatko Sijerčić_ in this [paper](https://www.cs.colostate.edu/~anderson/wp/pubs/anderson-sijercic-96.pdf).

In [None]:
import mne
import math
import scipy
from scipy import signal
import scipy.io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.regression.linear_model import burg

In [None]:
def print_affirm(to_print):
    """
    Print a status message prefixed with a heavy arrow marker
    ...
    Parameters
    ----------
    to_print : any
        the message to print; it is converted with str() before printing
    """
    arrow = '\u279C'
    # !s forces str() conversion, independent of any custom __format__
    print(f'{arrow} {to_print!s}')

In [None]:
def create_mne_info(n_channels, s_freq, ch_names, ch_types):
    """
    Build the mne info object shared by all loaded eeg recordings
    ...
    Parameters
    ----------
    n_channels : int
        Number of channels used for collecting data
        (accepted for interface compatibility; not used by this function)
    s_freq
        The sampling frequency used for data collection
    ch_names : array
        An array of strings containing channel names
    ch_types : array
        An array of strings containing the channel types for each channel in ch_names

    Returns
    -------
    The info object created by mne.create_info, with the 'standard_1020'
    montage applied
    """

    montage_name = 'standard_1020'
    mne_info = mne.create_info(ch_names, sfreq=s_freq, ch_types=ch_types)
    # attach electrode positions for the named channels
    mne_info.set_montage(montage_name)
    return mne_info

In [None]:
def load_dataset(input_file_name):
    """
    Read the eeg dataset stored in a .mat file
    ...
    Parameters
    ----------
    input_file_name
        The name of the file containing eeg data

    Returns
    -------
    The first element of the 'data' variable stored in the file
    """

    contents = scipy.io.loadmat(input_file_name)
    data_field = contents['data']
    # the observations sit one level below the top of the 'data' variable
    return data_field[0]

In [None]:
def segment_eeg_data(data_arr, info, description):
    """
    Method to segment eeg data into overlapping windows, compute Burg's
    coefficients for every window and export them to 'coefficients.csv'
    ...
    Parameters
    ----------
    data_arr 
        The data array containing eeg data; each observation is indexed as
        obs[0][0] (subject), obs[0][1] (task), obs[0][2] (trial) and
        obs[0][3] (channel signals)
    info : info object from mne containing meta data
        Info object from mne containing meta data
    description : str
        The description to give to each segment

    Returns
    -------
    list
        One entry per observation, each being the nested list returned by
        gen_autoregression_coeff for that observation
    """

    data_dict = {
        'subject': [],
        'trial': [],
        'task': []
    }

    # accumulator for the per-observation results; it must not share a name
    # with any loop variable below, otherwise the return value is clobbered
    all_coeffs = []

    # 0.5 s windows starting every 0.25 s from 0 s up to 9.75 s
    # NOTE(review): the final window (onset 9.75 s) extends past 10 s; if the
    # trials are 10 s long it is shorter than the others - confirm intended
    onset_arr = np.arange(0, 10, 0.25)
    duration_arr = [0.5] * len(onset_arr)
    description_arr = [description] * len(onset_arr)
    my_annot = mne.Annotations(
                onset=onset_arr, 
                duration=duration_arr, 
                description=description_arr
            )
    mne.set_log_level('error')
    for obs in data_arr:
        raw = mne.io.RawArray(obs[0][3], info)
        raw.set_annotations(my_annot)
        burgs_values = gen_autoregression_coeff(obs[0][3], raw.annotations, raw)
        for window_values in burgs_values:
            for rho_sigma in window_values:
                # one csv row per (window, channel) pair
                # NOTE(review): this takes the last element of obs[0][0];
                # create_csv_dataset uses the last character of obs[0][0][0]
                # instead - confirm which subject label is intended
                data_dict['subject'].append(obs[0][0][-1])
                data_dict['task'].append(obs[0][1][0])
                # last two characters of the trial label, whitespace removed
                data_dict['trial'].append((obs[0][2][0][-2] + obs[0][2][0][-1]).strip())
                # the first entry of the burg result holds the coefficients;
                # they go into integer-keyed columns 1, 2, ...
                for column, coeff in enumerate(rho_sigma[0], start=1):
                    data_dict.setdefault(column, []).append(coeff)
        all_coeffs.append(burgs_values)
    export_to_csv(data_dict, 'coefficients')
    print_affirm('.csv file for coefficients created and saved')
    print_affirm('EEG segmentation done successfully')
    return all_coeffs

In [None]:
def gen_autoregression_coeff(data_arr, annotations, annt_raw):
    """
    Compute Burg's coefficients for every annotated window of a recording
    ...
    Parameters
    ----------
    data_arr 
        The data array from which to extract sub arrays; only its first
        six rows are processed
    annotations : array
        Array containing annotations of the eeg signals; each item must
        provide 'onset' and 'duration' (converted to samples at 250 Hz)
    annt_raw : Raw object
        A Raw array object from MNE containing the annotated eeg data
        (accepted for interface compatibility; not used by this function)

    Returns
    -------
    list
        One list per annotation, holding one entry per processed row: the
        result of burg(..., order=6) converted to a list
    """

    mne.set_log_level('error')
    burgs_values = []
    for annt in annotations:
        # translate the window from seconds to sample indices
        first = math.floor(annt['onset']*250)
        last = first + math.floor(annt['duration']*250)
        # Burg's coefficients of this window for each of the first six rows
        window_values = [
            list(burg(channel[first:last], order=6))
            for channel in data_arr[:6]
        ]
        burgs_values.append(window_values)
    return burgs_values

In [None]:
def mean_eye_blinks(data_arr): 
    """
    Method to attenuate eye blinks (high-amplitude peaks) in the given eeg data
    ...
    Every signal in obs[0][3] is low-pass filtered; peaks of the filtered
    signal with height >= 28 are located, and over the width of each peak the
    filtered value is subtracted from the raw samples and the signal mean
    (computed before any correction) is added back.

    Parameters
    ----------
    data_arr 
        The data array from which to remove eye blink data. The signals are
        modified IN PLACE; no copy is made.

    Returns
    -------
    The same data_arr object that was passed in, after correction
    """

    # the filter design does not depend on the data, so build it once
    # instead of once per channel of every observation
    win_coeffs = signal.firwin(numtaps=10, cutoff=30, window='hamming', pass_zero=True, scale=True, fs=250)
    for obs in data_arr:
        for eeg in obs[0][3]:
            lfilt_result = signal.lfilter(win_coeffs, [1.0], eeg)
            # find peaks in the filtered signal that can be discarded
            peak_indices, _ = signal.find_peaks(lfilt_result, height=28)
            # find the left/right interpolated edges of every peak
            widths = signal.peak_widths(lfilt_result, peak_indices, rel_height=0.6)
            eeg_mean = np.mean(eeg)
            for left, right in zip(widths[2], widths[3]):
                lo, hi = int(left), int(right + 1)
                # replace the peak by its residual around the signal mean;
                # overlapping peaks are corrected once per peak, in order
                eeg[lo:hi] = (eeg[lo:hi] - lfilt_result[lo:hi]) + eeg_mean
    return data_arr

In [None]:
def save_data_to_mat(data_arr, output_file_name):
    """
    Write a data array to a .mat file under the variable name 'data'
    ...
    Parameters
    ----------
    data_arr 
        The data array from which to create the .mat file
    output_file_name : str
        The file name of the output file
    """

    # wrap the array in a list so it is stored one level below 'data'
    wrapped = [data_arr]
    scipy.io.savemat(output_file_name, {'data': wrapped})

In [None]:
def export_to_csv(data, output_file_name):
    """
    Write a dictionary of columns to '<output_file_name>.csv'
    ...
    Parameters
    ----------
    data : dict
        Dictionary containing key-value pairs, with the values being equal length arrays
    output_file_name : str
        The file name of the output file, without the '.csv' extension
    """
    csv_name = output_file_name + '.csv'
    # build the table and write it out (the dataframe index is included)
    pd.DataFrame(data).to_csv(csv_name, encoding='utf-8')
    print(csv_name + ' created')

In [None]:
def create_csv_dataset(input_file_name, output_file_name):
    """
    Flatten the observations of a .mat file into a csv file with one row
    per channel signal
    ...
    Parameters
    ----------
    input_file_name : str
        The input file name to be read
    output_file_name : str
        The file name of the output file (without the '.csv' extension)
    """

    observations = scipy.io.loadmat(input_file_name)['data'][0]

    # start with the seven expected subjects so each one is reported below,
    # even if it has no observations
    subject_data = {'subject ' + str(number): [] for number in range(1, 8)}
    # group the observations by their subject label
    for obs in observations:
        subject_data.setdefault(obs[0][0][0], []).append(obs)

    # original author's note: index [0][3] of every observation contains
    # 7 lists (channels) of 2500 values each
    for name, subject_obs in subject_data.items():
        print(name + ' has ' + str(len(subject_obs)) + ' observations')

    # the three label columns come first; signal columns are added on demand
    table = {
        'subject' : [],
        'task' : [],
        'trial' : []
    }
    for name, subject_obs in subject_data.items():
        for obs in subject_obs:
            # one row per channel signal of the observation
            for signal_values in obs[0][3]:
                trial_label = obs[0][2][0]
                table['subject'].append(name[-1])
                table['task'].append(obs[0][1][0])
                # last two characters of the trial label, whitespace removed
                table['trial'].append((trial_label[-2] + trial_label[-1]).strip())
                # every sample goes into an integer-keyed column 1, 2, ...
                for column, val in enumerate(signal_values, start=1):
                    table.setdefault(column, []).append(val)

    # report the size of the dataset
    # original author's note: (325 X 7) rows and (2500 + 3) columns
    row_count = len(table['subject'])
    column_count = len(table)
    print('The dataset is ' + str(row_count) + ' rows X ' + str(column_count) + ' columns')

    export_to_csv(table, output_file_name)

In [None]:
def main():
    """
    Run the whole pipeline: load the dataset, attenuate eye blinks, extract
    Burg's coefficients, then save the corrected data as .mat and .csv
    """
    data_arr = load_dataset('eegdata.mat')
    print_affirm('Dataset loaded')

    # preliminary information for mne: six eeg channels plus one eog channel
    n_channels = 7
    sampling_freq = 250
    eeg_names = ['C3', 'C4', 'P3', 'P4', 'O1', 'O2']
    ch_names = eeg_names + ['EOG']
    ch_types = ['eeg'] * len(eeg_names) + ['eog']
    info = create_mne_info(
        n_channels=n_channels,
        s_freq=sampling_freq,
        ch_names=ch_names,
        ch_types=ch_types,
    )
    print_affirm('MNE Info created')

    # average eye blink data
    filtered_data_arr = mean_eye_blinks(data_arr)
    print_affirm('Eye blinks removed from EEG data')

    # segment the data and find burg's coefficients, which will serve as the
    # inputs; the coefficients are written to csv by segment_eeg_data itself
    segment_eeg_data(filtered_data_arr, info, 'segment')
    print_affirm('AR coefficients obtained by Burgs method')

    # save the corrected data to a mat file
    save_data_to_mat(filtered_data_arr, 'mean_eye_blinks.mat')
    print_affirm('Filtered .mat file created')

    # create a csv from the .mat file (can be used if required)
    create_csv_dataset('mean_eye_blinks.mat', 'mean_eye_blinks')
    print_affirm('.csv file of corrected data created and saved')

In [None]:
# run the pipeline only when this code is executed directly, not when imported
if __name__ == '__main__':
    main()