In [None]:
import mne
import math
import scipy
from scipy import signal
import scipy.io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.regression.linear_model import burg

In [None]:
def print_affirm(to_print):
    """
    Helper method that prints a message behind an arrow marker (U+279C)
    ...
    Parameters
    ----------
    to_print : any
        the value to show; it is converted with str() before printing
    """
    arrow_prefix = '\u279C '
    message = str(to_print)
    print(arrow_prefix + message)

In [None]:
def create_mne_info(s_freq, ch_names, ch_types):
    """
    Method to build the mne info object shared by the loaded eeg recordings
    ...
    Parameters
    ----------
    s_freq
        The sampling frequency used for data collection
    ch_names : array
        An array of strings containing channel names
    ch_types : array
        An array of strings giving the channel type of each entry in ch_names

    Returns
    -------
    The info object returned by mne.create_info, after the 'standard_1020'
    montage has been set on it
    """

    montage_name = 'standard_1020'
    mne_info = mne.create_info(ch_names, sfreq=s_freq, ch_types=ch_types)
    mne_info.set_montage(montage_name)
    return mne_info

In [None]:
def load_dataset(input_file_name):
    """
    Method to read the eeg dataset from a .mat file
    ...
    Parameters
    ----------
    input_file_name : string
        The name of the file containing eeg data

    Returns
    -------
    The first element of the 'data' variable stored in the file
    """

    mat_contents = scipy.io.loadmat(input_file_name)
    data_variable = mat_contents['data']
    return data_variable[0]

In [None]:
def gen_autoregression_coeff(data_arr, annotations, index_of_zero):
    """
    Method to run Burg's estimator on every annotated window of the signal
    ...
    Parameters
    ----------
    data_arr : Numpy array
        The per-channel sample array; only its first 6 rows are used
    annotations : array
        Annotation entries exposing 'onset' and 'duration'; both are
        multiplied by 250 and floored to obtain sample indices
    index_of_zero : int
        Accepted from the caller but not read anywhere in this method

    Returns
    -------
    list
        One entry per annotation; each entry is a list holding, for every
        row that was used, the list built from burg()'s return value
    """

    # NOTE(review): index_of_zero is never used here, so windows are not
    # clipped to it - confirm whether that is intended.
    mne.set_log_level('error')
    burgs_values = []
    for annotation in annotations:
        first_sample = math.floor(annotation['onset']*250)
        n_samples = math.floor(annotation['duration']*250)
        last_sample = first_sample + n_samples
        # one order-6 fit per channel row for this window
        window_values = [
            list(burg(channel[first_sample:last_sample], order=6))
            for channel in data_arr[:6]
        ]
        burgs_values.append(window_values)
    print_affirm('AR coefficients obtained by Burgs method')
    return burgs_values

In [None]:
def segment_eeg_data(data_arr, orig_arr, info, description):
    """
    Method to segment eeg data into overlapping windows, compute Burg
    coefficients per window and channel, and save them to coefficients.csv
    ...
    Parameters
    ----------
    data_arr : Numpy array
        The observations whose signals (obs[0][3]) are segmented
    orig_arr : Numpy array
        Observations aligned index-by-index with data_arr; only the subject,
        task and trial fields are read from it
    info : info object from mne containing meta data
        Info object from mne containing meta data
    description : str
        The description to give to each segment

    Returns
    -------
    None
        The result is written through export_to_csv as 'coefficients.csv'
    """

    data_dict = {
        'subject': [],
        'trial': [],
        'task': []
    }

    mne.set_log_level('error')
    for idx, obs in enumerate(data_arr):
        channels = obs[0][3]
        first_channel = channels[0]
        # identify the index containing the padded zeros: the first exact 0.0
        # at or after sample 2000 of the first channel
        try:
            index_of_zero = first_channel.tolist().index(0.0, 2000)
        except ValueError:
            index_of_zero = len(first_channel)
        # length of the non-zero part of the segment; 10/2500 scales a sample
        # count (presumably 2500 samples per 10 s recording - TODO confirm)
        range_end = (10/2500)*len(first_channel[:index_of_zero])
        # windows start every 0.25 and last 0.5, so neighbours overlap by half
        onset_arr = np.arange(0, range_end, 0.25)
        duration_arr = [0.5] * len(onset_arr)
        description_arr = [description] * len(onset_arr)
        my_annot = mne.Annotations(
                    onset=onset_arr,
                    duration=duration_arr,
                    description=description_arr
                )
        raw = mne.io.RawArray(channels, info)
        raw.set_annotations(my_annot)
        burgs_values = gen_autoregression_coeff(channels, raw.annotations, index_of_zero)

        # the labels are identical for every row produced by this observation
        meta = orig_arr[idx][0]
        subject = meta[0][-1]
        task = meta[1][0]
        trial = (meta[2][0][-2] + meta[2][0][-1]).strip()

        # one csv row per (window, channel); the loop names no longer shadow
        # the imported burg function or any accumulator
        for window_values in burgs_values:
            for rho_sigma in window_values:
                data_dict['subject'].append(subject)
                data_dict['task'].append(task)
                data_dict['trial'].append(trial)
                # rho_sigma[0] holds the coefficients; columns are keyed 1, 2, ...
                for column, coeff in enumerate(rho_sigma[0], start=1):
                    data_dict.setdefault(column, []).append(coeff)
    print_affirm('EEG segmentation done successfully')
    export_to_csv(data_dict, 'coefficients')
    print_affirm('.csv file for coefficients created and saved')

In [None]:
def fill_with_zeros(eeg, len_of_signal, target_len=2500):
    """
    Method to right-pad a shortened eeg signal with zeros up to a fixed length
    ...
    Parameters
    ----------
    eeg : list
        The eeg samples to pad (any sequence is accepted and padded as a list)
    len_of_signal : int
        The number of samples currently in eeg
    target_len : int, optional
        The length to pad to; defaults to 2500

    Returns
    -------
    list
        eeg itself, untouched, when len_of_signal equals target_len; otherwise
        a new list made of the samples followed by zeros
    """
    if len_of_signal == target_len:
        return eeg
    # a negative repeat count yields an empty list, so an over-long signal is
    # returned as an unpadded copy
    zero_list = [0] * (target_len - len_of_signal)
    # list() keeps this a concatenation even when eeg is a numpy array, where
    # "+" would otherwise mean element-wise addition
    return list(eeg) + zero_list

In [None]:
def remove_eye_blinks(data_arr):
    """
    Method to remove eye blinks from the given eeg data
    ...
    Blinks are located as peaks in the EOG channel (row 6 of obs[0][3]). The
    samples inside every blink are cut out of each of the first 6 rows, the
    remaining samples are shifted left, and the tail is filled with zeros so
    each row keeps its original length.

    Parameters
    ----------
    data_arr : Numpy array
        The data array from which to remove eye blink data; it is not modified

    Returns
    -------
    A deep copy of data_arr in which the first 6 rows of every obs[0][3] have
    had their blink samples removed
    """
    import copy

    # work on a copy so the caller's array keeps the unfiltered signals
    cleaned_arr = copy.deepcopy(data_arr)
    for obs in cleaned_arr:
        channels = obs[0][3]
        eog = channels[6]
        # find peaks in the eog signal that correspond to eye blinks
        # (28 is the minimum peak height used to call a blink)
        peak_indices, _ = signal.find_peaks(eog, height=28)
        if len(peak_indices) == 0:
            continue
        # left/right interpolated positions of each peak at 60% relative height
        _, _, left_points, right_points = signal.peak_widths(eog, peak_indices, rel_height=0.6)

        # mark every sample from int(left) to int(right) inclusive as blink data;
        # a mask keeps the indices valid regardless of how many samples go
        blink_mask = np.zeros(len(eog), dtype=bool)
        for left, right in zip(left_points, right_points):
            blink_mask[int(left):int(right) + 1] = True
        keep_mask = ~blink_mask

        for ch_idx in range(6):
            samples = np.asarray(channels[ch_idx])
            kept = samples[keep_mask]
            # fill the rest of the shortened signal with zeros
            padded = np.zeros_like(samples)
            padded[:len(kept)] = kept
            # write each channel back to its own row
            channels[ch_idx] = padded
    return cleaned_arr

In [None]:
def export_to_csv(data, output_file_name):
    """
    Method to write a dictionary of columns to a csv file
    ...
    Parameters
    ----------
    data : dict
        Dictionary containing key-value pairs, with the values being equal length arrays
    output_file_name : str
        The output file name without extension; '.csv' is appended to it
    """
    csv_path = output_file_name + '.csv'
    # build the dataframe and write it out in one chained call
    pd.DataFrame(data).to_csv(csv_path, encoding='utf-8')
    print(output_file_name + '.csv created')

In [None]:
def create_csv_dataset(input_file_name, output_file_name):
    """
    Method to flatten the original .mat file into a csv with one row per channel
    ...
    Parameters
    ----------
    input_file_name : str
        The input file name to be read
    output_file_name : str
        The file name of the output file (passed on to export_to_csv)
    """

    mat_contents = scipy.io.loadmat(input_file_name)
    observations = mat_contents['data'][0]

    # the seven listed subjects are seeded first, which fixes their order in
    # the report and in the csv; any other subject is added as encountered
    listed_subjects = (
        'subject 1', 'subject 2', 'subject 3', 'subject 4',
        'subject 5', 'subject 6', 'subject 7',
    )
    subject_data = {name: [] for name in listed_subjects}
    for obs in observations:
        subject_data.setdefault(obs[0][0][0], []).append(obs)

    # original author's note: obs[0][3] holds 7 lists of 2500 values each
    for subject_name, subject_obs in subject_data.items():
        print(subject_name + ' has ' + str(len(subject_obs)) + ' observations')

    # label columns first, then one numbered column per sample
    rows = {
        'subject' : [],
        'task' : [],
        'trial' : []
    }
    for subject_name, subject_obs in subject_data.items():
        for obs in subject_obs:
            fields = obs[0]
            # every channel of the observation becomes its own row
            for signal_values in fields[3]:
                rows['subject'].append(subject_name[-1])
                rows['task'].append(fields[1][0])
                rows['trial'].append((fields[2][0][-2] + fields[2][0][-1]).strip())
                # sample columns are keyed 1, 2, ... in sample order
                for column, val in enumerate(signal_values, start=1):
                    rows.setdefault(column, []).append(val)

    # report the dataset dimensions
    # (original author's note: expected (325 X 7) rows and (2500 + 3) columns)
    print('The dataset is ' + str(len(rows['subject'])) + ' rows X ' + str(len(rows.keys())) + ' columns')

    export_to_csv(rows, output_file_name)

In [None]:
def main():
    """
    Run the pipeline: load 'eegdata.mat', build the mne info, remove eye
    blinks, then segment the data and export the coefficients
    """
    raw_data = load_dataset('eegdata.mat')
    print_affirm('Dataset loaded')

    # preliminary information for mne: six eeg channels followed by one eog channel
    channel_names = ['C3', 'C4', 'P3', 'P4', 'O1', 'O2', 'EOG']
    channel_types = ['eeg'] * 6 + ['eog']
    mne_info = create_mne_info(s_freq=250, ch_names=channel_names, ch_types=channel_types)
    print_affirm('MNE Info created')

    # filter the data by removing eye blinks
    blink_free_data = remove_eye_blinks(raw_data)
    print_affirm('Eye blinks removed from EEG data')

    # segment the data and compute the burg's coefficients that serve as inputs
    segment_eeg_data(blink_free_data, raw_data, mne_info, 'segment')

In [None]:
# Entry point: run the full pipeline only when this code is executed as the
# top-level module rather than imported.
if __name__ == '__main__':
    main()