# EEG Preprocessor

In [1]:
# Import dependencies.
import mne
import os
import shutil # High-level file organisation.
from pymatreader import read_mat # Reading .mat files.
import numpy as np
import re
from itertools import chain
import ipywidgets as widgets
import matplotlib

## Create MNE data structures:

In [2]:
# Folder (relative to the notebook's working directory) that holds the
# recordings to be preprocessed.
data_folder = os.path.join("eeg_data")

# exist_ok=True makes this cell safe to re-run and avoids the race between
# checking for the folder and creating it.
os.makedirs(data_folder, exist_ok=True)

### Before running the cells below, copy your recordings into the `eeg_data` folder. Supported formats are .mat, .fif, .gdf and .edf.

In [3]:
# Sort the loose files in data_folder into one sub-folder per file extension,
# e.g. "recording.mat" is moved to "<data_folder>/mat/recording.mat".
for file in os.listdir(data_folder):
    source_path = os.path.join(data_folder, file)

    # Only regular files are sorted; sub-folders (including the ones created
    # by a previous run of this cell) are left where they are.
    if not os.path.isfile(source_path):
        continue

    # Drop the leading '.' and normalise the case so that "a.MAT" and "b.mat"
    # end up in the same "mat" folder.
    ext = os.path.splitext(file)[1][1:].lower()

    # Files without an extension cannot be classified, so they are skipped.
    if ext == '':
        continue

    target_folder = os.path.join(data_folder, ext)
    os.makedirs(target_folder, exist_ok=True)
    shutil.move(source_path, target_folder)

In [4]:
# Regular expressions used to locate fields inside .mat files, whose key names
# are not standardised. Each dictionary key is the name of the MNE parameter
# the matched field is used for.

# Fields that become arguments of mne.create_info:
regex_info = {
    # e.g. "chnames", "ch_names", "channel_name", "channelnames"
    "ch_names": r"\b(ch(?:annel)?_?names?)\b",
    # e.g. "sfreq", "sFreq", "sampFreq", "sampling_frequency".
    # Both "freq" and "Freq" are accepted so the MNE-style lowercase
    # spelling is found as well.
    "sfreq": r"\b((?:samp(?:ling)?|s)_?[Ff]req(?:uency)?)\b",
    # e.g. "chtypes", "ch_types", "channel_type"
    "ch_types": r"\b(ch(?:annel)?_?types?)\b",
}

# Optional extra fields:
regex_misc = {
    # e.g. "proj", "projs", "projector", "projectors"
    "projs": r"\bproj(?:ector)?s?\b",
    # Any key starting with "event" or "marker", e.g. "events", "markers"
    "events": r"\b(?:event\w*|marker\w*)\b",
}

def simplify_dict(d):
    """Strip redundant list nesting from the values of a dictionary, in place.

    A list value that wraps exactly one inner list (``[[1, 2]]``) is replaced
    by that inner list, repeatedly, until the outer list has more than one
    element or its only element is not a list. Dictionary values are
    processed recursively. All other values are left untouched.

    Parameters
    ----------
    d : dict
        Dictionary to simplify. It is modified in place.

    Returns
    -------
    dict
        The same dictionary object that was passed in.
    """
    for key in d:
        entry = d[key]

        if isinstance(entry, dict):
            # Nested dictionaries are simplified with the same rules.
            d[key] = simplify_dict(entry)
            continue

        if not isinstance(entry, list):
            continue

        # Peel off single-element wrapper lists one layer at a time.
        while len(entry) == 1 and isinstance(entry[0], list):
            entry = entry[0]
        d[key] = entry

    return d

def regex_search_dict(data, regex_pattern):
    """Collect the values of all keys that match a pattern in a nested dict.

    Parameters
    ----------
    data : dict
        Dictionary to search. Values that are themselves dictionaries are
        searched recursively.
    regex_pattern : str
        Pattern handed to ``re.search`` for every string key.

    Returns
    -------
    list
        Flat list with the value of every matching key, in iteration order.
        The list is empty when nothing matches.
    """
    match = []

    for key, value in data.items():
        # Non-string keys cannot be matched against a pattern, so skip them
        # instead of letting re.search raise a TypeError.
        if isinstance(key, str) and re.search(regex_pattern, key):
            match.append(value)

        if isinstance(value, dict):
            # extend (not append) keeps the result flat: a nested dictionary
            # without any match contributes nothing instead of a stray empty
            # list, and nested matches are not wrapped in an extra list.
            match.extend(regex_search_dict(value, regex_pattern))

    return match


def create_mne_from_mat_dict(mat_dict):
    """Build an ``mne.io.RawArray`` from the contents of a loaded .mat file.

    The (possibly nested) dictionary is searched with the patterns in
    ``regex_info`` for channel names, sampling frequency and channel types,
    and with a local pattern for the EEG samples themselves.

    Parameters
    ----------
    mat_dict : dict
        Dictionary holding the contents of the .mat file.

    Returns
    -------
    mne.io.RawArray
        Raw object whose data is arranged as (channels, samples).

    Raises
    ------
    ValueError
        If the sampling frequency cannot be identified unambiguously, or if
        no two-dimensional data array is found.
    """
    # Collect every field that looks like an mne.create_info argument, then
    # strip the list nesting introduced by the search.
    info_parameters = {
        parameter: regex_search_dict(mat_dict, pattern)
        for parameter, pattern in regex_info.items()
    }
    simplify_dict(info_parameters)

    # NOTE: events and projectors (see regex_misc) are not attached to the
    # returned object, so they are not extracted here.

    # mne.create_info needs a single float for the sampling frequency.
    sfreq = info_parameters['sfreq']
    if len(sfreq) != 1:
        raise ValueError(
            "Expected exactly one sampling frequency field in the .mat file, "
            f"found {len(sfreq)}."
        )
    # squeeze() first so that a 1-element array converts cleanly to a float.
    info_parameters['sfreq'] = float(np.asarray(sfreq[0]).squeeze())

    data_patterns = r"\b(?:eeg|raw[_\s]?eeg|data|eeg[_\s]?data|data[_\s]?eeg)\b" # Different permutations of eeg data.
    data = np.array(regex_search_dict(mat_dict, data_patterns))
    data = data.squeeze() # Drop any superfluous dimensions of size 1.

    if data.ndim != 2:
        raise ValueError(
            "Expected a single 2-D (channels x samples) data array in the "
            f".mat file, got an array with shape {data.shape}."
        )

    # MNE expects (channels, samples). The number of samples is almost always
    # larger than the number of channels, so the longer axis is taken to be
    # the sample axis.
    if data.shape[0] > data.shape[1]:
        data = data.T

    n_channels = data.shape[0]

    # Without exactly one name per channel, fall back to generic names
    # ['channel0', 'channel1', ...].
    if len(info_parameters['ch_names']) != n_channels:
        info_parameters['ch_names'] = [f"channel{i}" for i in range(n_channels)]

    # Without exactly one type per channel, all channels are assumed to be EEG.
    if len(info_parameters['ch_types']) != len(info_parameters['ch_names']):
        info_parameters['ch_types'] = ["eeg"] * len(info_parameters['ch_names'])

    info = mne.create_info(**info_parameters) # Create the MNE info structure.
    return mne.io.RawArray(data, info) # Create the MNE raw object.

In [5]:
# Maps each file name to its loaded MNE raw object.
raw_eeg_dict = {}

for folder in os.listdir(data_folder):
    path = os.path.join(data_folder, folder)

    # Only the per-extension sub-folders hold recordings; loose files that
    # were not sorted (e.g. files without an extension) are skipped.
    if not os.path.isdir(path):
        continue

    for file in os.listdir(path):
        file_path = os.path.join(path, file)

        # While .mat files are common, they don't have a standardised
        # recording format, and so they need to be handled separately.
        if folder.lower() == "mat":
            raw_eeg_dict[file] = create_mne_from_mat_dict(read_mat(file_path))
        else:
            raw_eeg_dict[file] = mne.io.read_raw(file_path, verbose=False)

Creating RawArray with float64 data, n_channels=22, n_times=689600
    Range : 0 ... 689599 =      0.000 ...  3447.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=667000
    Range : 0 ... 666999 =      0.000 ...  3334.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=669400
    Range : 0 ... 669399 =      0.000 ...  3346.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=689600
    Range : 0 ... 689599 =      0.000 ...  3447.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=666800
    Range : 0 ... 666799 =      0.000 ...  3333.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=664400
    Range : 0 ... 664399 =      0.000 ...  3321.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=664000
    Range : 0 ... 663999 =      0.000 ...  3319.995 secs
Ready.
Creating RawArray with float64 data, n_channels=22, n_times=667600
    Range : 0 ..

## At this point, select your desired EEG files to be processed:

In [20]:
# Tag selector for the loaded recordings that should be processed.
available_files = list(raw_eeg_dict)

name = widgets.TagsInput(
    # The widget's value is a list of tags. Slicing pre-selects the first
    # file and yields an empty list (instead of raising) if nothing was loaded.
    value=available_files[:1],
    allowed_tags=available_files,
    description='File(s)',
    allow_duplicates=False
)
name

TagsInput(value=['CLASubjectC1512233StLRHand.mat'], allow_duplicates=False, allowed_tags=['CLASubjectC1512233S…

## Apply filters:

In [21]:
# Settings shared by both cut-off frequency inputs.
_freq_input_options = dict(min=0, max=150, step=0.1, disabled=False)

# Checkbox that decides whether a band-pass filter is applied at all.
filter_check = widgets.Checkbox(
    value=False, description='Apply bandpass', disabled=False
)

# Lower and upper cut-off inputs; they are displayed in a later cell.
lowfreq = widgets.BoundedFloatText(
    value=8.0, description='l_freq filter:', **_freq_input_options
)
highfreq = widgets.BoundedFloatText(
    value=35.0, description='h_freq filter:', **_freq_input_options
)

display(filter_check)

Checkbox(value=False, description='Apply bandpass')

## Select the desired frequencies (the inputs only appear if "Apply bandpass" was ticked above):

In [22]:
# The cut-off inputs are only shown when the band-pass checkbox is ticked.
if filter_check.value:
    for frequency_input in (lowfreq, highfreq):
        display(frequency_input)

BoundedFloatText(value=8.0, description='l_freq filter:', max=150.0, step=0.1)

BoundedFloatText(value=35.0, description='h_freq filter:', max=150.0, step=0.1)

## Apply the desired filter:

In [32]:
# The cut-offs are taken from the widgets only when the checkbox is ticked;
# otherwise None is passed for both.
if filter_check.value:
    freqs = [lowfreq.value, highfreq.value]
else:
    freqs = [None, None]

# Maps each selected file name to a filtered copy of its recording.
filtered_dict = {}
for selected_file in name.value:
    # copy() keeps the recording in raw_eeg_dict unfiltered. load_data() is
    # needed because recordings opened with mne.io.read_raw are not preloaded
    # and filtering requires the samples to be in memory.
    filtered_dict[selected_file] = (
        raw_eeg_dict[selected_file]
        .copy()
        .load_data()
        .filter(l_freq=freqs[0], h_freq=freqs[1])
    )

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 8 - 35 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 8.00
- Lower transition bandwidth: 2.00 Hz (-6 dB cutoff frequency: 7.00 Hz)
- Upper passband edge: 35.00 Hz
- Upper transition bandwidth: 8.75 Hz (-6 dB cutoff frequency: 39.38 Hz)
- Filter length: 331 samples (1.655 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.4s


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 8 - 35 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 8.00
- Lower transition bandwidth: 2.00 Hz (-6 dB cutoff frequency: 7.00 Hz)
- Upper passband edge: 35.00 Hz
- Upper transition bandwidth: 8.75 Hz (-6 dB cutoff frequency: 39.38 Hz)
- Filter length: 331 samples (1.655 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.4s


## Select a processed file to plot:

In [40]:
# Dropdown for choosing which of the filtered recordings to plot.
processed_files = list(filtered_dict)

plotFile = widgets.Dropdown(
    options=processed_files,
    # Pre-select the first file; fall back to no selection (instead of
    # raising an IndexError) when nothing has been filtered yet.
    value=processed_files[0] if processed_files else None,
    description='File(s)',
    disabled=False,
)
plotFile

Dropdown(description='File(s)', options=('CLASubjectC1512233StLRHand.mat', 'CLASubjectC1512163StLRHand.mat'), …

## Plot the selected file:

In [42]:
# Look up the recording chosen in the dropdown and open it in the MNE browser.
selected_recording = filtered_dict[plotFile.value]
selected_recording.plot()

<mne_qt_browser._pg_figure.MNEQtBrowser at 0x7fa92fcdcf80>