# EEG Classification
updated: Sep. 01, 2018

Data: https://www.physionet.org/pn4/eegmmidb/

## 1. Data Downloads

### Warning: Executing these blocks will automatically create directories and download datasets.

In [19]:
# System
import requests
import re
import os
import pathlib
import urllib

# Modeling & Preprocessing
from keras.layers import Conv2D, Dense, TimeDistributed, Dropout, Flatten, Activation, BatchNormalization, LSTM
from keras.models import Sequential, model_from_json, Model
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from keras import initializers, optimizers, callbacks, models

# Essential Data Handling
import numpy as np
import pandas as pd

# Get Paths
from glob import glob

# EEG package
from mne import find_events, Epochs, concatenate_raws, pick_types
from mne.channels import read_montage
from mne.io import read_raw_edf

In [4]:
CONTEXT = 'pn4/'
MATERIAL = 'eegmmidb/'
URL = 'https://www.physionet.org/' + CONTEXT + MATERIAL

# Change this directory according to your setting
USERDIR = '/Users/Jimmy/data/PhysioNet/'

page = requests.get(URL).text
FOLDERS = sorted(list(set(re.findall(r'S[0-9]+', page))))

URLS = [URL+x+'/' for x in FOLDERS]

In [5]:
# Warning: Executing this block will create folders
for folder in FOLDERS:
    pathlib.Path(USERDIR +'/'+ folder).mkdir(parents=True, exist_ok=True)

In [None]:
# Warning: Executing this block will start downloading data
for i, folder in enumerate(FOLDERS):
    page = requests.get(URLS[i]).text
    subs = list(set(re.findall(r'S[0-9]+R[0-9]+', page)))
    
    print('Working on {}, {:.1%} completed'.format(folder, (i+1)/len(FOLDERS)))
    for sub in subs:
        urllib.request.urlretrieve(URLS[i]+sub+'.edf', os.path.join(USERDIR, folder, sub+'.edf'))

## Data Description

Subjects performed different motor/imagery tasks while 64-channel EEG were recorded using the BCI2000 system (http://www.bci2000.org). Each subject performed 14 experimental runs: two one-minute baseline runs (one with eyes open, one with eyes closed), and three two-minute runs of each of the four following tasks:
A target appears on either the left or the right side of the screen. The subject opens and closes the corresponding fist until the target disappears. Then the subject relaxes.
A target appears on either the left or the right side of the screen. The subject imagines opening and closing the corresponding fist until the target disappears. Then the subject relaxes.
A target appears on either the top or the bottom of the screen. The subject opens and closes either both fists (if the target is on top) or both feet (if the target is on the bottom) until the target disappears. Then the subject relaxes.
A target appears on either the top or the bottom of the screen. The subject imagines opening and closing either both fists (if the target is on top) or both feet (if the target is on the bottom) until the target disappears. Then the subject relaxes.

The data are provided here in EDF+ format (containing 64 EEG signals, each sampled at 160 samples per second, and an annotation channel). For use with PhysioToolkit software, rdedfann generated a separate PhysioBank-compatible annotation file (with the suffix .event) for each recording. The .event files and the annotation channels in the corresponding .edf files contain identical data.

## 2. Raw Data Import

I will use a EEG data handling package named MNE (https://martinos.org/mne/stable/index.html) to import raw data and annotation for events from edf files. This package also provides essential signal analysis features, e.g. band-pass filtering. The raw data were filtered using 1Hz of high-pass filter.

In this research, there are 5 classes for the data: imagined motion of right fist, left fist, both fists, both feet, and rest with eyes closed. A data from one of the 109 subjects was excluded as the record was severely corrupted.

In [3]:
# Get file paths
PATH = '/Users/jimmy/data/PhysioNet/'
SUBS = glob(PATH + 'S[0-9]*')
FNAMES = sorted([x[-4:] for x in SUBS])

# Remove subject #89 with damaged data
FNAMES.remove('S089')

In [4]:
def get_data(subj_num=FNAMES, epoch_sec=0.0625):
    """ Import each subject`s trials and make a 3D array
        Output shape: (Trial*Channel*TimeFrames)
        
        Some edf+ files recorded at low sampling rate, 128Hz, are excluded. 
        Majority was sampled at 160Hz.
        
        epoch_sec: time interval for one segment of mashes
        """
    
    # Event codes mean different actions for two groups of runs
    run_type_0 = '02'.split(',')
    run_type_1 = '04,08,12'.split(',')
    run_type_2 = '06,10,14'.split(',')

    # To calculated completion rate
    count = 0
    
    # Initiate X, y
    X = []
    y = []
    
    # fixed numbers
    nChan = 64 
    sfreq = 160
    sliding = epoch_sec/2 
    
    # Sub-function to assign X and X, y
    def append_X(n_segments, old_x):
        new_x = old_x + [data[:, int(sfreq*sliding*n):int(sfreq*sliding*(n+2))] for n in range(n_segments)\
                     if data[:, int(sfreq*sliding*n):int(sfreq*sliding*(n+2))].shape==(nChan, int(sfreq*epoch_sec))]
        return new_x
    
    def append_X_Y(run_type, event, old_x, old_y):
        # Number of sliding windows
        n_segments = int(event[1]/epoch_sec)*2-1
        
        # Instantiate new_x, new_y
        new_y = old_y
        new_x = old_x
        
        # y assignment
        if run_type == 1:
            if event[2] == 'T1':
                new_y = old_y + [1]*n_segments
                new_x = append_X(n_segments, old_x)

            elif event[2] == 'T2':
                new_y = old_y + [2]*n_segments
                new_x = append_X(n_segments, old_x)
        
        if run_type == 2:
            if event[2] == 'T1':
                new_y = old_y + [3]*n_segments
                new_x = append_X(n_segments, old_x)
            
            elif event[2] == 'T2':
                new_y = old_y + [4]*n_segments
                new_x = append_X(n_segments, old_x)
        
        return new_x, new_y
    
    # Iterate over subj_num: S001, S002, S003...
    for subj in subj_num:
        # Return completion rate
        count+=1
        if len(subj_num)//count == 10:
            print('working on {}, {:.1%} completed'.format(subj, count/len(subj_num)))

        # Get file names
        fnames = glob(os.path.join(PATH, subj, subj+'R*.edf'))
        fnames = [name for name in fnames if name[-6:-4] in run_type_0+run_type_1+run_type_2]
        
        for i, fname in enumerate(fnames):
            
            # Import data into MNE raw object
            raw = read_raw_edf(fname, preload=True, verbose=False)
            picks = pick_types(raw.info, eeg=True)
            
            if raw.info['sfreq'] != 160:
                print(f'{subj} is sampled at 128Hz so will be excluded.')
                break
            
            # High-pass filtering
            raw.filter(l_freq=1, h_freq=None, picks=picks)
            
            # Get annotation
            events = raw.find_edf_events()
            
            # Get data
            data = raw.get_data(picks=picks)
            
            # Number of this run
            which_run = fname[-6:-4]
            
            """ Assignment Starts """ 
            # run 1 - baseline (eye closed)
            if which_run in run_type_0:

                # Number of sliding windows
                n_segments = int((raw.n_times/(epoch_sec*sfreq))*2-1)
                
                # Append 0`s based on number of windows
                y.extend([0]*n_segments)
                X = append_X(n_segments, X)
                    
            # run 4,8,12 - imagine opening and closing left or right fist    
            elif which_run in run_type_1:
                
                for i, event in enumerate(events):
                    X, y = append_X_Y(run_type=1, event=event, old_x=X, old_y=y)
                        
            # run 6,10,14 - imagine opening and closing both fists or both feet
            elif which_run in run_type_2:
                   
                for i, event in enumerate(events):         
                    X, y = append_X_Y(run_type=2, event=event, old_x=X, old_y=y)
                        
    X = np.stack(X)
    y = np.array(y).reshape((-1,1))
    return X, y

In [8]:
''' 
This code is to test MNE raw object

subj = FNAMES[0]
fnames = glob(os.path.join(PATH, subj, subj+'R*'+'.edf'))
raw = read_raw_edf(fnames[5], preload=True, verbose=False)
'''

In [5]:
X,y = get_data(FNAMES, epoch_sec=0.0625)

working on S010, 9.3% completed
EDF+ with overlapping events are not fully supported
EDF+ with overlapping events are not fully supported


  raw = read_raw_edf(fname, preload=True, verbose=False)
  raw = read_raw_edf(fname, preload=True, verbose=False)


S088 is sampled at 128Hz so will be excluded.
EDF+ with overlapping events are not fully supported
EDF+ with overlapping events are not fully supported


  raw = read_raw_edf(fname, preload=True, verbose=False)
  raw = read_raw_edf(fname, preload=True, verbose=False)


S092 is sampled at 128Hz so will be excluded.
EDF+ with overlapping events are not fully supported
EDF+ with overlapping events are not fully supported


  raw = read_raw_edf(fname, preload=True, verbose=False)
  raw = read_raw_edf(fname, preload=True, verbose=False)


S100 is sampled at 128Hz so will be excluded.


In [10]:
print(X.shape)
print(y.shape)

(1425410, 64, 10)
(1425410, 1)


## 3. Data Preprocessing

The original goal of applying neural networks is to exclude hand-crafted algorithms & preprocessing as much as possible. I did not use any proprecessing techniques further than standardization to build an end-to-end classifer from the dataset

In [None]:
# y backup
ori_y = y

# y encoding
oh = OneHotEncoder()
y = oh.fit_transform(ori_y).toarray()

# Shuffle trials
np.random.seed(43)
trials = X.shape[0]
shuffle_indices = np.random.permutation(trials)
X = X[shuffle_indices]
y = y[shuffle_indices]

# Test set seperation
test_ratio = 0.2
train_size = int(trials*(1-test_ratio))
X_train, X_test, y_train, y_test = X[:train_size,:,:], X[train_size:,:,:],\
                                    y[:train_size,:], y[train_size:,:]
    
# Z-score Normalization
def scale_data(X):
    shape = X.shape
    scaler = StandardScaler()
    scaled_X = np.zeros((shape[0], shape[1], shape[2]))
    for i in range(shape[0]):
        for z in range(shape[2]):
            scaled_X[i, :, z] = np.squeeze(scaler.fit_transform(X[i, :, z].reshape(-1, 1)))
        if i%int(shape[0]/10) == 0:
            print('{:.2%} done'.format((i+1)/shape[0]))   
    return scaled_X
            
X_train, X_test  = scale_data(X_train), scale_data(X_test)

As the EEG recording instrument has 3D locations over the subjects\` scalp, it is essential for the model to learn from the spatial pattern as well as the temporal pattern. I transformed the data into 2D meshes that represents the locations of the electrodes so that stacked convolutional neural networks can grasp the spatial information.

In [None]:
## Make 2D meshes
# Import one raw EEG data to get electrode locations
subj = FNAMES[0]
fnames = glob(os.path.join(PATH, subj, subj+'R*'+'.edf'))
raw = read_raw_edf(fnames[3], preload=True, verbose=False)
ch_names = raw.info['ch_names'][:-1]

# 'ch_index' is a dictionary - keys: electrodes, vals: column index of electrodes
ch_index = {re.findall("\w+[0-9]?", i)[0]:ch_names.index(i) for i in ch_names}; ch_index

In [None]:
def convert_mesh(X, ch_index=ch_index):
    
    mesh = np.zeros((X.shape[0], X.shape[2], 10, 11))
    X = np.swapaxes(X, 1, 2)
    
    # 1st line
    mesh[:, :, 0, 4:7] = X[:,:,21:24]; print('1st finished')
    
    # 2nd line
    mesh[:, :, 1, 3:8] = X[:,:,24:29]; print('2nd finished')
    
    # 3rd line
    mesh[:, :, 2, 1:10] = X[:,:,29:38]; print('3rd finished')
    
    # 4th line
    mesh[:, :, 3, 1:10] = np.concatenate((X[:,:,ch_index['Ft7']].reshape(-1, X.shape[1], 1),\
                                          X[:,:,0:7], X[:,:,ch_index['Ft8']].reshape(-1, X.shape[1], 1)), axis=2)
    print('4th finished')
    
    # 5th line
    mesh[:, :, 4, 0:11] = np.concatenate((X[:,:,(ch_index['T9'],ch_index['T7'])],\
                                        X[:,:,7:14], X[:,:,(ch_index['T8'],ch_index['T10'])]), axis=2)
    print('5th finished')
    
    # 6th line
    mesh[:, :, 5, 1:10] = np.concatenate((X[:,:,ch_index['Tp7']].reshape(-1, X.shape[1], 1),\
                                        X[:,:,14:21], X[:,:,ch_index['Tp8']].reshape(-1, X.shape[1], 1)), axis=2)
    print('6th finished')
               
    # 7th line
    mesh[:, :, 6, 1:10] = X[:,:,46:55]; print('7th finished')
    
    # 8th line
    mesh[:, :, 7, 3:8] = X[:,:,55:60]; print('8th finished')
    
    # 9th line
    mesh[:, :, 8, 4:7] = X[:,:,60:63]; print('9th finished')
    
    # 10th line
    mesh[:, :, 9, 5] = X[:,:,63]; print('10th finished')
    
    return mesh

In [None]:
# Make meshes - Dimension: (Sample * Channel * Width * Height)
X_train, X_test = convert_mesh(X_train), convert_mesh(X_test)

In [None]:
# Check out the shape of the mesh
np.set_printoptions(precision=2, linewidth=100)
X_train[1][0]

## 4. Modeling - Time-Distributed CNN + RNN

Training Plan:

+ 4 GPU units (Nvidia Tesla P100) were used to train this neural network.
+ Instead of training the whole model at once, I trained the first block (CNN) first. Then using the trained parameters as initial values, I trained the next blocks step-by-step. This approach can greatly reduce the time required for training and help avoiding falling into local minimums.
+ The first blocks (CNN) can be applied for other EEG classification models as a pre-trained base.

In [None]:
# Make another dimension, 1, to apply CNN for each time frame.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], X_train.shape[3], 1)
X_test = X_test.reshape(X_test.shape[0], X_train.shape[1], X_train.shape[2], X_train.shape[3], 1)

In [20]:
## Complicated Model - the same as Zhang`s
input_shape = (10, 10, 11, 1)
lecun = initializers.lecun_normal(seed=42)

# TimeDistributed Wrapper
def timeDist(layer, prev_layer, name):
    return layers.TimeDistributed(layer, name=name)(prev_layer)
    

# Input layer
inputs = layers.Input(shape=input_shape)

# Convolutional layers block
x = timeDist(Conv2D(32, (3,3), padding='same', 
                    data_format='channels_last', kernel_initializer=lecun), inputs, name='CNN1')
x = BatchNormalization(name='batch1')(x)
x = Activation('elu', name='act1')(x)
x = timeDist(Conv2D(64, (3,3), padding='same', data_format='channels_last', kernel_initializer=lecun), x, name='CNN2')
x = BatchNormalization(name='batch2')(x)
x = Activation('elu', name='act2')(x)
x = timeDist(Conv2D(128, (3,3), padding='same', data_format='channels_last', kernel_initializer=lecun), x, name='CNN3')
x = BatchNormalization(name='batch3')(x)
x = Activation('elu', name='act3')(x)
x = timeDist(Flatten(), x, name='flatten')

# Fully connected layer block
y = Dense(1024, kernel_initializer=lecun, name='FC')(x)
y = Dropout(0.5, name='dropout1')(y)
y = BatchNormalization(name='batch4')(y)
y = Activation(activation='elu')(y)

# Recurrent layers block
z = LSTM(64, kernel_initializer=lecun, return_sequences=True, name='LSTM1')(y)
z = LSTM(64, kernel_initializer=lecun, name='LSTM2')(z)

# Fully connected layer block
h = Dense(1024, kernel_initializer=lecun, activation='elu', name='FC2')(z)
h = Dropout(0.5, name='dropout2')(h)

# Output layer
outputs = Dense(5, activation='softmax')(h)

# Model compile
model = Model(inputs=inputs, outputs=outputs)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        (None, 10, 10, 11, 1)     0         
_________________________________________________________________
CNN1 (TimeDistributed)       (None, 10, 10, 11, 32)    320       
_________________________________________________________________
batch1 (BatchNormalization)  (None, 10, 10, 11, 32)    128       
_________________________________________________________________
act1 (Activation)            (None, 10, 10, 11, 32)    0         
_________________________________________________________________
CNN2 (TimeDistributed)       (None, 10, 10, 11, 64)    18496     
_________________________________________________________________
batch2 (BatchNormalization)  (None, 10, 10, 11, 64)    256       
_________________________________________________________________
act2 (Activation)            (None, 10, 10, 11, 64)    0         
__________

In [None]:
# Load a model to transfer pre-trained parameters
trans_model = model.load('CNN_3blocks.h5')

# Transfer learning - parameter copy & paste
which_layer = 'CNN1,CNN2,CNN3,batch1,batch2,batch3'.split(',')
layer_names = [layer.name for layer in model.layers]
trans_layer_names = [layer.name for layer in tran_model.layers]

for layer in which_layer:
    ind = layer_names.index(layer)
    trans_ind = trans_layer_names.index(layer)
    model.layers[ind].set_weights(trans_model.layers[trans_ind].get_weights())
    
for layer in model.layers[:9]: # Freeze the first 9 layers(CNN block)
    layer.trainable = False

In [None]:
# Turn on multi-GPU mode
model = multi_gpu_model(model, gpus=4)

In [None]:
callbacks_list = [callbacks.ModelCheckpoint('model.h5', save_best_only=True, monitor='val_loss'),
                 callbacks.EarlyStopping(monitor='acc', patience=3),
                 callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5),
                 callbacks.TensorBoard(log_dir='./my_log_dir/', histogram=1)]

# Start training
model.compile(loss='categorical_crossentropy', optimizer=ptimizers.adam(lr=0.001), metrics=['acc'])
model.fit(X_train, y_train, batch_size=64, epochs=5000, validation_data=(X_test, y_test))