In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Permute, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import SpatialDropout2D
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.layers import Input, Flatten
from tensorflow.keras.constraints import max_norm
from tensorflow.keras import backend as K

import pandas as pd


import tensorflow as tf
# Record whether TensorFlow reports an available GPU device. The flag is only
# stored here; nothing else in the visible notebook reads `use_gpu`.
# NOTE(review): tf.test.is_gpu_available is deprecated in TensorFlow 2.x in
# favour of tf.config.list_physical_devices('GPU') - confirm against the
# TensorFlow version this notebook targets before switching.
use_gpu = tf.test.is_gpu_available(
    cuda_only=False,
    min_cuda_compute_capability=None
)

#!pip install pyriemann

import numpy as np

# mne imports
import mne
from mne import io
#from mne.datasets import sample

# EEGNet-specific imports
#from EEGModels import EEGNet
from tensorflow.keras import utils as np_utils
from tensorflow.keras.callbacks import ModelCheckpoint


# tools for plotting confusion matrices
from matplotlib import pyplot as plt

import random 
import math


"""
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dense"""



'\nfrom keras.models import Sequential\nfrom keras.layers.convolutional import Convolution2D\nfrom keras.layers.convolutional import MaxPooling2D\nfrom keras.layers.core import Activation\nfrom keras.layers.core import Flatten\nfrom keras.layers.core import Dense'

In [2]:
!pip install mne
!pip install pyedflib




In [0]:
#import zipfile36 as zipfile
#with zipfile.ZipFile("Healthy Controls.zip", 'r') as zip_ref:
#    zip_ref.extractall("SZ Patients")
  

In [5]:
# Colab-only: attach Google Drive under /content/drive so files stored there
# can be read from the notebook. force_remount=True asks for the mount to be
# redone even when the drive is already mounted.
from google.colab import drive
#drive.mount('/content/drive')
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [2]:
from tensorflow.keras import layers
#https://www.tensorflow.org/guide/keras/rnn

def LSTM(samples, time_steps, nb_features, chans, nb_classes):
    """Build an uncompiled Keras ``Sequential`` classifier around one LSTM layer.

    Architecture: LSTM(128) -> Dense(32, relu, he_uniform) -> Dense(nb_classes, softmax).

    Parameters
    ----------
    samples : unused by the model; accepted so existing callers keep working.
    time_steps : size of the last input axis.
    nb_features : only printed for diagnostics, not used to build the model.
    chans : size of the first (non-batch) input axis.
    nb_classes : number of units in the softmax output layer.

    Returns
    -------
    tf.keras.Sequential
        The model expects inputs shaped ``(batch, chans, time_steps)``.
        NOTE(review): Keras reads ``input_shape`` as (timesteps, features), so
        the recurrence runs along the ``chans`` axis - confirm this is intended.
    """
    # Diagnostic output kept so notebook logs show the configuration in use.
    print('chans: ', chans)
    print('nb_features: ', nb_features)

    recurrent_layer = layers.LSTM(128, input_shape=(chans, time_steps))
    hidden_layer = Dense(32, kernel_initializer='he_uniform', activation='relu')
    output_layer = Dense(nb_classes, activation='softmax')

    return tf.keras.Sequential([recurrent_layer, hidden_layer, output_layer])

In [5]:

# Patient ids (file name without extension) whose recordings are left out of
# the dataset; checked in process_patient_group.
ignore_list = ['s07']  #list of patient files that should be skipped
# Number of consecutive rows grouped into one training example; passed to
# chunk() as the window size and reused later as `samples`.
sample_size = 2

#file_test = pd.read_csv("/content/drive/My Drive/Colab Notebooks/Healthy Controls/h01.edf")


In [3]:
import pyedflib
# Set mne's log level to WARNING for the file-reading calls below.
mne.set_log_level("WARNING")

# Root folder of the raw EDF recordings; file paths are built as
# parent_dir + '<group directory>/<patient id>.edf', so the trailing slash matters.
parent_dir = '../../Data/Raw/'

# get the minimum length of the files
def get_minimum_duration(group_directory_name, patient_group_file_prefix, n_files=14):
    """Return the shortest ``file_duration`` among one group's EDF recordings.

    Files are expected at
    ``parent_dir + '<group_directory_name>/<prefix><NN>.edf'`` with ``NN``
    running from 01 to ``n_files`` (zero-padded to two digits).

    Parameters
    ----------
    group_directory_name : sub-directory of ``parent_dir`` holding the group.
    patient_group_file_prefix : file-name prefix, e.g. ``'h'`` or ``'s'``.
    n_files : number of consecutively numbered files to inspect (default 14,
        the value that used to be hard-coded).

    Returns
    -------
    The minimum of the ``file_duration`` values reported by pyedflib.

    Raises
    ------
    ValueError
        If ``n_files`` is less than 1 (``min`` of an empty list).
    """
    file_durations = []
    for i in range(1, n_files + 1):
        patient_id = "{}{:02d}".format(patient_group_file_prefix, i)
        file_name = parent_dir + '{}/{}.edf'.format(group_directory_name, patient_id)
        print(file_name)
        f = pyedflib.EdfReader(file_name)
        try:
            file_durations.append(f.file_duration)
        finally:
            # Always release the file handle, even if reading the header fails.
            f.close()
    return min(file_durations)

# modified based on https://stackoverflow.com/a/48704557/2466781
def chunk(seq, size):
    """Cut ``seq`` into consecutive windows of ``size`` items.

    Each window is wrapped in its own ``pandas.DataFrame``. Trailing items
    that cannot fill a complete window are dropped.

    Returns a list of DataFrames (empty when ``seq`` is shorter than ``size``).
    """
    leftover = len(seq) % size
    usable_length = len(seq) - leftover  # last position covered by full windows

    windows = []
    for start in range(0, usable_length, size):
        window = seq[start:start + size]
        windows.append(pd.DataFrame(window))
    return windows

# modified version of process_patient_group in older notebooks
# Reads the raw EDF files, keeps the requested channels, trims every recording
# to the same row range and cuts the result into fixed-size windows.
# Adapted from page 1 of https://buildmedia.readthedocs.org/media/pdf/pyedflib/latest/pyedflib.pdf
def process_patient_group(group_directory_name, patient_group_file_prefix, 
                          minimum_original_duration, 
                          plot_channels = False,
                         channels = ['F8', 'F7', 'F4', 'F3', 'Fz']):
    """Load one patient group and return its data as fixed-size windows.

    For patients 01..14 of the group, the EDF file is read with mne, reduced
    to ``channels`` and sliced to rows ``150 : minimum_original_duration - 30``.
    The per-patient frames are concatenated and split by ``chunk`` into
    windows of ``sample_size`` rows.

    Reads the module-level names ``parent_dir``, ``ignore_list`` and
    ``sample_size``.

    Parameters
    ----------
    group_directory_name : sub-directory of ``parent_dir`` holding the group.
    patient_group_file_prefix : file-name prefix, e.g. ``'h'`` or ``'s'``.
    minimum_original_duration : upper trimming bound (minus 30) for every file.
    plot_channels : currently unused; kept for interface compatibility.
    channels : column names to keep from each recording (never mutated here).

    Returns
    -------
    list
        One entry per window; each entry is a one-element list wrapping an
        array of shape ``(sample_size, len(channels))``. Empty when every
        patient is in ``ignore_list``.

    NOTE(review): the slice bounds are applied to DataFrame rows, while the
    original comments describe them as seconds ("drop the first 150 and last
    30 seconds"). If seconds are intended the bounds need scaling by the
    sampling rate - confirm before changing, as it alters the dataset size.
    """
    trimmed_frames = []

    for i in range(1, 15):  # reading 14 files
        patient_id = "{}{:02d}".format(patient_group_file_prefix, i)
        if patient_id in ignore_list:
            # Skip excluded patients before touching their file at all.
            continue

        file_name = parent_dir + '{}/{}.edf'.format(group_directory_name, patient_id)
        recording = mne.io.read_raw_edf(file_name).to_data_frame()[channels]
        ## based on visual inspection, drop the first 150 rows and trim the tail
        trimmed_frames.append(recording[150:(minimum_original_duration - 30)])

    if not trimmed_frames:
        return []

    # Concatenate once instead of growing a DataFrame inside the loop
    # (DataFrame.append is quadratic and no longer exists in pandas >= 2.0).
    combined = pd.concat(trimmed_frames)

    return [[np.asarray(batch.values)] for batch in chunk(combined, sample_size)]
        

In [6]:
# Retrieve patient data, using a time window determined by the shortest recording
# patient s07 is removed

# Shortest recording across both groups; used as the common trimming bound.
minimum_duration = min(get_minimum_duration("Healthy Controls", "h"), get_minimum_duration('SZ Patients', 's'))
print('minimum duration: ', minimum_duration)


# Channels kept from every recording (12 names, matching `chans = 12` used
# when the model input is prepared further down).
target_channels = ['T4', 'T6', 'O2', 'T3', 'T5', 'O1',
                   'C4', 'P4', 'C3', 'P3', 'Cz', 'Pz']

print("Healthy Controls")
# hc_data stays a plain list here; it is converted to an array only for display.
hc_data = process_patient_group('Healthy Controls', 'h', minimum_duration, channels=target_channels)
display(np.asarray(hc_data).shape)


print('Sz Patients')
# sz_data, unlike hc_data, is stored as a numpy array.
sz_data = np.asarray(process_patient_group('SZ Patients', 's', minimum_duration, channels=target_channels))
display(np.asarray(sz_data).shape)


##### combine groups and create Y (labels)

# Stack both groups along the first axis: healthy-control windows first, then patients.
X =  np.concatenate((hc_data, sz_data), axis=0)
display('input size: ', X.shape)
# Labels follow the same order as X: 0 = healthy control, 1 = SZ patient.
y = ([0] * len(hc_data)) +( [1] * len(sz_data))




../../Data/Raw/Healthy Controls/h01.edf
../../Data/Raw/Healthy Controls/h02.edf
../../Data/Raw/Healthy Controls/h03.edf
../../Data/Raw/Healthy Controls/h04.edf
../../Data/Raw/Healthy Controls/h05.edf
../../Data/Raw/Healthy Controls/h06.edf
../../Data/Raw/Healthy Controls/h07.edf
../../Data/Raw/Healthy Controls/h08.edf
../../Data/Raw/Healthy Controls/h09.edf
../../Data/Raw/Healthy Controls/h10.edf
../../Data/Raw/Healthy Controls/h11.edf
../../Data/Raw/Healthy Controls/h12.edf
../../Data/Raw/Healthy Controls/h13.edf
../../Data/Raw/Healthy Controls/h14.edf
../../Data/Raw/SZ Patients/s01.edf
../../Data/Raw/SZ Patients/s02.edf
../../Data/Raw/SZ Patients/s03.edf
../../Data/Raw/SZ Patients/s04.edf
../../Data/Raw/SZ Patients/s05.edf
../../Data/Raw/SZ Patients/s06.edf
../../Data/Raw/SZ Patients/s07.edf
../../Data/Raw/SZ Patients/s08.edf
../../Data/Raw/SZ Patients/s09.edf
../../Data/Raw/SZ Patients/s10.edf
../../Data/Raw/SZ Patients/s11.edf
../../Data/Raw/SZ Patients/s12.edf
../../Data/Raw/SZ Pa

(3920, 1, 2, 12)

Sz Patients


(3640, 1, 2, 12)

'input size: '

(7560, 1, 2, 12)

In [0]:


#from google.colab import files
#uploaded = files.upload()

In [0]:
# Create the relative 'tmp' directory if it doesn't already exist.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of a separate os.path.exists() test.
# NOTE(review): the model checkpoint in this notebook is written to the
# absolute path '/tmp/LSTM_checkpoint.h5', not to this relative directory -
# confirm which location is intended.
import os
os.makedirs('tmp', exist_ok=True)



In [0]:
# get a list of randomly selected sets of numbers based on a range
# the proportion of values selected for each set is determined by the ratio_array
def get_mixed_indexes_for_ml_train_test(length, ratios_array):
    """Randomly partition ``range(length)`` into disjoint index lists.

    The indexes are shuffled once and then sliced, so every index appears at
    most once overall: no duplicates inside a split and no overlap between
    splits. (The previous implementation drew with replacement, which put
    repeated indexes into each split.)

    Parameters
    ----------
    length : number of items to split; indexes are ``0 .. length - 1``.
    ratios_array : iterable of fractions; split ``k`` receives
        ``floor(ratios_array[k] * length)`` indexes.

    Returns
    -------
    list of list of int
        One list per ratio, in the same order as ``ratios_array``.

    Raises
    ------
    ValueError
        If a ratio is negative or the requested sizes exceed ``length``.
    """
    sizes = [math.floor(ratio * length) for ratio in ratios_array]
    if any(size < 0 for size in sizes):
        raise ValueError("ratios must be non-negative, got {!r}".format(ratios_array))
    if sum(sizes) > length:
        raise ValueError(
            "ratios {!r} request {} indexes but only {} are available".format(
                ratios_array, sum(sizes), length))

    # One shuffle of the full range; consecutive slices are disjoint by construction.
    shuffled = random.sample(range(length), length)

    output_indexes = []
    start = 0
    for size in sizes:
        output_indexes.append(shuffled[start:start + size])
        start += size
    return output_indexes
    



In [19]:
## TODO - automatically create the TF output directory (/tmp) if it does not exist

# adapted from https://github.com/vlawhern/arl-eegmodels/blob/master/examples/ERP.py

##################### Process, filter and epoch the data ######################
#data_path = sample.data_path()



# Set parameters and read data
"""raw_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw.fif'
event_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw-eve.fif'
tmin, tmax = -0., 1
event_id = dict(aud_l=1, aud_r=2, vis_l=3, vis_r=4)

# Setup for reading the raw data
raw = io.Raw(raw_fname, preload=True, verbose=False)
raw.filter(2, None, method='iir')  # replace baselining with high-pass
events = mne.read_events(event_fname)

raw.info['bads'] = ['MEG 2443']  # set bad channels
picks = mne.pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False,
                       exclude='bads')
display(raw)
# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=False,
                    picks=picks, baseline=None, preload=True, verbose=False)
labels = epochs.events[:, -1]
"""


# Layout of one example in X: (kernels, samples, chans).
kernels, chans, samples = 1, 12, sample_size
full_size = len(X)

# Random 70 / 15 / 15 split of the example indexes into train / validate / test.
train_idxs, validate_idxs, test_idxs = get_mixed_indexes_for_ml_train_test(len(X), [.70, 0.15, 0.15])

X_train      = X[train_idxs]
Y_train      = np.asarray(y)[train_idxs]
X_validate   = X[validate_idxs]
Y_validate   = np.asarray(y)[validate_idxs]
X_test       = X[test_idxs]
Y_test       = np.asarray(y)[test_idxs]

############################# LSTM portion ####################################

# convert labels to one-hot encodings.
Y_train      = np_utils.to_categorical(Y_train, num_classes=2)
Y_validate   = np_utils.to_categorical(Y_validate, num_classes=2)
Y_test       = np_utils.to_categorical(Y_test, num_classes=2)

# The window length is defined once (sample_size); keep the model in sync with it.
time_steps = samples

if X.shape[1:] != (kernels, time_steps, chans):
    raise ValueError(
        "expected examples shaped {}, got {}".format((kernels, time_steps, chans), X.shape[1:]))

# The model takes (batch, chans, time_steps). Going from (batch, 1, samples, chans)
# to that layout means dropping the singleton axis and swapping the last two axes.
# A plain reshape would keep memory order and mix values from different channels
# and time points into the same row.
X_train      = X_train[:, 0].transpose(0, 2, 1)
X_validate   = X_validate[:, 0].transpose(0, 2, 1)
X_test       = X_test[:, 0].transpose(0, 2, 1)

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

model = LSTM(samples=samples, time_steps=time_steps, chans=chans, nb_features=1, nb_classes=2, )

# `learning_rate` is the current name of the argument formerly spelled `lr`.
adam = tf.keras.optimizers.Adam(learning_rate=0.000005,
                                beta_1=0.99,
                                beta_2=0.999,
                                epsilon=1e-07,)

# compile the model and set the optimizers
model.compile(loss='categorical_crossentropy', optimizer=adam, 
              metrics = ['accuracy'])

# count number of parameters in the model
numParams    = model.count_params()    

# set a valid path for your system to record model checkpoints;
# save_best_only=True keeps only the best epoch's weights (val_loss by default)
checkpointer = ModelCheckpoint(filepath='/tmp/LSTM_checkpoint.h5', verbose=1,
                               save_best_only=True)

###############################################################################
# if the classification task was imbalanced (significantly more trials in one
# class versus the others) you can assign a weight to each class during 
# optimization to balance it out. This data is approximately balanced so we 
# don't need to do this, but is shown here for illustration/completeness. 
###############################################################################



X_train shape: (5292, 12, 2)
5292 train samples
1134 test samples
chans:  12
nb_features:  1
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [21]:
# the syntax is {class_1:weight_1, class_2:weight_2,...}. Here just setting
# the weights all to be 1. The model has two output classes (0 and 1), so
# exactly those two keys are listed.
class_weights = {0: 1, 1: 1}

################################################################################
# fit the model. Validation data is evaluated after every epoch and the
# checkpoint callback stores the best weights seen so far.
# To weight the classes, pass class_weight = class_weights to fit().
################################################################################
fittedModel = model.fit(X_train, Y_train, batch_size = 32, epochs = 600, 
                        verbose = 2, validation_data=(X_validate, Y_validate),
                        callbacks=[checkpointer])

# load optimal weights (the checkpoint written by the callback above)
model.load_weights('/tmp/LSTM_checkpoint.h5')

###############################################################################
# make prediction on test set.
###############################################################################

probs       = model.predict(X_test)
# predicted class = index of the largest softmax output
preds       = probs.argmax(axis = -1)  
# Y_test is one-hot encoded, so argmax recovers the integer label
acc         = np.mean(preds == Y_test.argmax(axis=-1))
print("Classification accuracy: %f " % (acc))



Train on 5292 samples, validate on 1134 samples
Epoch 1/600

Epoch 00001: val_loss improved from 0.63807 to 0.63168, saving model to /tmp/LSTM_checkpoint.h5
5292/5292 - 4s - loss: 0.6394 - acc: 0.6852 - val_loss: 0.6317 - val_acc: 0.7046
Epoch 2/600

Epoch 00002: val_loss improved from 0.63168 to 0.62837, saving model to /tmp/LSTM_checkpoint.h5
5292/5292 - 4s - loss: 0.6361 - acc: 0.6893 - val_loss: 0.6284 - val_acc: 0.7090
Epoch 3/600

Epoch 00003: val_loss improved from 0.62837 to 0.62518, saving model to /tmp/LSTM_checkpoint.h5
5292/5292 - 4s - loss: 0.6329 - acc: 0.6939 - val_loss: 0.6252 - val_acc: 0.7152
Epoch 4/600

Epoch 00004: val_loss improved from 0.62518 to 0.62198, saving model to /tmp/LSTM_checkpoint.h5
5292/5292 - 4s - loss: 0.6297 - acc: 0.6992 - val_loss: 0.6220 - val_acc: 0.7187
Epoch 5/600

Epoch 00005: val_loss improved from 0.62198 to 0.61896, saving model to /tmp/LSTM_checkpoint.h5
5292/5292 - 5s - loss: 0.6265 - acc: 0.6997 - val_loss: 0.6190 - val_acc: 0.7152
Ep

NameError: ignored

In [22]:
!pip freeze

absl-py==0.9.0
alabaster==0.7.12
albumentations==0.1.12
altair==4.0.0
asgiref==3.2.3
astor==0.8.1
astropy==4.0
atari-py==0.2.6
atomicwrites==1.3.0
attrs==19.3.0
audioread==2.1.8
autograd==1.3
Babel==2.8.0
backcall==0.1.0
backports.tempfile==1.0
backports.weakref==1.0.post1
beautifulsoup4==4.6.3
bleach==3.1.0
blis==0.2.4
bokeh==1.4.0
boto==2.49.0
boto3==1.10.47
botocore==1.13.47
Bottleneck==1.3.1
branca==0.3.1
bs4==0.0.1
bz2file==0.98
cachetools==4.0.0
certifi==2019.11.28
cffi==1.13.2
chainer==6.5.0
chardet==3.0.4
chart-studio==1.0.0
Click==7.0
cloudpickle==1.2.2
cmake==3.12.0
colorlover==0.3.0
community==1.0.0b1
contextlib2==0.5.5
convertdate==2.2.0
coverage==3.7.1
coveralls==0.5
crcmod==1.7
cufflinks==0.17.0
cvxopt==1.2.3
cvxpy==1.0.25
cycler==0.10.0
cymem==2.0.3
Cython==0.29.14
daft==0.0.4
dask==2.9.1
dataclasses==0.7
datascience==0.10.6
decorator==4.4.1
defusedxml==0.6.0
descartes==1.1.0
dill==0.3.1.1
distributed==1.25.3
Django==3.0.2
dlib==19.18.0
dm-sonnet==1.35
docopt==0.6.2
docu

In [23]:
# Record the interpreter version alongside the package list above so the
# environment used for this run can be reconstructed.
from platform import python_version

print(python_version())

3.6.9
