In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import matplotlib.pyplot as plt
from scipy import signal
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, BatchNormalization, Dropout, Lambda, AveragePooling2D, Softmax, ReLU, Activation
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import time
from math import floor,inf
from sklearn.utils import shuffle
import keras_tuner as kt
from tensorflow import keras

In [2]:
###Check to see if GPU was available
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [None]:
###Enter tasks to be classified as a list e.g. left hand and right hand MI
labels = ['Left Hand', 'Right Hand']

In [4]:
###Maps class labels to numbers starting from 0
def map_labels(label):
    for i in range(len(labels)):
        if label == labels[i]:
            label = i
    return label

In [5]:
###Function to process raw EEG data and create training and validation datasets
def create_data(data_folder, filtered, samples, overlap, classes):
    b,a = signal.butter(2, Wn = [48,52], btype = 'bandstop', fs = 250)
    d,c = signal.butter(2, Wn = [4,38], btype = 'bandpass', fs = 250)
    
    data_dir = os.path.join(os.getcwd(),data_folder)
    
    mi_tasks=classes
    
    directories = {}
    for i in range(len(mi_tasks)):
        directories[mi_tasks[i]] = data_dir + '\\' + mi_tasks[i]

    samples = samples
    overlap = overlap
    unique = floor((1-overlap)*samples)
    x=[]
    y=[]
    all_data = {}
    filtered_data = {}

    for task in mi_tasks:
        all_data[task] = {}
        filtered_data[task] = {}    

    for i in directories:
        folders = os.listdir(directories[i])
        count = 0
        for w in range(len(folders)):
            current_dir = directories[i] + '\\' + folders[w]
            files = os.listdir(current_dir)
            for file in files:
                data = pd.read_csv(current_dir + '\\' + file, header = None)
                eeg_data = data.iloc[1:,1:9]
                if eeg_data.size >= 3250:
                    for div in range(int(eeg_data.size//(eeg_data.shape[1]*unique))):
                        if div*unique +samples <= eeg_data.size/eeg_data.shape[1]:
                            all_data[i][count]= eeg_data[div*unique:div*unique + samples]
                            if filtered:
                                to_filter = all_data[i][count]
                                filtered_data=pd.DataFrame(columns = range(0,eeg_data.shape[1]))
                                for chan in range(to_filter.shape[1]):
                                    notch_filtered_chan = signal.filtfilt(b,a,to_filter.iloc[:,chan], padtype='even')
                                    filtered_chan = signal.filtfilt(d,c,notch_filtered_chan, padtype='even')
                                    filtered_data[chan] = filtered_chan
                                x.append(np.transpose(filtered_data.to_numpy()))
                                    
                            else:
                                x.append(np.transpose(all_data[i][count].to_numpy()))
                            
                            y.append(i)
                            count += 1

                else:
                    print('* not accepted *')
                    print(file)
    x=np.array(x)
    x=x.reshape(x.shape[0],x.shape[1],x.shape[2],1)
    y = list(map(map_labels,y))
    x, y = shuffle(x,y, random_state=42)
    return all_data, x, y

In [6]:
train_data_folder = '05to20-08 Active 8chan FPZ Bias'
test_data_folder = '23&24-08 Active 8chan FPZ Bias'

In [8]:
#Create training and validation datasets with 2 sec crops with 90% overlap
train_data, x_train, y_train = create_data(train_data_folder, True, 500, 0.90, ['Left Hand', 'Right Hand'])
test_data, x_test, y_test = create_data(test_data_folder, True, 500, 0.90, ['Left Hand', 'Right Hand'])
print(x_train.shape)
print(x_test.shape)

(4336, 8, 500, 1)
(1441, 8, 500, 1)


In [13]:
###Class labels one-hot encoded
y_cat_train = to_categorical(y_train)
y_cat_test = to_categorical(y_test)

In [14]:
y_cat_train

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       ...,
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

In [15]:
x_train.shape

(4336, 8, 500, 1)

In [17]:
####  MODEL BUILDER with hyperparameter search spaces####
def build_model(hp):
    model = Sequential()
    
    hp_filters = hp.Int('Filters', min_value=10,max_value=160,step=10)
    hp_kern_len = hp.Int('Kernel1 Length', min_value=10, max_value=250, step=10)
    model.add(Conv2D(filters=hp_filters, kernel_size=(1,hp_kern_len), input_shape=(x_train.shape[1],x_train.shape[2],1))) #, activation = 'relu'))
    model.add(Conv2D(filters=hp_filters, kernel_size=(8,1), activation = 'elu'))
    hp_momentum = hp.Float('Momentum', min_value=0.05, max_value=0.95, step=0.05)
    model.add(BatchNormalization(momentum=hp_momentum, epsilon = 1e-05))
    model.add(Lambda(lambda x: x**2))
    hp_pool_len = hp.Int('Pooling Length', min_value=10, max_value=250, step=10)
    hp_strides = hp.Int('Pooling Strides', min_value = 5, max_value=50, step=5)
    model.add(AveragePooling2D(pool_size=(1,hp_pool_len),strides=hp_strides))
    
    model.add(Lambda(lambda x: tf.math.log(tf.clip_by_value(x, 1e-6, inf))))
    hp_dropout = hp.Float('Dropout Rate', min_value=0.1, max_value=0.9, step = 0.1)
    model.add(Dropout(hp_dropout))
    
    '''model.add(Conv2D(filters=4, kernel_size=(1,69))) #, activation = 'relu'))       
    
    model.add(Softmax())
    
    model.add(Lambda(lambda x: tf.squeeze(x, [1,2])))'''  #equivalent to flattening which was used instead
    
    model.add(Flatten())
    
    model.add(Dense(2, activation = 'sigmoid'))  #Use softmax and categorical crossentropy for multi-class
    
    loss_fn = 'binary_crossentropy'
    
    hp_learn_rate = hp.Choice('Learning Rate', values=[1e-1,1e-2,1e-3,1e-4,1e-5,1e-6])
    
    
    adam = tf.keras.optimizers.Adam(learning_rate=hp_learn_rate)
    model.compile(loss = loss_fn, optimizer=adam, metrics=['accuracy'], )  # other metrics at keras.io/metrics

    return model

In [18]:
###Initialise tuner
tuner = kt.Hyperband(build_model,
                     objective = 'val_accuracy', 
                     max_epochs = 500,
                     factor = 3,
                     directory=os.getcwd(),
                     project_name = 'TunedCNN-2classLHRH MI-ACTIVE 8chan') ###Folder name to save tuner

In [19]:
tuner.search_space_summary()

Search space summary
Default search space size: 7
Filters (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 160, 'step': 10, 'sampling': None}
Kernel1 Length (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 250, 'step': 10, 'sampling': None}
Momentum (Float)
{'default': 0.05, 'conditions': [], 'min_value': 0.05, 'max_value': 0.95, 'step': 0.05, 'sampling': None}
Pooling Length (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 250, 'step': 10, 'sampling': None}
Pooling Strides (Int)
{'default': None, 'conditions': [], 'min_value': 5, 'max_value': 50, 'step': 5, 'sampling': None}
Dropout Rate (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.9, 'step': 0.1, 'sampling': None}
Learning Rate (Choice)
{'default': 0.1, 'conditions': [], 'values': [0.1, 0.01, 0.001, 0.0001, 1e-05, 1e-06], 'ordered': True}


In [20]:
early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)

In [21]:
###Begin tuning
tuner.search(x_train,y_cat_train, epochs = 300, validation_data=(x_test,y_cat_test), callbacks = [early_stop], verbose=2)

Trial 725 Complete [00h 08m 52s]
val_accuracy: 0.5392088890075684

Best val_accuracy So Far: 0.6044413447380066
Total elapsed time: 03h 14m 27s
INFO:tensorflow:Oracle triggered exit


In [22]:
###Acquire best model
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 8, 411, 150)       13650     
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 1, 411, 150)       180150    
_________________________________________________________________
batch_normalization (BatchNo (None, 1, 411, 150)       600       
_________________________________________________________________
lambda (Lambda)              (None, 1, 411, 150)       0         
_________________________________________________________________
average_pooling2d (AveragePo (None, 1, 8, 150)         0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 1, 8, 150)         0         
_________________________________________________________________
dropout (Dropout)            (None, 1, 8, 150)         0

In [23]:
####How to get best parameters with examples
best_hps = tuner.get_best_hyperparameters(num_trials=2)[0]

In [24]:
best_hps['Learning Rate']

0.001

In [25]:
best_hps['Momentum']

0.05

In [39]:
###summary of tuning results for 10 best models
tuner.results_summary()   

Results summary
Results in C:\Users\Anthony\OneDrive - University of Witwatersrand\2020 MASTERS\GUI\TunedCNN-2classLHRH MI-ACTIVE 8chan
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
Filters: 150
Kernel1 Length: 90
Momentum: 0.05
Pooling Length: 130
Pooling Strides: 40
Dropout Rate: 0.30000000000000004
Learning Rate: 0.001
tuner/epochs: 19
tuner/initial_epoch: 7
tuner/bracket: 5
tuner/round: 2
tuner/trial_id: dfec56c6e03de71d17aee6a27cd0781d
Score: 0.6044413447380066
Trial summary
Hyperparameters:
Filters: 80
Kernel1 Length: 70
Momentum: 0.2
Pooling Length: 190
Pooling Strides: 35
Dropout Rate: 0.6
Learning Rate: 0.001
tuner/epochs: 19
tuner/initial_epoch: 7
tuner/bracket: 4
tuner/round: 1
tuner/trial_id: a408ba021de35fc9c8e0a9683eef2df9
Score: 0.6044413447380066
Trial summary
Hyperparameters:
Filters: 90
Kernel1 Length: 80
Momentum: 0.55
Pooling Length: 220
Pooling Strides: 20
Dropout Rate: 0.4
Learning Rate: 0.0001
tuner/epochs: 