# EEG Classification using CNN+LSTM model

In [6]:
import os
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D
from keras.optimizers import RMSprop
from keras.callbacks import Callback, ProgbarLogger, BaseLogger
from keras import backend as K
from keras.regularizers import l1_l2

import data as dt

Using TensorFlow backend.


In [4]:
reload(dt)

<module 'data' from 'data.py'>

In [7]:
path_to_data = '/home/moskaleona/alenadir/data/rawData' #'C:/Users/alena/Desktop/homed/laba/data/rawData'

## Pair learning

In [8]:
data = dt.DataBuildClassifier(path_to_data).get_data([25, 33], shuffle=True, random_state=1, resample_to=128, windows=[(0.2, 0.5)],baseline_window=(0.2, 0.3))

In [6]:
print('Percentage of target class: %f %%'%(data[33][1].mean()*100))
print('Percentage of target class: %f %%'%(data[25][1].mean()*100))

Percentage of target class: 64.027539 %
Percentage of target class: 71.732523 %


In [29]:
print data[33][0].shape
print data[33][1].shape

(581, 39, 19)
(581,)


In [4]:
from sklearn.metrics import roc_auc_score
import logging

class LossMetricHistory(ProgbarLogger):
    def __init__(self, validation_data=(), verbose=1):
        super(LossMetricHistory, self).__init__()
        self.x_val, self.y_val = validation_data
        self.verbose = verbose
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(logging.INFO)
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        formatter = logging.Formatter("%(message)s")
        console.setFormatter(formatter)
        if len(self.logger.handlers) > 0:
            self.logger.handlers = []
        self.logger.addHandler(console)
            
    
    def on_train_begin(self, logs={}):
        self.logger.info("Training began")
        self.losses = []
        self.val_losses = []
        self.accs = []
        self.val_accs = []
        self.aucs = []
    
    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.accs.append(logs.get('acc'))
        self.val_accs.append(logs.get('val_acc'))
        
        self.y_pred = self.model.predict_proba(x_val, verbose=0)
        self.aucs.append(roc_auc_score(self.y_val, self.y_pred))
        self.logger.info("epoch %d results: train loss = %.6f, val loss = %.6f"%(epoch + 1, self.losses[-1], self.val_losses[-1]) + 
                     "\n\t\t\tacc = %.6f, val acc = %.6f"%(self.accs[-1], self.val_accs[-1]) +
                     "\n\t\t\tauc = %.6f"%(self.aucs[-1]))
        

In [39]:
from sklearn.metrics import roc_auc_score, accuracy_score
class CnnLstmClassifier():
    def __init__(self, loss='binary_crossentropy', n_filters=10, n_lstm=30):
        self.loss = loss
        self.n_lstm = n_lstm
        self.n_filters = n_filters
    
    def _make_test_model(self, input_shape):
        self.model = Sequential()
        self.model.add(Dense(1, input_dim=3, activation='sigmoid'))
        
    def _make_model(self, input_shape, dropout, recurrent_dropout):
        batch_input_shape = (None, input_shape[1], input_shape[2])
        self.model = Sequential()
        self.model.add(Conv1D(self.n_filters, self.kernel_size, batch_input_shape=batch_input_shape,
                         activation='relu', kernel_regularizer=l1_l2(self.l1, self.l2)))
        self.model.add(LSTM(self.n_lstm,
                       dropout=dropout, recurrent_dropout=recurrent_dropout))
        self.model.add(Dense(1, activation='sigmoid'))
    
    def train(self, X_train, y_train, X_val=None, y_val=None, n_epochs=5, batch_size=10, n_iter=5, learning_rate=0.001,
              l1=0., l2=0., dropout=0., recurrent_dropout=0.):
        self.learning_rate = learning_rate
        self.l1 = l1
        self.l2 = l2
        self.kernel_size = X_train.shape[2]
        self._make_test_model(X_train.shape)
        #self._make_model(X_train.shape, dropout, recurrent_dropout)
        self.optimizer = RMSprop(lr=learning_rate)
        self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=['acc'])

        #self.log = BaseLogger()#LossMetricHistory(validation_data=(X_val, y_val))
        self.hist = self.model.fit(X_train, y_train,
                        batch_size=batch_size,
                        epochs=n_epochs, 
                        validation_data=(X_val, y_val), verbose=2)#, callbacks=[self.log])
        return self.hist
    
    def predict(self, X, threshold=0.5):
        proba = self.model.predict(X)
        return (proba > threshold).astype('int32')
    
    def predict_proba(self, X):
        return self.model.predict(X)
    
    
    def score(self, X, y, metrics='auc'):
        try:
            if metrics=='auc':
                return roc_auc_score(self.predict_proba(X), y)
            elif metrics=='acc':
                return accuracy_score(self.predict(X), y)
            else:
                raise ValueError(message="No such option: '%s'. Use 'auc' or 'acc'"%str(metrics))
        except ValueError as err:
            print(err)
    

In [40]:
clf = CnnLstmClassifier(n_lstm=2, n_filters=2)
clf.train(data[33][0][:3,:], data[33][1][:3,], data[33][0][4:7,:], data[33][1][4:7,], n_epochs=1)

ValueError: Error when checking input: expected dense_17_input to have 2 dimensions, but got array with shape (3, 39, 19)

In [42]:
model = Sequential()
print data[33][1][:3].shape

(3,)


In [46]:
subjects = set(map(int, os.listdir(path_to_data)))
pairs = [[i, j] for i in subjects for j in subjects if i < j ]
for pair in pairs:
    data = dt.DataBuildClassifier(path_to_data).get_data(pair, shuffle=True, random_state=1,
                                                        resample_to=128, windows=[(0.2, 0.5)],
                                                        baseline_window=(0.2, 0.3))
    X = data

[[25, 36], [25, 33], [33, 36]]


In [1]:
def make_model(input_shape, nb_filters, kernel_size, nb_lstm, l1=0., l2=0., dropout=0., recurrent_dropout=0.):
    model = Sequential()
    model.add(Conv2D(nb_filters, kernel_size, input_shape=input_shape,
                     activation='relu', kernel_regularizer=l1_l2(l1, l2)))
    model.add(LSTM(nb_lstm, batch_input_shape=(None, timesteps, nb_electrodes),
                   dropout=dropout, recurrent_dropout=recurrent_dropout))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [None]:
l2=0.001
l1=0.01
nb_filters = 10
kernel_size = [1, 19] # 19 - is a number of electrodes
nb_lstm = 30
learning_rate = 0.001

In [None]:
# Training
nb_epochs = 100
model = None
model = make_model(X_train.shape, nb_filters, kernel_size, nb_lstm, l1=l1, l2=l2)
optimizer = RMSprop(lr=learning_rate)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])

log = LossMetricHistory(validation_data=(x_val, y_val))
hist = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=n_iter, 
                    validation_data=(x_val, y_val), callbacks=[log])