In [2]:
import os
import re
import glob
import datetime
import numpy as np
from keras import utils, models, layers, backend, callbacks

Using TensorFlow backend.


In [9]:
# Experiment configuration: everything a reader may want to tune lives here.
CAF_DOSE = 200            # caffeine dose; selects the `raw_eeg{dose}` data folder (unit presumably mg, confirm)
STAGE = 'NREM'            # sleep stage tag; only files whose name contains it are used
TEST_SUBJECT_COUNT = 5    # number of subjects held out from training

# Folder holding the raw EEG files. The default is the original author's local
# folder; set the CAFFEINE_DATA_PATH environment variable to point the notebook
# at another location without editing the code.
DATA_PATH = os.environ.get(
    'CAFFEINE_DATA_PATH',
    'C:\\Users\\Philipp\\Documents\\Caffeine\\raw_eeg{dose}'.format(dose=CAF_DOSE),
)

In [43]:
class RawGenerator(utils.Sequence):
    """Keras ``Sequence`` serving shuffled batches built from caffeine and placebo files.

    Each batch loads the same number of files from both classes, concatenates
    their contents along the first axis, normalises the result and returns it
    together with one-hot labels (``[1, 0]`` = caffeine, ``[0, 1]`` = placebo).

    Parameters
    ----------
    files_caf, files_plac : sequence of str
        Paths readable by ``np.load``. Each file is assumed to hold an array
        whose first axis indexes samples (TODO confirm against the data export).
    files_per_batch : int
        Number of files taken from *each* class per batch.
    permute_labels : bool, default False
        When True the true labels are discarded and replaced by a half/half
        label set that is shuffled independently of the data.
    """

    def __init__(self, files_caf, files_plac, files_per_batch, permute_labels=False):
        self.files_caf, self.files_plac = np.array(files_caf), np.array(files_plac)
        self.files_per_batch = files_per_batch
        self.permute_labels = permute_labels

    def _paired_file_count(self):
        """Return how many files per class can be matched with a file of the other class."""
        return min(len(self.files_caf), len(self.files_plac))

    def __len__(self):
        """Return the number of batches per epoch, limited by the smaller class."""
        return int(np.ceil(self._paired_file_count() / self.files_per_batch))

    def __getitem__(self, idx):
        """Load, normalise and shuffle batch ``idx``.

        Returns
        -------
        tuple of np.ndarray
            ``(batch_x, batch_y)`` with matching first dimension.

        Raises
        ------
        IndexError
            If ``idx`` does not address a batch of this generator.
        """
        start = idx * self.files_per_batch
        # Clamp to the paired file count: without this the last batch would take
        # more files from the larger class than from the smaller one.
        end = min(start + self.files_per_batch, self._paired_file_count())
        if idx < 0 or start >= end:
            raise IndexError(f'batch index {idx} out of range for {len(self)} batches')

        caf_arrays = [np.load(file) for file in self.files_caf[start:end]]
        plac_arrays = [np.load(file) for file in self.files_plac[start:end]]

        # Plain Python ints so the list repetition below does not rely on numpy
        # scalar multiplication semantics.
        n_caf = int(sum(x.shape[0] for x in caf_arrays))
        n_plac = int(sum(x.shape[0] for x in plac_arrays))

        batch_x = np.concatenate(caf_arrays + plac_arrays, axis=0)
        # NOTE(review): normalisation runs along axis 0, i.e. across the samples
        # of this batch. Confirm this is intended rather than per-sample scaling.
        batch_x = utils.normalize(batch_x, axis=0)
        n_total = batch_x.shape[0]

        if self.permute_labels:
            # Half placebo, half caffeine labels (the odd one out is caffeine),
            # shuffled independently of the data so they carry no information.
            half = n_total // 2
            batch_y = np.array([[0, 1]] * half + [[1, 0]] * (n_total - half))
            return batch_x[np.random.permutation(n_total)], batch_y[np.random.permutation(n_total)]

        batch_y = np.array([[1, 0]] * n_caf + [[0, 1]] * n_plac)
        # One shared permutation keeps every sample aligned with its label.
        perm = np.random.permutation(n_total)
        return batch_x[perm], batch_y[perm]

    def on_epoch_end(self):
        """Reshuffle both file lists so the file pairing differs between epochs."""
        self.files_caf = self.files_caf[np.random.permutation(len(self.files_caf))]
        self.files_plac = self.files_plac[np.random.permutation(len(self.files_plac))]

In [44]:
def get_subject_id(path):
    """Extract the subject identifier from a data file path.

    The identifier is read from the part of the file name before the first
    underscore and must consist of one non-whitespace character followed by
    one or more digits.

    Parameters
    ----------
    path : str
        Path (or bare file name) of a data file.

    Returns
    -------
    str
        The matched identifier.

    Raises
    ------
    ValueError
        If the file name does not start with a recognisable identifier.
    """
    prefix = os.path.basename(path).split('_')[0]
    # Raw string: '\S' in a normal literal is an invalid escape sequence.
    match = re.match(r'\S\d+', prefix)
    if match is None:
        raise ValueError(f'cannot extract a subject id from file name: {path!r}')
    return match[0]
    
# Collect the recordings of the configured sleep stage for both conditions.
caf_files = glob.glob(os.path.join(DATA_PATH, f'*{STAGE}*CAF*'))
plac_files = glob.glob(os.path.join(DATA_PATH, f'*{STAGE}*PLAC*'))

caf_subjects = {get_subject_id(file) for file in caf_files}
plac_subjects = {get_subject_id(file) for file in plac_files}

# Only subjects recorded under both conditions are candidates for the hold-out set.
subjects = caf_subjects & plac_subjects

if len(subjects) < TEST_SUBJECT_COUNT:
    raise ValueError(
        f'Need at least {TEST_SUBJECT_COUNT} subjects with both caffeine and placebo data '
        f'in {DATA_PATH!r}, found {len(subjects)}'
    )

# Draw the hold-out subjects without replacement in a single step. Sorting first
# makes the draw independent of set iteration order.
test_subjects = set(np.random.permutation(sorted(subjects))[:TEST_SUBJECT_COUNT].tolist())

print(f'Found raw EEG data for {len(subjects)} subjects, leaving {len(test_subjects)} for validation')
print(f'Caffeine data files: {len(caf_files)}, placebo data files: {len(plac_files)}')

# Split by subject so no subject contributes to both training and validation.
caf_files_train = [file for file in caf_files if get_subject_id(file) not in test_subjects]
plac_files_train = [file for file in plac_files if get_subject_id(file) not in test_subjects]
caf_files_test = [file for file in caf_files if get_subject_id(file) in test_subjects]
plac_files_test = [file for file in plac_files if get_subject_id(file) in test_subjects]

train = RawGenerator(caf_files_train, plac_files_train, 3)
# NOTE(review): permute_labels=True randomises the validation labels, so any
# metric computed on `test` reflects chance level. Confirm this is intended.
test = RawGenerator(caf_files_test, plac_files_test, 3, permute_labels=True)

Found raw EEG data for 31 subjects, leaving 5 for validation
Caffeine data files: 346, placebo data files: 394


In [46]:
def _class_counts(labels):
    """Return [rows whose first label column is 0, rows whose first label column is 1]."""
    first_column = labels[:, 0]
    return [(first_column == 0).sum(), (first_column == 1).sum()]

# Sanity check of the class balance over one pass through the training batches.
# Each batch is fetched once, since indexing `train` loads its files from disk.
a = np.array([_class_counts(train[i][1]) for i in range(len(train))])
print(a[:,0].sum() / a.sum() * 100, a[:,1].sum() / a.sum() * 100)

49.983938323160935 50.016061676839065


In [47]:
# Start from a clean Keras graph so re-running this cell keeps the same layer names.
backend.clear_session()

# Two strided Conv1D + max-pooling stages followed by a small dense classifier
# with a two-way softmax output.
# input_shape is presumably (time points, EEG channels). TODO confirm against the data files.
model = models.Sequential([
    layers.Conv1D(filters=16, kernel_size=15, strides=4, activation='relu', input_shape=(5120, 20)),
    layers.MaxPooling1D(pool_size=6),
    layers.Conv1D(filters=8, kernel_size=10, strides=3, activation='relu'),
    layers.MaxPooling1D(pool_size=6),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(2, activation='softmax'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 1277, 16)          4816      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 212, 16)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 68, 8)             1288      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 11, 8)             0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 88)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2848      
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 66        
Total para

In [48]:
# Categorical cross-entropy pairs with the two-unit softmax output and one-hot labels.
compile_options = {
    'optimizer': 'rmsprop',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [None]:
# A timestamped run name gives every training run its own TensorBoard log folder.
name = datetime.datetime.now().strftime('conv_%Y-%m-%d_%H-%M-%S')
# os.path.join yields the same path as before on Windows and a proper nested
# path on other platforms, where '\\' is not a separator.
tensorboard = callbacks.TensorBoard(log_dir=os.path.join('..', 'results', 'logs', name))

model.fit_generator(generator=train,
                    epochs=100,
                    validation_data=test,
                    max_queue_size=6,
                    workers=6,
                    callbacks=[tensorboard])

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100

In [None]:
# Report the accuracy metric (index 1 of the evaluation result, after the loss)
# for the training generator first, then the held-out generator.
for caption, generator in (('Training accuracy:', train), ('Testing accuracy: ', test)):
    scores = model.evaluate_generator(generator)
    print(caption, scores[1])