In [17]:
import os
import re
import glob
import pickle
import numpy as np
from keras import models, utils
from sklearn import preprocessing

In [4]:
# Experimental condition selected for this run: the caffeine dose whose
# recordings are evaluated and the sleep stage used to filter file names.
CAF_DOSE = 200
STAGE = 'NREM'

# Directory containing the raw EEG files. The built-in default is the original
# machine-specific location for the configured dose; set the CAFFEINE_DATA_PATH
# environment variable to point at the data directory on another machine
# (the override is used verbatim, so it must already match CAF_DOSE).
DATA_PATH = os.environ.get(
    'CAFFEINE_DATA_PATH',
    'C:\\Users\\Philipp\\Documents\\Caffeine\\raw_eeg{dose}'.format(dose=CAF_DOSE),
)

In [24]:
class RawGenerator(utils.Sequence):
    """Keras ``Sequence`` serving batches of arrays loaded from ``.npy`` files.

    Every batch is built from up to ``files_per_batch`` caffeine files and the
    same slice of placebo files. All arrays in a batch are concatenated along
    the first axis, L2-normalised, labelled with one-hot rows (``[0, 1]`` for
    caffeine, ``[1, 0]`` for placebo) and shuffled.

    The concatenated batch is indexed as ``batch_x[:, :, i]``, so the stored
    arrays must be 3-D. Presumably the layout is (samples, time, channels) --
    TODO confirm against the code that wrote the files.
    """

    def __init__(self, files_caf, files_plac, files_per_batch, permute_labels=False):
        """Store the file lists and draw an initial random file order.

        Args:
            files_caf: Paths of the ``.npy`` files for the caffeine condition.
            files_plac: Paths of the ``.npy`` files for the placebo condition.
            files_per_batch: Number of files taken from *each* condition per batch.
            permute_labels: If True, batches get an (approximately) balanced set
                of labels that is shuffled independently of the samples, i.e.
                labels carry no information about the data.
        """
        # Newer Keras versions expect Sequence subclasses to run the base
        # initialiser; on older versions this resolves to object.__init__.
        super().__init__()

        self.files_caf = np.array(files_caf)
        self.files_plac = np.array(files_plac)
        self.files_per_batch = files_per_batch
        self.permute_labels = permute_labels

        # Shuffle once up front so the first epoch is already in random order.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, limited by the shorter of the two file lists."""
        return int(np.ceil(min(len(self.files_caf), len(self.files_plac)) / self.files_per_batch))

    def __getitem__(self, idx):
        """Load, normalise, label and shuffle the batch with index ``idx``.

        Returns:
            Tuple ``(batch_x, batch_y)`` where ``batch_y`` holds one one-hot row
            per sample (row) of ``batch_x``.
        """
        start = idx * self.files_per_batch
        end = start + self.files_per_batch

        caf_arrays = [np.load(path) for path in self.files_caf[start:end]]
        plac_arrays = [np.load(path) for path in self.files_plac[start:end]]

        # Built-in sum keeps the counts as plain ints. np.sum of an empty list
        # is the float 0.0, which cannot be used to repeat a list.
        n_caf = sum(arr.shape[0] for arr in caf_arrays)
        n_plac = sum(arr.shape[0] for arr in plac_arrays)

        batch_x = np.concatenate(caf_arrays + plac_arrays, axis=0)
        batch_y = np.array([[0, 1]] * n_caf + [[1, 0]] * n_plac)

        # Normalisation produces fractional values, so make sure the buffer can
        # hold them (float64 is what sklearn would convert non-float input to).
        if not np.issubdtype(batch_x.dtype, np.floating):
            batch_x = batch_x.astype(np.float64)

        # L2-normalise every row of each slice along the last axis. The result
        # is written back explicitly instead of relying on copy=False updating
        # the view in place, which silently does nothing whenever sklearn has
        # to copy its input.
        for i in range(batch_x.shape[2]):
            batch_x[:, :, i] = preprocessing.normalize(batch_x[:, :, i], norm='l2', axis=1)

        if self.permute_labels:
            # Replace the true labels by a balanced label set; an odd sample
            # count gets one extra [1, 0] row.
            half = len(batch_y) // 2
            labels = [[1, 0]] * half + [[0, 1]] * half
            if len(labels) < len(batch_y):
                labels += [[1, 0]]
            batch_y = np.array(labels)
            # Two independent permutations: samples and labels are decoupled.
            return batch_x[np.random.permutation(batch_x.shape[0])], batch_y[np.random.permutation(batch_y.shape[0])]

        # One shared permutation keeps every sample paired with its label.
        perm = np.random.permutation(batch_x.shape[0])
        return batch_x[perm], batch_y[perm]

    def on_epoch_end(self):
        """Reshuffle both file lists independently so file pairings change."""
        self.files_caf = self.files_caf[np.random.permutation(len(self.files_caf))]
        self.files_plac = self.files_plac[np.random.permutation(len(self.files_plac))]

In [27]:
def get_subject_id(path):
    """Return the subject identifier found at the start of a file name.

    The last path component is cut at its first underscore; the leading piece
    must start with one non-whitespace character followed by one or more
    digits, and that matched prefix is returned.

    Args:
        path: Path (or bare file name) of a recording file.

    Returns:
        The matched identifier string.

    Raises:
        ValueError: If the file name does not start with such an identifier.
    """
    prefix = os.path.basename(path).split('_')[0]
    # Raw string: '\S' and '\d' are regex classes, not Python string escapes.
    match = re.match(r'\S\d+', prefix)
    if match is None:
        raise ValueError(f'Cannot extract a subject id from file name: {path!r}')
    return match[0]
    
# Find the recordings of the configured sleep stage for each condition.
caf_files = glob.glob(os.path.join(DATA_PATH, f'*{STAGE}*CAF*'))
plac_files = glob.glob(os.path.join(DATA_PATH, f'*{STAGE}*PLAC*'))

caf_subjects = {get_subject_id(file) for file in caf_files}
plac_subjects = {get_subject_id(file) for file in plac_files}

# SECURITY: unpickling can execute arbitrary code -- only load trusted files.
# The context manager closes the file handle as soon as the data is read.
with open(os.path.join(os.pardir, 'subjects.pickle'), 'rb') as subjects_file:
    used = set(pickle.load(subjects_file))

# Keep only subjects that have recordings in both conditions.
# NOTE(review): `used` is loaded above but does not take part in this
# selection -- confirm whether subjects should also be filtered by it.
subjects = caf_subjects & plac_subjects

print(f'Using raw EEG data from {len(subjects)} subjects')

caf_files = [file for file in caf_files if get_subject_id(file) in subjects]
plac_files = [file for file in plac_files if get_subject_id(file) in subjects]

# One file per condition and batch, with the true labels.
data = RawGenerator(caf_files, plac_files, 1, permute_labels=False)

Using raw EEG data from 40 subjects


In [9]:
# Load the trained network from the parent directory. The path is built with
# os.path.join so the separator is not tied to Windows.
model = models.load_model(os.path.join(os.pardir, 'model_1.h5'))

In [28]:
# Running total of the per-batch accuracies; dividing by the number of batches
# seen so far yields the (unweighted) mean reported on the progress line.
acc_sum = 0
n_batches = len(data)

for batch_idx in range(n_batches):
    batch_x, batch_y = data[batch_idx]
    # Element 1 of evaluate()'s result is taken as the accuracy -- assumes the
    # model was compiled with accuracy as its first metric.
    batch_acc = model.evaluate(batch_x, batch_y, verbose=0)[1]
    acc_sum += batch_acc
    seen = batch_idx + 1
    # '\r' rewinds the cursor so each update overwrites the previous one.
    print(f'Mean accuracy after batch ({seen}/{n_batches}): {acc_sum / seen * 100:.3f}%', end='\r')

Mean accuracy after batch (437/437): 71.582%