In [25]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np
import h5py
import pyedflib
from tqdm import tqdm
import time

In [26]:
def _read_py_function(filename, sample_rate=160, n_classes=109):
    """Load one EDF recording and cut it into one-second windows.

    Args:
        filename: Scalar string tensor (any object whose ``.numpy()`` returns
            ``bytes``) holding the path of the EDF file, with ``/`` as the
            path separator. The digits between the first ``S`` and the
            following ``R`` of the file name are used as the person id.
        sample_rate: Number of samples per window (samples per second of the
            recording). Defaults to 160.
        n_classes: Length of the one-hot label vector (number of persons).
            Defaults to 109.

    Returns:
        Tuple ``(eeg_data, labels)``. ``eeg_data`` is a float32 array of shape
        ``(seconds, sample_rate, n_channels)``; ``labels`` is a bool array of
        shape ``(seconds, n_classes)`` that repeats the person's one-hot row
        once per window. Trailing samples that do not fill a whole window are
        dropped.

    Raises:
        ValueError: If the person id cannot be parsed as an integer or lies
            outside ``1..n_classes``.
    """
    file_path = filename.numpy().decode()

    # Extract the number between "S" and "R" in the file name.
    base_name = file_path.split("/")[-1]
    person_id = int(base_name.partition("S")[2].partition("R")[0])
    if not 1 <= person_id <= n_classes:
        # Without this check an id of 0 would silently mark the last class.
        raise ValueError(
            "person id {} parsed from {!r} is outside 1..{}".format(
                person_id, file_path, n_classes))

    # The context manager closes the EDF handle even if a read fails.
    with pyedflib.EdfReader(file_path) as f:
        n_channels = f.signals_in_file
        # The first channel's sample count is used for every channel.
        n_samples = int(f.getNSamples()[0])
        eeg_data = np.zeros((n_channels, n_samples), dtype=np.float32)
        for i in range(n_channels):
            eeg_data[i, :] = f.readSignal(i)

    seconds = n_samples // sample_rate
    usable_samples = seconds * sample_rate

    label = np.zeros(n_classes, dtype=bool)
    label[person_id - 1] = True
    labels = np.tile(label, (seconds, 1))

    # (channels, samples) -> (samples, channels), drop the incomplete tail,
    # then group consecutive samples into windows of `sample_rate`.
    eeg_data = eeg_data.T[:usable_samples].reshape(seconds, sample_rate, n_channels)

    return eeg_data, labels



In [49]:
def get_dataset(input='train'):
    """Load every recording of one split and return per-file arrays.

    Files are looked up under ``files/`` with the pattern ``S*/S*R<run>.edf``:
    runs 01-12 for ``"train"``, run 13 for ``"test"`` and run 14 for
    ``"validation"``.

    Args:
        input: Which split to load: ``"train"``, ``"test"`` or
            ``"validation"``.

    Returns:
        Tuple ``(train_data, labels)`` of two lists with one entry per file.
        Each ``train_data`` entry is the window array returned by
        ``_read_py_function``; each ``labels`` entry is the matching label
        array with an extra axis inserted at position 1.

    Raises:
        ValueError: If ``input`` is not one of the three known splits.
    """
    path = "files/"
    # `dataset` was a module-level name in the original cell; it is kept that
    # way so any cell that reads it afterwards keeps working.
    global dataset

    if input == "train":
        run_ids = range(1, 13)
    elif input == "test":
        run_ids = (13,)
    elif input == "validation":
        run_ids = (14,)
    else:
        # Previously an unknown split fell through and reused whatever
        # `dataset` a former call had left behind (or hit a NameError).
        raise ValueError(
            "input must be 'train', 'test' or 'validation', got {!r}".format(input))

    selected = None
    for run_id in run_ids:
        nth_record = tf.data.Dataset.list_files(
            path + "S*/S*R" + "{:02d}".format(run_id) + ".edf")
        selected = nth_record if selected is None else selected.concatenate(nth_record)
    dataset = selected

    # Materialise the file list once: it gives tqdm its total and avoids
    # iterating the dataset a second time just to count it.
    filenames = list(dataset)

    train_data = []
    labels = []
    for filename in tqdm(filenames):
        eeg_data, label = _read_py_function(filename)
        train_data.append(eeg_data)
        labels.append(np.expand_dims(label, axis=1))
    print("Loaded")

    return train_data, labels

In [None]:
# Read every training recording; both results are lists with one array per file.
training_dataset, training_labels = get_dataset("train")

In [51]:
# Stack the per-file arrays along the first (window) axis. The source lists
# are deleted right after stacking so their references are dropped; as a
# consequence the loading cell must be re-run before this cell can run again.
train_data = np.vstack(training_dataset)
del training_dataset

train_label = np.vstack(training_labels)
del training_labels

In [52]:
# Sanity check: both arrays must have the same number of windows.
print("train_data", train_data.shape)
print("train_label", train_label.shape)

train_data (147061, 160, 64)
train_data (147061, 1, 109)


In [None]:
# Read every test recording; both results are lists with one array per file.
testing_dataset, testing_labels = get_dataset("test")

In [54]:
# Stack the per-file arrays along the first (window) axis. The source lists
# are deleted right after stacking so their references are dropped; as a
# consequence the loading cell must be re-run before this cell can run again.
test_data = np.vstack(testing_dataset)
del testing_dataset

test_label = np.vstack(testing_labels)
del testing_labels

In [41]:
def CNN(input_shape=(160, 64), n_classes=109):
    """Build the 1-D convolutional classifier and print its summary.

    The network is: batch normalisation, four Conv1D(kernel 2, ReLU, 'same')
    + MaxPooling1D(2) stages with 128/256/512/1024 filters, a reshape that
    folds all remaining time steps and filters into one feature vector,
    dropout, and a softmax layer.

    Args:
        input_shape: Shape of one sample as ``(time_steps, channels)``.
            Defaults to ``(160, 64)``.
        n_classes: Number of softmax outputs. Defaults to 109.

    Returns:
        The uncompiled ``Sequential`` model. Its output has shape
        ``(batch, 1, n_classes)``; the singleton axis is kept on purpose so
        that the output lines up with labels of shape ``(N, 1, n_classes)``.
    """
    model = models.Sequential()

    # Only the first layer needs to declare the input shape.
    model.add(layers.BatchNormalization(input_shape=input_shape, epsilon=.0001))

    # Each stage halves the time axis (160 -> 80 -> 40 -> 20 -> 10 by default).
    for n_filters in (128, 256, 512, 1024):
        model.add(layers.Conv1D(filters=n_filters, kernel_size=2, strides=1,
                                padding='same', activation='relu'))
        model.add(layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))

    # Fold (time, filters) into a single feature axis: (10, 1024) -> (1, 10240)
    # for the default input. Letting Keras infer the size avoids relying on
    # the coincidence that 64 * 160 == 10 * 1024.
    model.add(layers.Reshape((1, -1)))
    # `rate` is the fraction of units dropped (not the keep probability).
    model.add(layers.Dropout(rate=0.5))

    model.add(layers.Dense(n_classes, activation='softmax'))

    model.summary()
    return model

In [42]:
model = CNN()

# Hand the configured optimizer instance to compile(). Passing the string
# 'adam' would build a fresh Adam with the default learning rate and ignore
# the 1e-5 setting.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization (BatchN  (None, 160, 64)          256       
 ormalization)                                                   
                                                                 
 conv1d (Conv1D)             (None, 160, 128)          16512     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 80, 128)          0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 80, 256)           65792     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 40, 256)          0         
 1D)                                                             
                                                        

In [55]:
# Label array shapes for the train and test splits, one per line.
print(train_label.shape, test_label.shape, sep="\n")

(147061, 1, 109)
(13380, 1, 109)


In [57]:
# File name template for weight checkpoints; `{epoch}` is filled in per save.
checkpoint_path = "checkpoint_cnn_00001/cp-{epoch:04d}.ckpt"

# Callback that stores weights only (not the full model) and logs each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
)

# Store the current weights under epoch number 0.
model.save_weights(checkpoint_path.format(epoch=0))

In [62]:
# Load necessary libraries
import mne
from mne.decoding import Vectorizer

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support

# Models
from sklearn import svm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression

In [64]:
# SVC needs one integer class id per sample, but train_label holds one-hot
# rows of shape (n_samples, 1, 109); recover the index of the set entry.
train_class_ids = train_label.reshape(len(train_label), -1).argmax(axis=1)

# Flatten each window, standardise the features, then fit an RBF SVM.
clf_svm_0 = make_pipeline(Vectorizer(), StandardScaler(), svm.SVC(kernel='rbf', C=1))
scores = cross_val_score(clf_svm_0, train_data, train_class_ids, cv=5)
for fold, score in enumerate(scores, start=1):
    print('Accuracy of ' + str(fold) + 'th fold is ' + str(score) + '\n')

KeyboardInterrupt: 