In [None]:
# Mount Google Drive at /content/gdrive; the dataset and checkpoint paths used
# later in this notebook all live under /content/gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive') #, force_remount=True)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
!pip install git+https://github.com/holgern/pyedflib.git

Collecting git+https://github.com/holgern/pyedflib.git
  Cloning https://github.com/holgern/pyedflib.git to /tmp/pip-req-build-l8cij5kp
  Running command git clone -q https://github.com/holgern/pyedflib.git /tmp/pip-req-build-l8cij5kp
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: pyEDFlib
  Building wheel for pyEDFlib (PEP 517) ... [?25l[?25hdone
  Created wheel for pyEDFlib: filename=pyEDFlib-0.1.24.dev0+039c230-cp37-cp37m-linux_x86_64.whl size=1527185 sha256=2ee74f86a42ef279b412a8fb863cf9d04c1daa120b0ca81d67b809d10d5d1830
  Stored in directory: /tmp/pip-ephem-wheel-cache-hxhfuc9p/wheels/40/ca/9e/ac89f25e7cfde87119c99fab893236fb4907cf2ab6d84ac51e
Successfully built pyEDFlib
Installing collected packages: pyEDFlib
Successfully installed pyEDFlib-0.1.24.dev0+039c230


In [None]:

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np
import h5py
import pyedflib
from tqdm import tqdm
import time

In [None]:
def CNN(input_shape=(160, 64), n_classes=109, dropout_rate=0.5):
    """Build the (uncompiled) 1-D convolutional classifier.

    Architecture: batch normalisation, then four Conv1D(kernel 2, ReLU, 'same')
    + MaxPooling1D(2) stages with 128/256/512/1024 filters, then the remaining
    time steps and channels are merged into one feature vector, followed by
    dropout and a softmax layer.

    Args:
        input_shape: (time_steps, channels) of one input window. The default
            (160, 64) matches the windows produced by the loader in this notebook.
        n_classes: number of output classes (persons).
        dropout_rate: fraction of units DROPPED during training (Keras `rate`,
            not a keep-probability).

    Returns:
        A `Sequential` model whose output has shape (batch, 1, n_classes).
        The singleton middle axis is kept on purpose so the output lines up
        with labels shaped (N, 1, n_classes).
    """
    model = models.Sequential()

    # Only the first layer needs input_shape; Keras infers every later shape.
    model.add(layers.BatchNormalization(input_shape=input_shape, epsilon=.0001))

    # Each stage doubles the filter count and halves the time axis.
    for n_filters in (128, 256, 512, 1024):
        model.add(layers.Conv1D(filters=n_filters, kernel_size=2, strides=1,
                                padding='same', activation='relu'))
        model.add(layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))

    # Collapse (time, filters) into one vector per sample while keeping a
    # length-1 axis: (10, 1024) -> (1, 10240) for the default input shape.
    model.add(layers.Reshape((1, -1)))
    model.add(layers.Dropout(rate=dropout_rate))

    model.add(layers.Dense(n_classes, activation='softmax'))

    model.summary()
    return model

In [None]:
def _read_py_function(filename, sampling_rate=160, n_classes=109):
    """Read one EDF recording and cut it into one-second labelled windows.

    Args:
        filename: object whose `.numpy()` returns the file path as bytes
            (e.g. an eager string tensor yielded by a tf.data dataset).
            The file name must look like ``S<person>R<record>.edf``.
        sampling_rate: samples per second; also the length of each window.
        n_classes: number of persons, i.e. the width of the one-hot label.

    Returns:
        (eeg_data, labels) where eeg_data is float32 with shape
        (seconds, sampling_rate, n_channels) and labels is a bool array of
        shape (seconds, n_classes) with the person's one-hot row repeated.
        Trailing samples that do not fill a whole second are discarded.
    """
    file_path = filename.numpy().decode()

    # The context manager closes the EDF handle even if reading fails; the
    # original left every reader open.
    with pyedflib.EdfReader(file_path) as f:
        n_channels = f.signals_in_file
        n_samples = int(f.getNSamples()[0])
        eeg_data = np.zeros((n_channels, n_samples), dtype=np.float32)
        for channel in range(n_channels):
            eeg_data[channel, :] = f.readSignal(channel)

    # Keep only whole seconds.
    seconds = n_samples // sampling_rate
    n_samples = seconds * sampling_rate

    # Person id is the number between 'S' and 'R' in the file name (1-based).
    file_name = file_path.split("/")[-1]
    person_id = int(file_name.partition("S")[2].partition("R")[0])
    label = np.zeros(n_classes, dtype=bool)
    label[person_id - 1] = True
    labels = np.tile(label, (seconds, 1))

    # (channels, samples) -> (samples, channels), then group consecutive
    # samples into windows of one second each.
    eeg_data = eeg_data.transpose()[:n_samples, :]
    eeg_data = eeg_data.reshape(seconds, sampling_rate, n_channels)

    return eeg_data, labels

In [None]:
def get_dataset(train=True):
    """Load all EDF recordings of one split into memory.

    Args:
        train: selects the split. ``True`` or ``"train"`` loads records
            R01-R12 of every subject, ``False`` or ``"test"`` loads R13,
            and ``"validation"`` loads R14.

    Returns:
        (data, labels): two lists with one entry per recording. Each data
        entry is the window array returned by `_read_py_function`; each label
        entry is the matching one-hot array with an extra axis inserted at
        position 1, i.e. shape (seconds, 1, n_classes).

    Raises:
        ValueError: if `train` is a string that names no known split.
    """
    path = "/content/gdrive/MyDrive/eeg_person_identification/eeg-motor-movementimagery-dataset-1.0.0/files/"

    # The original compared the builtin `input` against strings, so no branch
    # ever ran and `dataset` was never bound. Map the argument explicitly.
    if isinstance(train, str):
        split = train
    else:
        split = "train" if train else "test"

    records_by_split = {
        "train": range(1, 13),
        "test": (13,),
        "validation": (14,),
    }
    if split not in records_by_split:
        raise ValueError(
            "unknown split {!r}; expected one of {}".format(split, sorted(records_by_split)))

    # Files are laid out as <subject dir>/<subject><record>.edf, so the glob
    # needs a forward slash between the two wildcards (the original "\S" was
    # an invalid escape and not a path separator on Linux).
    dataset = None
    for record in records_by_split[split]:
        nth_record = tf.data.Dataset.list_files(path + "S*/S*R{:02d}.edf".format(record))
        dataset = nth_record if dataset is None else dataset.concatenate(nth_record)

    # Count the files once so the progress bar has a total.
    length = len(list(dataset.as_numpy_iterator()))
    train_data = list()
    labels = list()

    for filename in tqdm(dataset, total=length):
        eeg_data, label = _read_py_function(filename)
        train_data.append(eeg_data)
        # (seconds, n_classes) -> (seconds, 1, n_classes)
        labels.append(np.expand_dims(label, axis=1))
    print("Loaded")

    return train_data, labels

In [None]:
# Load the training recordings; both results are lists with one array per file.
training_dataset, training_labels = get_dataset(train=True)

100%|██████████| 1090/1090 [01:46<00:00, 10.22it/s]

Loaded





In [None]:
# Concatenate the per-recording arrays along the first axis into one array each.
# (The former np.empty placeholders were overwritten immediately and only
# wasted an allocation.) The source lists are deleted to free memory.
train_data = np.vstack(training_dataset)
del training_dataset

train_label = np.vstack(training_labels)
del training_labels

In [None]:
# Load the non-training recordings (train=False); they are stacked into
# test_data / test_label in the next cell.
testing_dataset, testing_labels = get_dataset(train=False)

100%|██████████| 436/436 [01:21<00:00,  5.32it/s]

Loaded





In [None]:
# Concatenate the per-recording arrays along the first axis into one array each.
# (The former np.empty placeholders were overwritten immediately and only
# wasted an allocation.) The source lists are deleted to free memory.
test_data = np.vstack(testing_dataset)
del testing_dataset

test_label = np.vstack(testing_labels)
del testing_labels

In [None]:
model = CNN()

# Keep the optimizer instance and hand it to compile(): passing the string
# 'adam' builds a fresh Adam with the default learning rate and silently
# ignores the 1e-5 configured here.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.00001)

# Metric order matters: the evaluation cell unpacks
# (loss, accuracy, auc, FP, FN, TP, TN) and the plotting cell reads the
# 'auc' / 'val_auc' history keys.
model.compile(optimizer=optimizer,
          loss=tf.keras.losses.CategoricalCrossentropy(),
          metrics=['accuracy',
                   tf.keras.metrics.AUC(name='auc'),
                   tf.keras.metrics.FalsePositives(name='fp'),
                   tf.keras.metrics.FalseNegatives(name='fn'),
                   tf.keras.metrics.TruePositives(name='tp'),
                   tf.keras.metrics.TrueNegatives(name='tn')])

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_2 (Batch (None, 160, 64)           256       
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 160, 128)          16512     
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 80, 128)           0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 80, 256)           65792     
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, 40, 256)           0         
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 40, 512)           262656    
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 20, 512)          

In [None]:
# Report the stacked label array shapes: training first, then test.
for label_array in (train_label, test_label):
    print(label_array.shape)

(120298, 1, 109)
(53526, 1, 109)


In [None]:
# File-name pattern for the weight checkpoints; `{epoch:04d}` is filled in
# with a zero-padded epoch number.
checkpoint_path = "/content/gdrive/MyDrive/eeg_person_identification/checkpoint_cnn_00001/cp-{epoch:04d}.ckpt"
# Callback passed to model.fit: stores weights only (not the full model) and
# logs each save (verbose=1).
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
# Save the current (pre-training) weights under epoch number 0.
model.save_weights(checkpoint_path.format(epoch=0))

NameError: ignored

In [None]:
# Train for 50 epochs in batches of 80, validating on the test arrays after
# each epoch and writing checkpoints through cp_callback.
# NOTE(review): training is pinned to the CPU device — presumably to avoid
# accelerator memory limits with the full dataset in RAM; confirm this is intended.
with tf.device('/CPU:0'):
    history = model.fit(train_data, train_label, epochs=50, validation_data=(test_data, test_label), batch_size = 80,  callbacks=[cp_callback])

In [None]:
#use this to load: 
# Note: this rebinds checkpoint_path from the file pattern to the folder.
checkpoint_path = "/content/gdrive/MyDrive/eeg_person_identification/checkpoint_cnn_00001/"#cp.ckpt"
latest = tf.train.latest_checkpoint(checkpoint_path) #it takes the folder
print(latest)
# latest_checkpoint returns None when the folder holds no checkpoint; fail
# with a clear message instead of an obscure error inside load_weights.
if latest is None:
    raise FileNotFoundError("No checkpoint found in " + checkpoint_path)
model.load_weights(latest)

/content/gdrive/MyDrive/eeg_person_identification/checkpoint_10-4/cp.ckpt


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc1aa064e50>

In [None]:
# Persist the per-epoch metrics dict next to the checkpoints, then reload it
# so `history` is a plain dict from here on.
history_path = '/content/gdrive/MyDrive/eeg_person_identification/checkpoint_cnn_00001/history.npy'
np.save(history_path, history.history)
# np.save wraps the dict in a 0-d object array, hence allow_pickle and .item().
# allow_pickle must be the boolean True (the string 'TRUE' only worked by
# being truthy). Only load files written by this notebook: pickle can run code.
history = np.load(history_path, allow_pickle=True).item()

In [None]:
# Accept either a Keras History object or the plain dict reloaded from disk.
if type(history) is not dict:
    history = history.history

# Draw one curve per metric; each legend label is the history key itself.
for metric_name in ('accuracy', 'auc', 'val_accuracy', 'val_auc'):
    plt.plot(history[metric_name], label=metric_name)

plt.xlabel('Epoch')
plt.ylabel('Accuracy/AUC')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()

"if type(history) is not dict:\n    history = history.history\n\nplt.plot(history['accuracy'], label='accuracy')\nplt.plot(history['auc'], label='auc')\nplt.plot(history['val_accuracy'], label = 'val_accuracy')\nplt.plot(history['val_auc'], label='val_auc')\nplt.xlabel('Epoch')\nplt.ylabel('Accuracy/AUC')\nplt.ylim([0.5, 1])\nplt.legend(loc='lower right')\nplt.show()"

In [None]:
# Find the epoch with the highest validation accuracy.
max_value = max(history['val_accuracy'])
print(max_value)
max_index = history['val_accuracy'].index(max_value)  # 0-based position in the history list
print(max_index)
print(history['accuracy'][max_index])
# ModelCheckpoint numbers its files from 1 (the first trained epoch is
# cp-0001), while cp-0000 holds the untrained weights saved manually before
# fit(). The 0-based history index therefore maps to checkpoint max_index + 1;
# using max_index directly loaded the weights of the previous epoch.
best_checkpoint_path = "/content/gdrive/MyDrive/eeg_person_identification/checkpoint_cnn_00001/cp-{:04d}.ckpt".format(max_index + 1)
model.load_weights(best_checkpoint_path)

In [None]:
# Evaluate on the test arrays. The seven-way unpack only works when the model
# was compiled with exactly six metrics in this order: accuracy, AUC, false
# positives, false negatives, true positives, true negatives (loss comes first).
# NOTE(review): with a model compiled with fewer metrics this line raises ValueError.
test_loss, test_accuracy, test_auc, FP, FN, TP, TN = model.evaluate(x=test_data,  y=test_label)
print(test_loss)
print(test_accuracy)



In [None]:
leng = test_label.shape[0]

# One batched forward pass over the whole test set. The original called
# model.predict() once per sample, which is dominated by per-call overhead.
probabilities = model.predict(test_data, batch_size=80)

# Flatten every per-sample output / label to a single class axis and take the
# arg-max, which is what np.argmax did on each (1, 1, n_classes) array before.
# Plain lists are kept so later cells can still call len() and np.vstack().
preds = np.argmax(probabilities.reshape(leng, -1), axis=1).tolist()
real = np.argmax(test_label.reshape(leng, -1), axis=1).tolist()

# Tally hits and misses.
correct = sum(1 for predicted, expected in zip(preds, real) if predicted == expected)
wrong = leng - correct

100%|██████████| 53526/53526 [49:11<00:00, 18.14it/s]


In [None]:
print(len(preds))
print(len(real))

# Turn the lists of class indices into column vectors of shape (N, 1).
# (The former np.empty placeholders were overwritten immediately.)
y_test = np.vstack(real)
y_pred = np.vstack(preds)

53526
53526


'y_pred = np.squeeze(y_pred, axis=1)\ny_pred = y_pred.astype(int)\n\ny_test = label_binarize(y_test, classes=list(range(0, 109)))\n\nn_classes = y_test.shape[1]'

In [None]:
# Show the shape of both column vectors before pairing them.
for column in (y_test, y_pred):
    print(column.shape)

# Column 0 holds the true class index, column 1 the predicted class index.
results = np.column_stack((y_test, y_pred))
print(results)
print(results.shape)

(53526, 1)
(53526, 1)
[[66 66]
 [66 66]
 [66 66]
 ...
 [27 27]
 [27 27]
 [27 96]]
(53526, 2)


In [None]:
from statistics import mean

num_classes = 109

# Start from EMPTY lists: the original pre-filled both with 109 zeros and then
# appended 109 more values, which halved the reported means.
frrs = []
fars = []

true_ids = results[:, 0]   # column 0: true class
pred_ids = results[:, 1]   # column 1: predicted class

for s in range(num_classes):
    is_genuine = true_ids == s     # samples that really belong to class s
    is_accepted = pred_ids == s    # samples the model assigned to class s

    n_genuine = int(np.count_nonzero(is_genuine))
    n_accepted = int(np.count_nonzero(is_accepted))
    # Genuine samples of s that were assigned to some other class.
    false_rejects = int(np.count_nonzero(is_genuine & ~is_accepted))
    # Samples of other classes that were assigned to s.
    false_accepts = int(np.count_nonzero(is_accepted & ~is_genuine))

    # FRR = rejected genuine samples / all genuine samples. The original
    # divided the false-ACCEPT count by n_genuine and never used the
    # false-reject counter it computed.
    frrs.append(false_rejects / n_genuine if n_genuine else 0.0)

    # Same ratio as the original: false accepts / all samples predicted as s.
    # NOTE(review): this is 1 - precision; the usual biometric FAR divides by
    # the number of impostor samples (true class != s) — confirm which is wanted.
    fars.append(false_accepts / n_accepted if n_accepted else 0.0)

FAR_mean = mean(fars)
FRR_mean = mean(frrs)
# NOTE(review): this is the average of the two mean rates at a single
# operating point, not an equal error rate obtained from a threshold sweep.
eer = (FRR_mean + FAR_mean)/2
print(FAR_mean)
print(FRR_mean)
print(eer)

0.01988123006018495
0.020980459950640018
0.020430845005412486
