In [1]:
import pandas as pd
import glob 
import os 
import numpy as np

# Task 1: Merge datasets

In [2]:
def extractFiles(path): 

    all_files = glob.glob(os.path.join(path , "*.csv"))
    return all_files 

In [3]:
path = r'C:\Users\Rahul Nagarajan\Desktop\MS Project\Untitled Folder\Test_Data' 
all_files = extractFiles(path)

In [4]:
def ProcessDF(all_files):
    li = []

    for filename in all_files:
        df = pd.read_csv(filename, skiprows=1, index_col=None, header=0)
        #Epoc X has 14 channels and sampling rate of 128hz
        df = df.iloc[:, [4,5,6,7,8,9,10,11,12,13,14,15,16,17]]
        li.append(df)
    return li

In [5]:
df_list = ProcessDF(all_files)

In [6]:
for count, dataframe in enumerate(df_list):
    dataframe.to_csv(f"dataframe_{count}.csv", index=False)

In [7]:
df_path = r'C:\Users\Rahul Nagarajan\Desktop\MS Project\Untitled Folder' 

In [8]:
all_dfs = extractFiles(df_path)

# Task 2: Feature Extraction

In [9]:
import eeglib

In [10]:
#Preprocessing the data with a highpass filter of 1hz and lowpass filter of 50hz
fdfs = []
for dfs in all_dfs: 
 helper = eeglib.helpers.CSVHelper(dfs, sampleRate=128, normalize=True, highpass=1, lowpass=50)
 wrapper = eeglib.wrapper.Wrapper(helper)
 wrapper.addFeature.PFD()
 data = wrapper.getAllFeatures()
 fdfs.append(data)

In [11]:
finaldfs = []
for dfs in fdfs: 
 dfs = dfs.assign(**dfs.mean())
 dfs = dfs.head(1)
 finaldfs.append(dfs)

In [12]:
df_merged = pd.concat(finaldfs, axis=0, ignore_index=True)

In [13]:
df_merged

Unnamed: 0,PFD(){}_0,PFD(){}_1,PFD(){}_2,PFD(){}_3,PFD(){}_4,PFD(){}_5,PFD(){}_6,PFD(){}_7,PFD(){}_8,PFD(){}_9,PFD(){}_10,PFD(){}_11,PFD(){}_12,PFD(){}_13
0,1.034265,1.034688,1.034054,1.03487,1.03597,1.035939,1.035429,1.035172,1.034865,1.035038,1.035239,1.03471,1.034161,1.034258
1,1.034297,1.035565,1.03542,1.037621,1.036788,1.035257,1.034181,1.032803,1.03341,1.034871,1.036866,1.032984,1.033758,1.035295
2,1.02321,1.026737,1.023735,1.026127,1.030168,1.027421,1.024459,1.02121,1.022945,1.028143,1.026272,1.02662,1.026775,1.025487
3,1.024048,1.027517,1.024481,1.027255,1.031447,1.03152,1.026182,1.027095,1.027536,1.029599,1.029317,1.025052,1.026661,1.02509
4,1.029584,1.030885,1.029757,1.03205,1.035756,1.032672,1.028656,1.030349,1.03197,1.033685,1.033244,1.031029,1.030507,1.029594
5,1.027824,1.030331,1.027972,1.031281,1.03495,1.031178,1.029659,1.027704,1.029421,1.032155,1.031817,1.029396,1.031119,1.030421
6,1.026456,1.028372,1.025252,1.02913,1.027903,1.026527,1.021865,1.02122,1.028606,1.029428,1.028325,1.025712,1.027741,1.025584


In [14]:
df_merged.shape

(7, 14)

# Task 3: Label the dataset

In [15]:
#Under the assumption that Vishwadeep, Cleo and Shane are the only users to be authenticated to the system. 
df_merged.loc[0:3, 'Authenticated'] = 'False'
df_merged.loc[4:6, 'Authenticated'] = 'True'

In [16]:
df_merged

Unnamed: 0,PFD(){}_0,PFD(){}_1,PFD(){}_2,PFD(){}_3,PFD(){}_4,PFD(){}_5,PFD(){}_6,PFD(){}_7,PFD(){}_8,PFD(){}_9,PFD(){}_10,PFD(){}_11,PFD(){}_12,PFD(){}_13,Authenticated
0,1.034265,1.034688,1.034054,1.03487,1.03597,1.035939,1.035429,1.035172,1.034865,1.035038,1.035239,1.03471,1.034161,1.034258,False
1,1.034297,1.035565,1.03542,1.037621,1.036788,1.035257,1.034181,1.032803,1.03341,1.034871,1.036866,1.032984,1.033758,1.035295,False
2,1.02321,1.026737,1.023735,1.026127,1.030168,1.027421,1.024459,1.02121,1.022945,1.028143,1.026272,1.02662,1.026775,1.025487,False
3,1.024048,1.027517,1.024481,1.027255,1.031447,1.03152,1.026182,1.027095,1.027536,1.029599,1.029317,1.025052,1.026661,1.02509,False
4,1.029584,1.030885,1.029757,1.03205,1.035756,1.032672,1.028656,1.030349,1.03197,1.033685,1.033244,1.031029,1.030507,1.029594,True
5,1.027824,1.030331,1.027972,1.031281,1.03495,1.031178,1.029659,1.027704,1.029421,1.032155,1.031817,1.029396,1.031119,1.030421,True
6,1.026456,1.028372,1.025252,1.02913,1.027903,1.026527,1.021865,1.02122,1.028606,1.029428,1.028325,1.025712,1.027741,1.025584,True


In [17]:
labels = ({'True':1,'False':0})
df_encoded = df_merged.replace(labels)

In [18]:
df_encoded

Unnamed: 0,PFD(){}_0,PFD(){}_1,PFD(){}_2,PFD(){}_3,PFD(){}_4,PFD(){}_5,PFD(){}_6,PFD(){}_7,PFD(){}_8,PFD(){}_9,PFD(){}_10,PFD(){}_11,PFD(){}_12,PFD(){}_13,Authenticated
0,1.034265,1.034688,1.034054,1.03487,1.03597,1.035939,1.035429,1.035172,1.034865,1.035038,1.035239,1.03471,1.034161,1.034258,0
1,1.034297,1.035565,1.03542,1.037621,1.036788,1.035257,1.034181,1.032803,1.03341,1.034871,1.036866,1.032984,1.033758,1.035295,0
2,1.02321,1.026737,1.023735,1.026127,1.030168,1.027421,1.024459,1.02121,1.022945,1.028143,1.026272,1.02662,1.026775,1.025487,0
3,1.024048,1.027517,1.024481,1.027255,1.031447,1.03152,1.026182,1.027095,1.027536,1.029599,1.029317,1.025052,1.026661,1.02509,0
4,1.029584,1.030885,1.029757,1.03205,1.035756,1.032672,1.028656,1.030349,1.03197,1.033685,1.033244,1.031029,1.030507,1.029594,1
5,1.027824,1.030331,1.027972,1.031281,1.03495,1.031178,1.029659,1.027704,1.029421,1.032155,1.031817,1.029396,1.031119,1.030421,1
6,1.026456,1.028372,1.025252,1.02913,1.027903,1.026527,1.021865,1.02122,1.028606,1.029428,1.028325,1.025712,1.027741,1.025584,1


In [29]:
X = df_encoded.drop('Authenticated', axis=1).copy()
y = df_encoded['Authenticated'].copy()

In [34]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=123)

In [35]:
inputs = tf.keras.Input(shape=(X_train.shape[1],))
expand_dims = tf.expand_dims(inputs, axis=2)
gru = tf.keras.layers.GRU(256, return_sequences=True)(expand_dims)
flatten = tf.keras.layers.Flatten()(gru)
outputs = tf.keras.layers.Dense(3, activation='softmax')(flatten)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
print(model.summary())

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 14)]              0         
                                                                 
 tf.expand_dims_1 (TFOpLambd  (None, 14, 1)            0         
 a)                                                              
                                                                 
 gru_1 (GRU)                 (None, 14, 256)           198912    
                                                                 
 flatten_1 (Flatten)         (None, 3584)              0         
                                                                 
 dense_2 (Dense)             (None, 3)                 10755     
                                                                 
Total params: 209,667
Trainable params: 209,667
Non-trainable params: 0
_____________________________________________________

In [36]:
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
    ]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50


In [37]:
model_acc = model.evaluate(X_test, y_test, verbose=0)[1]

print("Test Accuracy: {:.3f}%".format(model_acc * 100))

Test Accuracy: 66.667%
