In [1]:
"""
Adapted from: https://github.com/aliasvishnu/EEGNet/blob/master/EEGNet-PyTorch.ipynb

Original paper - https://arxiv.org/abs/1611.08024

Please reach out to me if you spot an error
"""

'\nAdapted from: https://github.com/aliasvishnu/EEGNet/blob/master/EEGNet-PyTorch.ipynb\n\nOriginal paper - https://arxiv.org/abs/1611.08024\n\nPlease reach out to me if you spot an error\n'

In [19]:
import pandas as pd 
import numpy as np
from numpy import array
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from ipynb.fs.full.Data_Processing import extract_test_number

There are 5 paragraphs in the dataset
timeRangeStart has distinct values
True
Initial rows: 7955
Final rows: 7544
Dropped rows: 411
Working on this csv: User004c_test8_IET_08_02-09-2019_recording0_U1567417804_EEG_rawEEGData.csv
test8
There are 5 paragraphs in the dataset
timeRangeStart has distinct values
True
Initial rows: 9996
Final rows: 0
Dropped rows: 9996
Working on this csv: User004c_test9_BET_09_02-09-2019_recording0_U1567418029_EEG_rawEEGData.csv
test9
There are 5 paragraphs in the dataset
timeRangeStart has distinct values
True
Initial rows: 13153
Final rows: 0
Dropped rows: 13153
Working on this csv: User004d_test12_IES_12_02-09-2019_recording0_U1567419281_EEG_rawEEGData.csv
test12
There are 5 paragraphs in the dataset
timeRangeStart has distinct values
True
Initial rows: 13549
Final rows: 0
Dropped rows: 13549
Working on this csv: User004d_test14_IDS_14_02-09-2019_recording0_U1567419722_EEG_rawEEGData.csv
test14
There are 5 paragraphs in the dataset
timeRangeStart has disti



In [3]:
class EEGNet(nn.Module):
    def __init__(self):
        super(EEGNet, self).__init__()
        self.T = 120
        
        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, 64), padding = 0)
        self.batchnorm1 = nn.BatchNorm2d(16, False)
        
        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4, False)
        self.pooling2 = nn.MaxPool2d(2, 4)
        
        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4, False)
        self.pooling3 = nn.MaxPool2d((2, 4))
        
        # FC Layer
        # NOTE: This dimension will depend on the number of timestamps per sample in your data.
        # I have 120 timepoints. 
        self.fc1 = nn.Linear(4*2*7, 1)
        
    def forward(self, x):
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        x = F.dropout(x, 0.25)
        x = x.permute(0, 3, 1, 2)

        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        x = F.dropout(x, 0.25)
        x = self.pooling2(x)

        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = F.dropout(x, 0.25)
        x = self.pooling3(x)

        # FC Layer
        x = x.view(-1, 4*2*7)
        x = F.sigmoid(self.fc1(x))
        return x

In [4]:
net = EEGNet().cuda(0)
print (net.forward(Variable(torch.Tensor(np.random.rand(1, 1, 120, 64)).cuda(0))))
criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters())

tensor([[0.7924]], device='cuda:0', grad_fn=<SigmoidBackward>)




**Evaluate function returns values of different criteria like accuracy, precision etc.**
In case you face memory overflow issues, use batch size to control how many samples get evaluated at one time. Use a batch_size that is a factor of length of samples. This ensures that you won't miss any samples.

In [5]:
def evaluate(model, X, Y, params = ["acc"]):
    results = []
    batch_size = 100
    
    predicted = []
    
    for i in range(len(X)//batch_size):
        s = i*batch_size
        e = i*batch_size+batch_size
        
        inputs = Variable(torch.from_numpy(X[s:e]).cuda(0))
        pred = model(inputs)
        
        predicted.append(pred.data.cpu().numpy())
        
        
    inputs = Variable(torch.from_numpy(X).cuda(0))
    predicted = model(inputs)
    
    predicted = predicted.data.cpu().numpy()
    
    for param in params:
        if param == 'acc':
            results.append(accuracy_score(Y, np.round(predicted)))
        if param == "auc":
            results.append(roc_auc_score(Y, predicted))
        if param == "recall":
            results.append(recall_score(Y, np.round(predicted)))
        if param == "precision":
            results.append(precision_score(Y, np.round(predicted)))
        if param == "fmeasure":
            precision = precision_score(Y, np.round(predicted))
            recall = recall_score(Y, np.round(predicted))
            results.append(2*precision*recall/ (precision+recall))
    return results

**Generate random data**
    
*Data format:*

Datatype - float32 (both X and Y)

X.shape - (#samples, 1, #timepoints, #channels)

Y.shape - (#samples)

In [6]:
X_train = np.random.rand(100, 1, 120, 64).astype('float32') # np.random.rand generates between [0, 1)
y_train = np.round(np.random.rand(100).astype('float32')) # binary data, so we round it to 0 or 1.

X_val = np.random.rand(100, 1, 120, 64).astype('float32')
y_val = np.round(np.random.rand(100).astype('float32'))

X_test = np.random.rand(100, 1, 120, 64).astype('float32')
y_test = np.round(np.random.rand(100).astype('float32'))

### Loading the Data

In [8]:
training_files = pd.read_csv("clean_trainingfiles.csv")

In [15]:
user_1 = array(training_files[training_files['user'] == 1]["path"])

In [16]:
attention = "EEG_attention_dataset.csv"
effort = "EEG_effort_dataset.csv"
interest = "EEG_interest_dataset.csv"

In [28]:
user_1_tests = {}

for test in user_1:
    datasets = [test + "/" + attention, test + "/" + interest, test + "/" + effort]
    user_1_tests[extract_test_number(test)] = datasets

user_1_tests
    

test10
test11
test12
test14
test15
test13
test1
test2
test3
test5
test8
test6
test9
test7
test4


{10: ['/cs/home/ybk1/Dissertation/Experiment Anonymised Version/User001_Group_Test_30-08-2019--10-07-00/User001B_test10_IDT_10_30-08-2019/User001B_test10_IDT_10_30-08-2019_recording0/EEG_attention_dataset.csv',
  '/cs/home/ybk1/Dissertation/Experiment Anonymised Version/User001_Group_Test_30-08-2019--10-07-00/User001B_test10_IDT_10_30-08-2019/User001B_test10_IDT_10_30-08-2019_recording0/EEG_interest_dataset.csv',
  '/cs/home/ybk1/Dissertation/Experiment Anonymised Version/User001_Group_Test_30-08-2019--10-07-00/User001B_test10_IDT_10_30-08-2019/User001B_test10_IDT_10_30-08-2019_recording0/EEG_effort_dataset.csv'],
 11: ['/cs/home/ybk1/Dissertation/Experiment Anonymised Version/User001_Group_Test_30-08-2019--10-07-00/User001C_test11_BDT_11_30-08-2019/User001C_test11_BDT_11_30-08-2019_recording0/EEG_attention_dataset.csv',
  '/cs/home/ybk1/Dissertation/Experiment Anonymised Version/User001_Group_Test_30-08-2019--10-07-00/User001C_test11_BDT_11_30-08-2019/User001C_test11_BDT_11_30-08-2019

In [29]:
test_10_attention = pd.read_csv(user_1_tests[10][0])
test_10_attention 

Unnamed: 0,C1,C2,C3,C4,C5,C6,C7,C8,Timestamp,attention
0,208520.203125,-266677.78125,249952.484375,229751.750000,204797.812500,184424.953125,190706.890625,223252.703125,1.459584e+06,5.0
1,208641.656250,-266710.84375,250072.890625,229724.140625,204892.375000,184429.046875,190821.140625,223333.437500,1.459619e+06,5.0
2,208745.796875,-266736.40625,250180.078125,229704.781250,204981.453125,184438.437500,190925.000000,223416.156250,1.459630e+06,5.0
3,208799.625000,-266741.53125,250235.531250,229702.687500,205035.140625,184457.234375,190988.031250,223475.156250,1.459649e+06,5.0
4,208788.093750,-266730.53125,250224.328125,229715.281250,205030.843750,184471.921875,190984.796875,223486.359375,1.459660e+06,5.0
...,...,...,...,...,...,...,...,...,...,...
2395,208734.625000,-266508.21875,250261.953125,230237.031250,205071.859375,184635.234375,191025.703125,223626.453125,1.515933e+06,4.0
2396,208765.671875,-266517.28125,250284.265625,230226.828125,205075.765625,184626.453125,191036.156250,223619.921875,1.515959e+06,4.0
2397,208866.812500,-266544.81250,250380.109375,230203.843750,205141.765625,184625.546875,191120.078125,223670.562500,1.516002e+06,4.0
2398,209003.328125,-266569.15625,250514.109375,230186.390625,205246.328125,184640.578125,191244.343750,223763.437500,1.516039e+06,4.0


In [37]:
t10_AT_X_train = test_10_attention.iloc[:,:-2]
t10_AT_y_train = test_10_attention.iloc[:,-1]

print("X_train shape {0}\n y_train shape {1}".format(t10_AT_X_train.shape, t10_AT_y_train.shape))

X_train shape (2400, 8)
 y_train shape (2400,)


Scale the data

**Run**

In [7]:
batch_size = 32

for epoch in range(10):  # loop over the dataset multiple times
    print ("\nEpoch ", epoch)
    
    running_loss = 0.0
    for i in range(len(X_train)//batch_size-1):
        s = i*batch_size
        e = i*batch_size+batch_size
        
        inputs = torch.from_numpy(X_train[s:e])
        labels = torch.FloatTensor(np.array([y_train[s:e]]).T*1.0)
        
        # wrap them in Variable
        inputs, labels = Variable(inputs.cuda(0)), Variable(labels.cuda(0))

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        
        optimizer.step()
        
        running_loss += loss.data
    
    # Validation accuracy
    params = ["acc", "auc", "fmeasure"]
    print (params)
    print ("Training Loss ", running_loss)
    print ("Train - ", evaluate(net, X_train, y_train, params))
    print ("Validation - ", evaluate(net, X_val, y_val, params))
    print ("Test - ", evaluate(net, X_test, y_test, params))




Epoch  0
['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.8985, device='cuda:0')
Train -  [0.45, 0.4722222222222222, 0.6206896551724138]
Validation -  [0.43, 0.5491071428571428, 0.6013986013986014]
Test -  [0.51, 0.5120192307692307, 0.6666666666666667]

Epoch  1
['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.8078, device='cuda:0')
Train -  [0.47, 0.5092592592592593, 0.6131386861313869]
Validation -  [0.45, 0.42045454545454536, 0.5925925925925926]
Test -  [0.5, 0.5092147435897436, 0.6527777777777778]

Epoch  2
['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.6372, device='cuda:0')
Train -  [0.51, 0.5865539452495975, 0.608]
Validation -  [0.51, 0.48011363636363635, 0.6201550387596899]
Test -  [0.48, 0.4979967948717949, 0.5873015873015872]

Epoch  3
['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.4024, device='cuda:0')
Train -  [0.54, 0.5837359098228664, 0.5490196078431373]
Validation -  [0.49, 0.4679383116883116, 0.5142857142857142]
Test -  [0.51, 0.4803685897435897, 0.566371



['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.3420, device='cuda:0')
Train -  [0.65, 0.6694847020933977, 0.46153846153846156]
Validation -  [0.52, 0.42207792207792205, 0.25000000000000006]
Test -  [0.48, 0.4851762820512821, 0.2571428571428571]

Epoch  8
['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.3417, device='cuda:0')
Train -  [0.57, 0.532608695652174, 0.3174603174603175]
Validation -  [0.52, 0.5040584415584415, 0.27272727272727276]
Test -  [0.45, 0.5424679487179487, 0.1791044776119403]

Epoch  9
['acc', 'auc', 'fmeasure']
Training Loss  tensor(1.3416, device='cuda:0')
Train -  [0.59, 0.5499194847020934, 0.2807017543859649]
Validation -  [0.51, 0.4249188311688311, 0.3466666666666667]
Test -  [0.52, 0.5669070512820512, 0.3142857142857143]


