In [47]:
import numpy as np

from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable, gradcheck
from torch.utils.data import TensorDataset, DataLoader

import pandas as pd

from matplotlib import pyplot

import mne
from mne.io import concatenate_raws, read_raw_fif
import mne.viz

import math

from os import walk

In [48]:
# take a look at some of the data
data_file = 'study1_eeg/study1_EEG_P-01_FN_Trial-001.csv'

data_P_09 = pd.read_csv(data_file)
data_P_09.tail()

Unnamed: 0,-0.698,-0.762,-0.944,-1.122,-1.108,-0.723,0.07,1.125,2.155,2.898,...,20.833,20.093,19.654,19.536,19.697,20.011,20.335,20.588,20.827,21.199
58,-3.356,-3.23,-2.968,-2.78,-2.813,-3.067,-3.422,-3.729,-3.881,-3.842,...,-5.171,-5.669,-6.558,-7.532,-8.214,-8.319,-7.755,-6.61,-5.066,-3.331
59,-0.936,-1.833,-2.833,-3.619,-3.919,-3.637,-2.916,-2.085,-1.501,-1.373,...,-3.416,-2.966,-2.921,-3.143,-3.33,-3.237,-2.777,-1.95,-0.725,0.973
60,1.232,0.121,-0.805,-1.356,-1.463,-1.185,-0.671,-0.093,0.447,0.963,...,-1.272,-2.243,-3.477,-4.764,-5.79,-6.248,-5.933,-4.799,-2.954,-0.63
61,0.585,0.034,-0.144,0.065,0.452,0.721,0.667,0.278,-0.293,-0.81,...,-7.846,-7.245,-6.739,-6.557,-6.794,-7.355,-8.004,-8.513,-8.786,-8.876
62,4.899,5.001,4.836,4.464,3.933,3.295,2.64,2.102,1.814,1.85,...,-2.848,-2.51,-2.391,-2.514,-2.775,-2.982,-2.92,-2.431,-1.453,-0.03


In [49]:
# take some data that was already formatted, from this link: https://neuro.inf.unibe.ch/AlgorithmsNeuroscience/Tutorial_files/DatasetConstruction.html
data_file = 'study1_eeg/epochdata/master'

# Read the EEG epochs:
epochs = mne.read_epochs(data_file + '.fif', verbose='error')
print(epochs)

<EpochsFIF  |   16666 events (all good), 0 - 1.49609 sec, baseline off, ~3.05 GB, data loaded,
 'FN': 4336
 'FP': 4371
 'FU': 4239
 'NN': 1238
 'NP': 1264
 'NU': 1218>


In [50]:
epochs_UN = epochs['FU', 'FN'] # Unpleasant vs. Neutral
epochs_UP = epochs['FU', 'FP'] # Unpleasant vs. Pleasant
epochs_NP = epochs['FN', 'FP'] # Neutral vs. Pleasant

# Dataset with unpleasant and neutral events
print(epochs_UN)
data_UN = epochs_UN.get_data() #we will classify between unpleasant and neutral
labels_UN = epochs_UN.events[:,-1]
print(len(labels_UN))

<EpochsFIF  |   8575 events (all good), 0 - 1.49609 sec, baseline off, ~1.57 GB, data loaded,
 'FN': 4336
 'FU': 4239>
8575


In [51]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [52]:
torch.manual_seed(3)
torch.cuda.manual_seed(3)
train_data_UN, test_data_UN, labels_train_UN, labels_test_UN = train_test_split(data_UN, labels_UN, test_size=0.3, random_state=42)

In [53]:
print(labels_train_UN.shape, labels_test_UN.shape)
print(labels_train_UN.shape, labels_test_UN.shape, train_data_UN.shape[-1])
chunk_train = labels_train_UN.shape[0]
chunk_test = labels_test_UN.shape[0]
channels = train_data_UN.shape[1]
timepoints = train_data_UN.shape[2]


(6002,) (2573,)
(6002,) (2573,) 384


In [54]:
BATCH_SIZE = 512
BATCH_SIZE2 = 512

eeg_data_scaler = StandardScaler()

X_train = eeg_data_scaler.fit_transform(train_data_UN.reshape(-1, train_data_UN.shape[-1])).reshape(train_data_UN.shape)
X_test = eeg_data_scaler.fit_transform(test_data_UN.reshape(-1, test_data_UN.shape[-1])).reshape(test_data_UN.shape)

labels_train_UN = np.array([1 if x > 0 else 0 for x in labels_train_UN])
labels_test_UN = np.array([1 if x > 0 else 0 for x in labels_test_UN])

labels_train_UN = labels_train_UN.reshape((chunk_train, 1))
labels_train_UN = labels_train_UN.astype(np.float32)
X_actual = torch.from_numpy(labels_train_UN)

labels_test_UN = labels_test_UN.reshape((chunk_test, 1))
labels_test_UN = labels_test_UN.astype(np.float32)
X_test_actual = torch.from_numpy(labels_test_UN)

X_train = torch.from_numpy(X_train)
X_train = X_train.unsqueeze(1)
X_test = torch.from_numpy(X_test)
X_test = X_test.unsqueeze(1)

train_batches = math.ceil(chunk_train / BATCH_SIZE)
test_batches = math.ceil(chunk_test / BATCH_SIZE2)
print(X_train.shape, X_actual.shape, X_test.shape, train_batches, test_batches)

torch.Size([6002, 1, 64, 384]) torch.Size([6002, 1]) torch.Size([2573, 1, 64, 384]) 12 6


In [55]:
train_set = TensorDataset(X_train, X_actual)
test_set = TensorDataset(X_test, X_test_actual)

train_set_loader = DataLoader(train_set, batch_size = BATCH_SIZE, shuffle=False)
test_set_loader = DataLoader(test_set, batch_size = BATCH_SIZE2, shuffle=False)

In [71]:
# hyperparameters
freq, avg1stride, avg2stride = 256, (1, 4), (1, 8)
convstride = 1 # stride for each conv2D
conv1_neurons = 8
conv2_neurons = 16
conv3_neurons = 32
conv4_neurons = 16
flat1_out = 12
kern1size = freq // 2
kern3size = 32

In [72]:
padding_needed = (kern1size - 1) / 2
conv1outx, conv1outy = (channels, (timepoints + (2 * padding_needed) - kern1size)/convstride + 1)

conv2outx, conv2outy = ((conv1outx - channels)/convstride + 1, conv1outy)
conv2outx, conv2outy = conv2outx // avg1stride[0], conv2outy // avg1stride[1]

conv3outx, conv3outy = (conv2outx, (conv2outy - kern3size)/convstride + 1)

conv4outx, conv4outy = (conv3outx, conv3outy)
conv4outx, conv4outy = (conv4outx // avg2stride[0], conv4outy // avg2stride[1])
flat1_in = int(conv4outx * conv4outy * conv4_neurons)

In [73]:
CNNPoor = nn.Sequential(
    nn.ZeroPad2d((math.floor(padding_needed), math.ceil(padding_needed), 0, 0)),
    nn.Conv2d(1, conv1_neurons, (1, kern1size), bias=False),
    nn.ELU(),
    nn.BatchNorm2d(conv1_neurons),
    
    nn.Conv2d(conv1_neurons, conv2_neurons, (channels, 1), bias=False, groups = conv1_neurons),
    nn.ELU(),
    nn.BatchNorm2d(conv2_neurons),
    nn.AvgPool2d(avg1stride),
    nn.Dropout(p = 0.25),
    
    nn.Conv2d(conv2_neurons, conv3_neurons, (1, kern3size), bias=False, groups = conv2_neurons),
    nn.Conv2d(conv3_neurons, conv4_neurons, kernel_size=1, bias=False),
    nn.ELU(),
    nn.BatchNorm2d(conv4_neurons),
    nn.AvgPool2d(avg2stride),
    nn.Dropout(p = 0.25),
    
    nn.Flatten(),

    nn.Linear(flat1_in, flat1_out),
    nn.Linear(flat1_out, 1),
    nn.Sigmoid(),
)

CNNPoor.to(device)

Sequential(
  (0): ZeroPad2d(padding=(63, 64, 0, 0), value=0.0)
  (1): Conv2d(1, 8, kernel_size=(1, 128), stride=(1, 1), bias=False)
  (2): ELU(alpha=1.0)
  (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): Conv2d(8, 16, kernel_size=(64, 1), stride=(1, 1), groups=8, bias=False)
  (5): ELU(alpha=1.0)
  (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): AvgPool2d(kernel_size=(1, 4), stride=(1, 4), padding=0)
  (8): Dropout(p=0.25, inplace=False)
  (9): Conv2d(16, 32, kernel_size=(1, 32), stride=(1, 1), groups=16, bias=False)
  (10): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (11): ELU(alpha=1.0)
  (12): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (13): AvgPool2d(kernel_size=(1, 8), stride=(1, 8), padding=0)
  (14): Dropout(p=0.25, inplace=False)
  (15): Flatten()
  (16): Linear(in_features=128, out_features=12, bias=True)
  (17): Linear(in_features=

In [74]:
loss_function = nn.BCELoss()
optimizer = optim.Adam(CNNPoor.parameters(), lr = 0.005)

In [75]:
def test(model, device, test_loader):
    
    model.eval()
    correct = 0
    tot_loss = 0
    acc_score, prec_score, rec_score = 0, 0, 0
    with torch.no_grad():
        for (data, labels) in test_loader:
            data, labels = data.to(device), labels.to(device)
            
            classification = model(data.float())
            loss = loss_function(classification, labels)

            pred = torch.round(classification)
            correct += pred.eq(labels.view_as(pred)).sum().item()
            tot_loss += loss.item()

            acc_score += accuracy_score(labels.cpu(), pred.cpu())
            prec_score += precision_score(labels.cpu(), pred.cpu())
            rec_score += recall_score(labels.cpu(), pred.cpu())

        print("\nTest set: Average loss: {:.6f}, Accuracy: {:.6f}".format(tot_loss / test_batches, 
                                                                          correct / len(test_loader.dataset)))
        print("sklearn accuracy: {:.6f} precision: {:.6f} recall: {:.6f}\n".format(acc_score / test_batches,
                                                                                   prec_score / test_batches,
                                                                                   rec_score / test_batches))

In [76]:
def train(model, device, train_loader, optimizer, epoch):

    model.train()
    correct = 0
    batch = 0
    tot_loss = 0
    for (data, labels) in train_loader:
        data, labels = data.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        classification = model(data.float())
        loss = loss_function(classification, labels)

        loss.backward()
        optimizer.step()

        pred = torch.round(classification)
        correct += pred.eq(labels.view_as(pred)).sum().item()
        tot_loss += loss.item()

        batch += 1

        if batch == train_batches:
            print("Epoch: {}".format(epoch))
            print("\tAverage loss: {:.6f}".format(tot_loss / batch))
            print("\tAccuracy: {:.6f}".format(correct / len(train_loader.dataset)))

In [77]:
for epoch in range(25):
    train(CNNPoor, device, train_set_loader, optimizer, epoch)
    test(CNNPoor, device, test_set_loader)

Epoch: 0
	Average loss: 0.701715
	Accuracy: 0.512829

Test set: Average loss: 0.693126, Accuracy: 0.524291
sklearn accuracy: 0.514098 precision: 0.503505 recall: 0.498959

Epoch: 1
	Average loss: 0.695039
	Accuracy: 0.529324

Test set: Average loss: 0.699401, Accuracy: 0.530509
sklearn accuracy: 0.519306 precision: 0.508677 recall: 0.503194

Epoch: 2
	Average loss: 0.685619
	Accuracy: 0.553149

Test set: Average loss: 0.684726, Accuracy: 0.551108
sklearn accuracy: 0.574043 precision: 0.577953 recall: 0.500385

Epoch: 3
	Average loss: 0.674006
	Accuracy: 0.579807

Test set: Average loss: 0.683235, Accuracy: 0.556937
sklearn accuracy: 0.566431 precision: 0.558929 recall: 0.571830

Epoch: 4
	Average loss: 0.665798
	Accuracy: 0.593469

Test set: Average loss: 0.678056, Accuracy: 0.561601
sklearn accuracy: 0.595328 precision: 0.606513 recall: 0.526064

Epoch: 5
	Average loss: 0.660601
	Accuracy: 0.606131

Test set: Average loss: 0.674534, Accuracy: 0.565099
sklearn accuracy: 0.610752 precis