In [1]:
#!pip install neptune-client torch torchvision
#!pip install simplejson
#!pip install torchinfo
#!pip install neptune-notebooks
#!jupyter nbextension enable --py neptune-notebooks

In [2]:
import os
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from dataset import EEGDataset
from torch.utils.data import random_split
import neptune.new as neptune
from torchinfo import summary


Classification dataset version: mar-30-1


In [3]:
# load in the dataset

#raw_data_dir = '//uni.au.dk/dfs/Tech_EarEEG/Students/RD2022_Artefact_AkselStark/data/1A/study_1A_mat_simple'
raw_data_dir = '../data'

trainingNights = 7
testNights = 2
segLength = 750

print("Test set")
ds2 = EEGDataset(raw_data_dir,testNights, segLength, skips = trainingNights)

print("Training set")
ds1 = EEGDataset(raw_data_dir,trainingNights, segLength, skips = 0) #Instantiate a dataset using the directory of data, amount of night to include and amount of samples in a segment



Test set
../data/study_1A_mat_simple/S_01/night_1/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_1/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_01/night_2/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_2/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_01/night_3/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_3/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_01/night_4/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_4/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_02/night_1/artefact_annotations.npy
../data/study_1A_mat_simple/S_02/night_1/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_02/night_2/artefact_annotations.npy
../data/study_1A_mat_simple/S_02/night_2/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_02/night_3/artefact_annotations.npy
../data/study_1A_mat_simple/S_02/night_3/EEG_raw_250hz.npy
../data/study_1A_mat_simple/S_02/night_4/artefact_annotations.npy
Lables for night 0 loaded
../data/study_1A_mat_sim

In [4]:
#Calculate class imbalance
balancing_dataloader = DataLoader(ds1, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
artefacts = 0
good_samples = 0
for batch, (X, y) in enumerate(balancing_dataloader):
    if y == 1:
        artefacts += 1
    else:
        good_samples += 1
    if batch > 100000:
        break

class_ratio = good_samples/artefacts
print(f"class_ratio:{class_ratio}")


class_ratio:0.9875976387812295


In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' #Check for cuda 
print(f'Using {device} device')

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        #self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.BatchNorm1d(segLength+2),
            nn.Linear(segLength+2, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid(), 
        )

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

Using cuda device


In [6]:
learning_rate = 1e-5
batch_size = 64


model = NeuralNetwork()
model.to(device)

#pos_weight: amount of positive examples compared to negative examples. Calculate as: negative_examples/positive_examples
loss = nn.BCEWithLogitsLoss(pos_weight = class_ratio*torch.ones([batch_size]).to(device)) 

#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)
        # Compute prediction and loss
        pred = model(X)# Reshape to 1 dimension if using binary classification, otherwise keep dimensions from model output
        pred = pred.reshape(-1)
        pred = pred.to(device)
        yFloat = y.type(torch.FloatTensor).to(device)
        
        loss = loss_fn(pred, yFloat)
        


        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Neptune logging
        run["training/batch/loss"].log(loss)
        

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
        if batch % 10000 == 0:
            print(f"Predicted values: \n{pred}")
            print(f"Actual values: \n{yFloat}")
            print(f"Difference: \n{(yFloat-pred)}")


def test_loop(dataloader_test, model, loss_fn, test_set = True):
    size = len(dataloader_test.dataset)
    num_batches = len(dataloader_test)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader_test:
            X = X.to(device)
            y = y.to(device)
            pred = model(X).reshape(-1).to(device) # Reshape to 1 dimension if using binary classification, otherwise keep dimensions from model output
            test_loss += loss_fn(pred, y.type(torch.FloatTensor).to(device)).item()
            correct += (pred.round() == y).type(torch.float).sum().item()
            

    test_loss /= num_batches
    correct /= size
    
    
    
    if test_set:
        print(f"Test set Error: \n Test Set Accuracy: {(100*correct):>0.5f}%, Avg Test Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/test_loss"].log(test_loss)
        run["testing/batch/test_Acc"].log(100*correct)
    
    else:
        print(f"Training Set Error: \n Training Set Accuracy: {(100*correct):>0.5f}%, Avg Training Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/training_loss"].log(test_loss)
        run["testing/batch/training_Acc"].log(100*correct)

In [8]:
# Split data into train and test data (Commented out: Changed to loading different dataset class instances)
#trainSamples = int(ds1.__len__()*0.7)
#testSamples = int(ds1.__len__() - trainSamples)
#training_data, test_data = random_split(ds1, (trainSamples,testSamples), generator=torch.Generator().manual_seed(42))

#train_dataloader = DataLoader(training_data, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
#test_dataloader = DataLoader(test_data, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss


train_dataloader = DataLoader(ds1, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
test_dataloader = DataLoader(ds2, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss



In [10]:
# Initialize neptune
run = neptune.init(
    project="aksel.s.madsen/artefact-detection",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxYTA4NzcxMy1lYmQ2LTQ3NTctYjRhNC02Mzk1NjdjMWM0NmYifQ==",
    source_files=["trainModel.ipynb", "dataset.py"]
)  # Credentials


run['config/dataset/size'] = trainingNights # dict() object
run['config/model'] = type(model).__name__
run['config/modelSummary'] = summary(model, input_size=(batch_size, segLength + 2))
run['config/optimizer'] = type(optimizer).__name__
run['config/batch_size'] = batch_size
run['config/test_night'] = testNights
run['config/learning_rate'] = learning_rate
run['config/segLength'] = segLength



https://app.neptune.ai/aksel.s.madsen/artefact-detection/e/AR-37
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [None]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    
    train_loop(train_dataloader, model, loss, optimizer)    
    test_loop(test_dataloader, model, loss)
    test_loop(train_dataloader, model, loss, test_set = False)

print("Done!")

Epoch 1
-------------------------------
loss: 0.666107  [    0/1650495]
Predicted values: 
tensor([0.4896, 0.4903, 0.4902, 0.4902, 0.4828, 0.4904, 0.4907, 0.4904, 0.4923,
        0.4902, 0.4740, 0.4905, 0.4817, 0.4906, 0.4901, 0.4912, 0.4963, 0.4923,
        0.4967, 0.4921, 0.4894, 0.4902, 0.4919, 0.4902, 0.4882, 0.4903, 0.4915,
        0.4901, 0.4921, 0.4910, 0.4921, 0.4909, 0.4916, 0.4905, 0.4641, 0.4908,
        0.4928, 0.4908, 0.4895, 0.4898, 0.4869, 0.4907, 0.4914, 0.4952, 0.4578,
        0.4927, 0.4938, 0.4903, 0.4913, 0.4915, 0.4879, 0.4909, 0.4902, 0.4918,
        0.4844, 0.4899, 0.4905, 0.4904, 0.4930, 0.4899, 0.4891, 0.4898, 0.4906,
        0.4907], device='cuda:0', grad_fn=<ReshapeAliasBackward0>)
Actual values: 
tensor([1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1.,
        1., 0., 1., 0., 1.

loss: 0.595177  [64000/1650495]
loss: 0.502264  [128000/1650495]
loss: 0.519503  [192000/1650495]
loss: 0.507177  [256000/1650495]
loss: 0.511677  [320000/1650495]
loss: 0.505144  [384000/1650495]
loss: 0.561956  [448000/1650495]
loss: 0.543433  [512000/1650495]
loss: 0.513977  [576000/1650495]
loss: 0.812391  [640000/1650495]
Predicted values: 
tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        0.9942, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 0.9998, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.7674, 1.0000, 1.0000,
        1.0000], device='cuda:0', grad_fn=<ReshapeAliasBackward0>)
Actual values: 
tensor([1

loss: 0.530794  [704000/1650495]
loss: 0.506454  [768000/1650495]
loss: 0.507176  [832000/1650495]
loss: 0.526207  [896000/1650495]
loss: 0.512748  [960000/1650495]
loss: 0.532470  [1024000/1650495]
loss: 0.513302  [1088000/1650495]
loss: 0.512414  [1152000/1650495]
loss: 0.540674  [1216000/1650495]
loss: 0.502548  [1280000/1650495]
Predicted values: 
tensor([1.0000e+00, 9.7867e-08, 1.0000e+00, 6.4314e-08, 1.0000e+00, 1.6121e-07,
        9.9240e-01, 1.2688e-07, 1.0000e+00, 6.8108e-08, 1.0000e+00, 2.2786e-07,
        1.0000e+00, 1.6209e-06, 1.0000e+00, 5.6692e-07, 1.0000e+00, 1.6422e-07,
        1.0000e+00, 1.0916e-07, 1.0000e+00, 2.6069e-07, 1.0000e+00, 1.3978e-06,
        1.0000e+00, 1.2951e-07, 1.0000e+00, 5.7398e-07, 1.0000e+00, 1.1943e-07,
        1.0000e+00, 8.8162e-08, 7.2625e-01, 1.1575e-07, 1.0000e+00, 2.1688e-07,
        1.0000e+00, 1.9333e-07, 1.0000e+00, 9.2004e-08, 1.0000e+00, 1.2680e-06,
        1.0000e+00, 9.7014e-08, 1.0000e+00, 3.2372e-06, 1.0000e+00, 1.1036e-07,
      

loss: 0.506578  [1344000/1650495]
loss: 0.507124  [1408000/1650495]
loss: 0.501763  [1472000/1650495]
loss: 0.850761  [1536000/1650495]
loss: 0.501268  [1600000/1650495]
Test set Error: 
 Test Set Accuracy: 87.27502%, Avg Test Set loss: 0.549091 

Training Set Error: 
 Training Set Accuracy: 92.78950%, Avg Training Set loss: 0.530899 

Epoch 5
-------------------------------
loss: 0.512458  [    0/1650495]
Predicted values: 
tensor([1.0000e+00, 6.4083e-10, 1.0000e+00, 1.3498e-08, 1.0000e+00, 3.9009e-09,
        9.0670e-01, 8.8489e-10, 1.0000e+00, 4.3976e-10, 1.0000e+00, 4.1283e-10,
        1.0000e+00, 3.1829e-10, 1.0000e+00, 3.7230e-10, 1.0000e+00, 1.2877e-09,
        1.0000e+00, 8.7330e-10, 9.8483e-01, 5.6928e-10, 3.7766e-08, 3.3389e-10,
        1.0000e+00, 6.5552e-10, 1.0000e+00, 1.5268e-10, 1.0000e+00, 3.5066e-10,
        1.0000e+00, 1.1375e-09, 4.8348e-03, 1.2432e-09, 2.0893e-03, 7.2167e-10,
        1.0000e+00, 3.4100e-09, 1.0000e+00, 8.6897e-07, 9.2539e-06, 1.0253e-09,
        1.0

In [None]:
# Test the model, and inspect the errors
from datetime import datetime
now = datetime.now()

print(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}")

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}") # Save

print('debug')
#randChannel = 
#testData = ds2
#self.labels[channel, start : start + self.sectionLength]


In [None]:
new_model = torch.load("../trained_models/model_03_21_2022_23_19_06")

In [None]:
run.stop() # Stop the neptune logging run

In [17]:
!which python3

/opt/anaconda-2020.11/bin/python3
/bin/bash: ^s: command not found
