In [1]:
#!pip install neptune-client torch torchvision
#!pip install simplejson
#!pip install torchinfo
#!pip install neptune-notebooks
#!jupyter nbextension enable --py neptune-notebooks
#!python3 -m pip install torch
#!python3 -V
#!which python3
#!conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
#!pip install neptune-client
#!pip install torchinfo
#!conda install torchinfo

# Install pip packages in the current Jupyter kernel
#import sys
#!{sys.executable} -m pip install torchinfo
#!{sys.executable} -m pip install neptune-notebooks
#!{sys.executable} -m pip install neptune-client




In [2]:
!which python3
import os
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from dataset import EEGDataset
from torch.utils.data import random_split
import neptune.new as neptune
from torchinfo import summary


/opt/anaconda-2021.11/bin/python3
Classification dataset version: apr-16-22-v1


In [3]:
# load in the dataset

#raw_data_dir = '//uni.au.dk/dfs/Tech_EarEEG/Students/RD2022_Artefact_AkselStark/data/1A/study_1A_mat_simple'
raw_data_dir = '../data'

trainingNights = 9
testNights = 3
segLength = 750
filtered = True
dataSkips = 16

print("\nTest set")
ds2 = EEGDataset(raw_data_dir,testNights, segLength, skips = trainingNights + dataSkips, filtered = filtered)

print("\n\nTraining set")
ds1 = EEGDataset(raw_data_dir,trainingNights, segLength, skips = dataSkips , filtered = filtered) #Instantiate a dataset using the directory of data, amount of night to include and amount of samples in a segment




Test set
../data/study_1A_mat_simple/S_01/night_1/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_1/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_01/night_2/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_2/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_01/night_3/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_3/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_01/night_4/artefact_annotations.npy
../data/study_1A_mat_simple/S_01/night_4/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_02/night_1/artefact_annotations.npy
../data/study_1A_mat_simple/S_02/night_1/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_02/night_2/artefact_annotations.npy
../data/study_1A_mat_simple/S_02/night_2/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_02/night_3/artefact_annotations.npy
../data/study_1A_mat_simple/S_02/night_3/EEG_raw_250hz_unfiltered.npy
../data/study_1A_mat_simple/S_02/night

In [4]:
#Calculate class imbalance
balancing_dataloader = DataLoader(ds1, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
artefacts = 0
good_samples = 0
for batch, (X, y) in enumerate(balancing_dataloader):
    if y == 1:
        artefacts += 1
    else:
        good_samples += 1
    if batch > 100000:
        break

class_ratio = good_samples/artefacts
print(f"class_ratio:{class_ratio}")


class_ratio:0.9779659005498635


In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' #Check for cuda 
print(f'Using {device} device')

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        #self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.BatchNorm1d(segLength),
            nn.Linear(segLength, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
            nn.Sigmoid(), 
        )

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

Using cuda device


In [6]:
learning_rate = 1e-5
batch_size = 64


model = NeuralNetwork()
model.to(device)

#pos_weight: amount of positive examples compared to negative examples. Calculate as: negative_examples/positive_examples
loss = nn.BCEWithLogitsLoss(pos_weight = class_ratio*torch.ones([batch_size]).to(device)) 

#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)
        # Compute prediction and loss
        pred = model(X)# Reshape to 1 dimension if using binary classification, otherwise keep dimensions from model output
        pred = pred.reshape(-1)
        pred = pred.to(device)
        yFloat = y.type(torch.FloatTensor).to(device)
        
        loss = loss_fn(pred, yFloat)
        


        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Neptune logging
        run["training/batch/loss"].log(loss)
        

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
        if batch % 10000 == 0:
            print(f"Predicted values: \n{pred}")
            print(f"Actual values: \n{yFloat}")
            print(f"Difference: \n{(yFloat-pred)}")


def test_loop(dataloader_test, model, loss_fn, test_set = True):
    size = len(dataloader_test.dataset)
    num_batches = len(dataloader_test)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader_test:
            X = X.to(device)
            y = y.to(device)
            pred = model(X).reshape(-1).to(device) # Reshape to 1 dimension if using binary classification, otherwise keep dimensions from model output
            test_loss += loss_fn(pred, y.type(torch.FloatTensor).to(device)).item()
            correct += (pred.round() == y).type(torch.float).sum().item()
            

    test_loss /= num_batches
    correct /= size
    
    
    
    if test_set:
        print(f"Test set Error: \n Test Set Accuracy: {(100*correct):>0.5f}%, Avg Test Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/test_loss"].log(test_loss)
        run["testing/batch/test_Acc"].log(100*correct)
    
    else:
        print(f"Training Set Error: \n Training Set Accuracy: {(100*correct):>0.5f}%, Avg Training Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/training_loss"].log(test_loss)
        run["testing/batch/training_Acc"].log(100*correct)

In [8]:
# Split data into train and test data (Commented out: Changed to loading different dataset class instances)
#trainSamples = int(ds1.__len__()*0.7)
#testSamples = int(ds1.__len__() - trainSamples)
#training_data, test_data = random_split(ds1, (trainSamples,testSamples), generator=torch.Generator().manual_seed(42))

#train_dataloader = DataLoader(training_data, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
#test_dataloader = DataLoader(test_data, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss


train_dataloader = DataLoader(ds1, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
test_dataloader = DataLoader(ds2, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss



In [9]:
# Initialize neptune
run = neptune.init(
    project="aksel.s.madsen/artefact-detection",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxYTA4NzcxMy1lYmQ2LTQ3NTctYjRhNC02Mzk1NjdjMWM0NmYifQ==",
    source_files=["trainModel.ipynb", "dataset.py"]
)  # Credentials


run['config/dataset/size'] = trainingNights # dict() object
run['config/model'] = type(model).__name__
run['config/modelSummary'] = summary(model, input_size=(batch_size, segLength))
run['config/optimizer'] = type(optimizer).__name__
run['config/batch_size'] = batch_size
run['config/test_night'] = testNights
run['config/learning_rate'] = learning_rate
run['config/segLength'] = segLength
run['config/filtered'] = filtered
run['config/dataSkips'] = dataSkips



https://app.neptune.ai/aksel.s.madsen/artefact-detection/e/AR-61
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [None]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    
    train_loop(train_dataloader, model, loss, optimizer)    
    test_loop(test_dataloader, model, loss)
    test_loop(train_dataloader, model, loss, test_set = False)

print("Done!")

Epoch 1
-------------------------------
loss: 0.647066  [    0/2277745]
Predicted values: 
tensor([0.5108, 0.5108, 0.5108, 0.5107, 0.5108, 0.5107, 0.5108, 0.5107, 0.5105,
        0.5107, 0.5107, 0.5107, 0.5108, 0.5107, 0.5108, 0.5107, 0.5108, 0.5107,
        0.5108, 0.5107, 0.5108, 0.5107, 0.5108, 0.5105, 0.5108, 0.5109, 0.5105,
        0.5107, 0.5108, 0.5115, 0.5092, 0.5111, 0.5108, 0.5107, 0.5098, 0.5107,
        0.4949, 0.5108, 0.5108, 0.5107, 0.5108, 0.5107, 0.5110, 0.5087, 0.5108,
        0.5088, 0.5108, 0.5080, 0.5120, 0.5105, 0.5108, 0.5107, 0.5108, 0.5106,
        0.5108, 0.5108, 0.5104, 0.5107, 0.5109, 0.5107, 0.5111, 0.5107, 0.5109,
        0.5107], device='cuda:0', grad_fn=<ReshapeAliasBackward0>)
Actual values: 
tensor([1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
        1., 0., 1., 0., 1.

In [None]:
# Test the model, and inspect the errors
from datetime import datetime
now = datetime.now()

print(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}")

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}") # Save

print('debug')
#randChannel = 
#testData = ds2
#self.labels[channel, start : start + self.sectionLength]


In [None]:
new_model = torch.load("../trained_models/model_03_21_2022_23_19_06")

In [None]:
run.stop() # Stop the neptune logging run