In [1]:
!which python3
import os
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from dataset_spectrogram import EEGDataset
from torch.utils.data import random_split
import neptune.new as neptune
from torchinfo import summary
from dataset_spectrogram import load_dataset
import random
import torch.utils.data as data

/usr/bin/python3


In [2]:
# load in the dataset


raw_data_dir = '../data'

trainingNights = 18
testNights = 4

print("\n\nTraining set\n")
training_set = load_dataset(range(trainingNights), raw_data_dir)

print("\nTest set\n")
test_set = load_dataset([x+trainingNights for x in range(testNights)], raw_data_dir)



Training set

../data/study_1A_mat_simple/S_01/night_1/spectrogram_bad_segments.npy
../data/study_1A_mat_simple/S_01/night_1/spectrogra_good_segments.npy
Memory usage: 148.823291 MB

../data/study_1A_mat_simple/S_01/night_1/spectrogram_good_segments.npy
../data/study_1A_mat_simple/S_01/night_2/spectrogram_bad_segments.npy
Memory usage: 266.361048 MB

../data/study_1A_mat_simple/S_01/night_2/spectrogram_good_segments.npy
../data/study_1A_mat_simple/S_01/night_3/spectrogram_bad_segments.npy
Memory usage: 371.216066 MB

../data/study_1A_mat_simple/S_01/night_3/spectrogram_good_segments.npy
../data/study_1A_mat_simple/S_01/night_4/spectrogram_bad_segments.npy
Memory usage: 507.576524 MB

../data/study_1A_mat_simple/S_01/night_4/spectrogram_good_segments.npy
../data/study_1A_mat_simple/S_02/night_1/spectrogram_bad_segments.npy
Memory usage: 693.987222 MB

../data/study_1A_mat_simple/S_02/night_1/spectrogram_good_segments.npy
../data/study_1A_mat_simple/S_02/night_2/spectrogram_bad_segment

In [48]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' #Check for cuda 
print(f'Using {device} device')

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        #self.flatten = nn.Flatten()
        self.conv_stack = nn.Sequential(
            nn.Conv2d(batch_size,32,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.Conv2d(32,64,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.Conv2d(128,64,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Flatten(),
            nn.Linear(155, 512),
            nn.ReLU(),
            nn.Linear(512,1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.conv_stack(x)
        return logits

Using cpu device


In [49]:
learning_rate = 1e-5
batch_size = 64


model = NeuralNetwork()
model.to(device)

#pos_weight: amount of positive examples compared to negative examples. Calculate as: negative_examples/positive_examples
loss = nn.BCEWithLogitsLoss() 

#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [51]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)
        # Compute prediction and loss
        pred = model(X) 
        pred = pred.reshape(-1)
        pred = pred.to(device)
        yFloat = y.type(torch.FloatTensor).to(device)
        
        loss = loss_fn(pred, yFloat)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Neptune logging
        run["training/batch/loss"].log(loss)
        

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
        if batch % 10000 == 0:
            print(f"Predicted values: \n{pred}")
            print(f"Actual values: \n{yFloat}")
            print(f"Difference: \n{(yFloat-pred)}")


def test_loop(dataloader_test, model, loss_fn, test_set = True):
    size = len(dataloader_test.dataset)
    num_batches = len(dataloader_test)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader_test:
            X = X.to(device)
            y = y.to(device)
            pred = model(X).reshape(-1).to(device) # Reshape to 1 dimension if using binary classification, otherwise keep dimensions from model output
            test_loss += loss_fn(pred, y.type(torch.FloatTensor).to(device)).item()
            correct += (pred.round() == y).type(torch.float).sum().item()
            

    test_loss /= num_batches
    correct /= size
    
    
    
    if test_set:
        print(f"Test set Error: \n Test Set Accuracy: {(100*correct):>0.5f}%, Avg Test Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/test_loss"].log(test_loss)
        run["testing/batch/test_Acc"].log(100*correct)
    
    else:
        print(f"Training Set Error: \n Training Set Accuracy: {(100*correct):>0.5f}%, Avg Training Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/training_loss"].log(test_loss)
        run["testing/batch/training_Acc"].log(100*correct)
        

In [52]:
train_dataloader = DataLoader(training_set, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
test_dataloader = DataLoader(test_set, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss


In [53]:

run = neptune.init(
    project="NTLAB/artifactDetect-ear", 
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxYTA4NzcxMy1lYmQ2LTQ3NTctYjRhNC02Mzk1NjdjMWM0NmYifQ==", # your credentials
    source_files=["trainModel_spectrogram.ipynb", "dataset_spectrogram.py"]
)  # Credentials


run['config/dataset/size'] = trainingNights 
run['config/model'] = type(model).__name__
run['config/modelSummary'] = str(model)
run['config/optimizer'] = type(optimizer).__name__
run['config/batch_size'] = batch_size
run['config/test_night'] = testNights
run['config/learning_rate'] = learning_rate


https://app.neptune.ai/NTLAB/artifactDetect-ear/e/AR-6
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [54]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    
    train_loop(train_dataloader, model, loss, optimizer)    
    test_loop(test_dataloader, model, loss)
    test_loop(train_dataloader, model, loss, test_set = False)

print("Done!")

Epoch 1
-------------------------------
loss: 0.630615  [    0/322876]
Predicted values: 
tensor([0.9999, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 1.0000, 0.4577, 1.0000,
        0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 1.0000, 0.4577,
        1.0000, 0.4577, 0.9999, 0.4577, 0.9993, 0.4577, 0.9998, 0.4577, 1.0000,
        0.4577, 1.0000, 0.4577, 1.0000, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577,
        1.0000, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 1.0000,
        0.4577, 1.0000, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577,
        1.0000, 0.4577, 1.0000, 0.4577, 0.9999, 0.4577, 0.9999, 0.4577, 0.9999,
        0.4577], grad_fn=<ReshapeAliasBackward0>)
Actual values: 
tensor([1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 

Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


loss:     nan  [256000/322876]


Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


loss:     nan  [320000/322876]


Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


Test set Error: 
 Test Set Accuracy: 0.00000%, Avg Test Set loss:      nan 



Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute testing/batch/test_loss


Training Set Error: 
 Training Set Accuracy: 0.00000%, Avg Training Set loss:      nan 

Epoch 2
-------------------------------
loss:     nan  [    0/322876]
Predicted values: 
tensor([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       grad_fn=<ReshapeAliasBackward0>)
Actual values: 
tensor([1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 0.])
Difference: 
tensor([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, n

Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute testing/batch/training_loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


loss:     nan  [64000/322876]


Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss
Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


loss:     nan  [128000/322876]


Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


KeyboardInterrupt: 

Error occurred during asynchronous operation processing: Cannot log infinite or NaN value to attribute training/batch/loss


In [None]:
# Test the model, and inspect the errors
from datetime import datetime
now = datetime.now()

print(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}")

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}") # Save



In [55]:
run.stop() # Stop the neptune logging run

Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.


All 1 operations synced, thanks for waiting!
