In [1]:
!which python3
import os
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
from dataset_spectrogram import EEGDataset
from torch.utils.data import random_split
import neptune.new as neptune
from torchinfo import summary
from dataset_spectrogram import load_dataset
import random
import torch.utils.data as data
from datetime import datetime

/opt/anaconda-2021.11/bin/python3
Spectrogram dataset version 9


In [2]:
# load in the dataset


raw_data_dir = '../data'

trainingNights = 70
testNights = 8

print("\nTest set\n")
test_set = load_dataset(range(testNights), raw_data_dir, normalized = False)

print("\n\nTraining set\n")
training_set = load_dataset([x+testNights for x in range(trainingNights)], raw_data_dir, normalized = False)





Test set

../data/study_1A_mat_simple/S_01/night_1/spectrogram_bad_segments_unnormalized.npy
../data/study_1A_mat_simple/S_01/night_1/spectrogram_good_segments_unnormalized.npy
Memory usage: 0.037827 MB

Lengths:

Good data length: 7368
Bad data length: 7368
Caluculated length: 14735
../data/study_1A_mat_simple/S_01/night_2/spectrogram_bad_segments_unnormalized.npy
../data/study_1A_mat_simple/S_01/night_2/spectrogram_good_segments_unnormalized.npy
Memory usage: 0.044097 MB

Lengths:

Good data length: 4292
Bad data length: 4292
Caluculated length: 8583
../data/study_1A_mat_simple/S_01/night_3/spectrogram_bad_segments_unnormalized.npy
../data/study_1A_mat_simple/S_01/night_3/spectrogram_good_segments_unnormalized.npy
Memory usage: 0.047816 MB

Lengths:

Good data length: 6110
Bad data length: 6110
Caluculated length: 12219
../data/study_1A_mat_simple/S_01/night_4/spectrogram_bad_segments_unnormalized.npy
../data/study_1A_mat_simple/S_01/night_4/spectrogram_good_segments_unnormalized.np

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu' #Check for cuda 
print(f'Using {device} device')



class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        #self.flatten = nn.Flatten()
        self.conv_stack = nn.Sequential(
            nn.Conv2d(batch_size,32,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.Conv2d(32,64,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.Conv2d(128,64,kernel_size=3,stride=1,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Flatten(),
            nn.Linear(155, 512),
            nn.ReLU(),
            nn.Linear(512,1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.conv_stack(x)
        return logits

Using cuda device


In [4]:
learning_rate = 1e-5
batch_size = 64


model = NeuralNetwork()
model.to(device)

loss = nn.BCEWithLogitsLoss() 

#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [5]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(device)
        y = y.to(device)
        # Compute prediction and loss
        pred = model(X) 
        pred = pred.reshape(-1)
        pred = pred.to(device)
        yFloat = y.type(torch.FloatTensor).to(device)
        
        loss = loss_fn(pred, yFloat)
        
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Neptune logging
        run["training/batch/loss"].log(loss)
        

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
        
        if batch % 4000 == 0:
            print(f"Predicted values: \n{pred}")
            print(f"Actual values: \n{yFloat}")
            print(f"Difference: \n{(yFloat-pred)}")


def test_loop(dataloader_test, model, loss_fn, test_set = True):
    size = len(dataloader_test.dataset)
    num_batches = len(dataloader_test)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader_test:
            X = X.to(device)
            y = y.to(device)
            pred = model(X).reshape(-1).to(device) # Reshape to 1 dimension if using binary classification, otherwise keep dimensions from model output
            test_loss += loss_fn(pred, y.type(torch.FloatTensor).to(device)).item()
            correct += (pred.round() == y).type(torch.float).sum().item()
            

    test_loss /= num_batches
    correct /= size
    
    
    
    if test_set:
        print(f"Test set Error: \n Test Set Accuracy: {(100*correct):>0.5f}%, Avg Test Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/test_loss"].log(test_loss)
        run["testing/batch/test_Acc"].log(100*correct)
    
    else:
        print(f"Training Set Error: \n Training Set Accuracy: {(100*correct):>0.5f}%, Avg Training Set loss: {test_loss:>8f} \n")
        
        # Neptune logging
        run["testing/batch/training_loss"].log(test_loss)
        run["testing/batch/training_Acc"].log(100*correct)
    


In [6]:
train_dataloader = DataLoader(training_set, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss
test_dataloader = DataLoader(test_set, batch_size=64, drop_last = True) # Drop_last, to avoid incomplete batches, which won't work with weighted loss


In [7]:

run = neptune.init(
    project="NTLAB/artifactDetect-ear", 
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxYTA4NzcxMy1lYmQ2LTQ3NTctYjRhNC02Mzk1NjdjMWM0NmYifQ==", # your credentials
    source_files=["train_model_spectrogram.ipynb", "dataset_spectrogram.py"]
)  # Credentials


run['config/dataset/size'] = trainingNights 
run['config/model'] = type(model).__name__
run['config/modelSummary'] = str(model)
run['config/optimizer'] = type(optimizer).__name__
run['config/batch_size'] = batch_size
run['config/test_night'] = testNights
run['config/learning_rate'] = learning_rate


https://app.neptune.ai/NTLAB/artifactDetect-ear/e/AR-41
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [None]:
epochs = 200
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    
    train_loop(train_dataloader, model, loss, optimizer)       
    test_loop(test_dataloader, model, loss)
    
    if (t % 3) == 0:# Only run training accuracy test every third time
        test_loop(train_dataloader, model, loss, test_set = False)
    if (t % 20) == 0:
        now = datetime.now()

        print(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}")

        model_scripted = torch.jit.script(model) # Export to TorchScript
        model_scripted.save(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}") # Save

        
        
now = datetime.now()

print(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}")

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}") # Save

print("Done!")

Epoch 1
-------------------------------
loss: 0.743425  [    0/1690404]
Predicted values: 
tensor([9.6568e-09, 6.9571e-15, 8.0568e-02, 2.1527e-08, 4.9947e-15, 1.0000e+00,
        7.1633e-14, 3.0621e-16, 9.0358e-13, 1.1367e-08, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 9.7339e-30, 1.0000e+00, 2.1545e-22, 1.4739e-04, 1.0000e+00,
        4.1939e-17, 9.9282e-01, 2.4297e-01, 1.4544e-02, 7.4456e-12, 1.0000e+00,
        9.9983e-01, 1.7424e-17, 1.3964e-19, 1.4180e-16, 1.7088e-06, 2.2962e-10,
        4.1122e-13, 1.0000e+00, 9.9993e-01, 2.2392e-12, 4.3564e-05, 1.1527e-16,
        1.7587e-11, 3.3085e-39, 3.4818e-13, 1.0000e+00, 8.6905e-12, 1.0296e-03,
        1.3590e-12, 1.5365e-15, 5.3618e-18, 3.4438e-20, 3.9291e-10, 5.9641e-01,
        1.9934e-08, 1.6497e-31, 1.5052e-25, 9.9996e-01, 2.7288e-13, 4.5447e-01,
        1.3416e-14, 2.4903e-11, 6.9261e-01, 8.6422e-15, 2.6101e-27, 1.5247e-05,
        4.4099e-08, 3.1835e-05, 5.8095e-26, 2.9692e-06], device='cuda:0',
       grad_fn=<ReshapeAliasBackwar

In [None]:
# Test the model, and inspect the errors
from datetime import datetime
now = datetime.now()

print(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}")

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save(f"../trained_models/model_{now.strftime('%m_%d_%Y_%H_%M_%S')}") # Save



In [None]:
run.stop() # Stop the neptune logging run

In [None]:
for batch, (X, y) in enumerate(train_dataloader):
    if sum(sum(sum(np.isnan(X)))):
        print(batch)
        print("X:   NAN FOUND!!!\nNAN FOUND!!!\nNAN FOUND!!!\nNAN FOUND!!!\nNAN FOUND!!!\nNAN FOUND!!!\nNAN FOUND!!!\n")
        print(sum(sum(sum(np.isnan(X)))))
        
    if batch % 1000 == 100:
        print(batch)
    

In [None]:
for index, i in enumerate(training_set):
    if sum(sum(np.isnan(i[0]))):
        #print(index)
        #print(i)
        print(f"X:   NAN FOUND!!! At index: {index}. Amount of nans: {sum(sum(np.isnan(i[0])))}")
        #print(sum(sum(np.isnan(i[0]))))
        
    if index % 50000 == 0:
        print(index)
    