In [1]:
from torch import nn, optim
import numpy as np
import pickle as p
import torch
from tqdm.notebook import trange
from tqdm import tqdm
import matplotlib.pyplot as plt

from model import ConvNet
from sklearn.metrics import f1_score
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from dataset import SleepDataset
from meanAveragePrecision import computeMeanAveragePrecision
import utils

In [9]:
# function for training and evaluation
def test(model, test_dataloader):
    accuracies = []
    f1s = []
    softmax_scores = []

    with torch.no_grad():

        model.eval()

        running_accuracy = 0.0
        conf_mat = torch.zeros(5,5)

        for input, target in test_dataloader:
            
            output = model(input)

            prediction = torch.argmax(output, dim=1).float() 
            softmax = torch.softmax(output, dim=1)
 
            for o, t in zip(prediction, target):
                if o == t:
                    running_accuracy+=1/(len(test_dataloader)*batch_size)
                
            #running_f1 += f1_score(target, prediction)
            conf_mat += utils.confusion_mat(output, target)

            #running_f1 /= len(val_dataloader)

            accuracies.append(running_accuracy)
            #f1_val.append(running_f1)
            softmax_scores.append(softmax.tolist())

            # output
            if (len(accuracies) % 20 == 0 or len(accuracies) == len(test_dataloader)):
                tqdm.write('No. {} (test) -- acc: {:.4f}'.format(len(accuracies), running_accuracy))

        # mean average precision
        softmax_scores = np.asarray(softmax_scores).squeeze(1)
        mean_avg_precision, _ = computeMeanAveragePrecision(labels, softmax_scores)

        # f1 score
        f1 = utils.f1_score(conf_mat)


        # print last value of metrics
        tqdm.write('Final accuracy: {:.4f}, mean avg precision {:.4f}, f1: {:.4f}'.format(running_accuracy, mean_avg_precision, f1))


    # make metrics callable outside this function
    test.accuracy = accuracies
    test.f1 = f1
    test.mean_avg_precision = mean_avg_precision

In [7]:
# Load data and label files.
# NOTE(review): an earlier comment here described a 4-D layout
# [datasets, time series, channels, devices] with shape [2284, 200, 3, 9],
# but the recorded output of this cell shows data_aug as (6364, 11, 300) and
# labels as (6364,) -- confirm the axis meaning of the current files.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# Context managers make sure both file handles are closed after loading.
with open("data_aug.pkl", "rb") as data_file:
    data_aug = p.load(data_file)
with open("labels_aug.pkl", "rb") as labels_file:
    labels = p.load(labels_file)
print(data_aug.shape)
print(labels.shape)

batch_size = 1
# Create the test dataset from the first 1090 samples; what `train=False`
# selects from that slice is decided inside SleepDataset.
test_dataset = SleepDataset(data_aug[:1090, ...], labels[:1090, ...], train=False)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

(6364, 11, 300)
(6364,)


### No augmentation, no weighted loss

In [10]:
# Restore the checkpoint for the "no augmentation, no weighted loss" run
# and evaluate it on the test set.
model = ConvNet()
checkpoint_path = "trained_models/net.pt"
model.load_state_dict(torch.load(checkpoint_path))
# nn.Module.double() casts the parameters to float64 in place and returns the module
model = model.double()
test(model, test_dataloader)

No. 20 (test) -- acc: 0.0138
No. 40 (test) -- acc: 0.0367
No. 60 (test) -- acc: 0.0596
No. 80 (test) -- acc: 0.0734
No. 100 (test) -- acc: 0.1101
No. 120 (test) -- acc: 0.1422
No. 140 (test) -- acc: 0.1560
No. 160 (test) -- acc: 0.1743
No. 180 (test) -- acc: 0.1927
No. 200 (test) -- acc: 0.2248
No. 218 (test) -- acc: 0.2477
Final accuracy: 0.2477, mean avg precision 0.2167, f1: 0.1833


### No augmentation, weighted loss

In [11]:
# Restore the checkpoint for the "no augmentation, weighted loss" run
# and evaluate it on the test set.
model = ConvNet()
checkpoint_path = "trained_models/net_weighted.pt"
model.load_state_dict(torch.load(checkpoint_path))
# nn.Module.double() casts the parameters to float64 in place and returns the module
model = model.double()
test(model, test_dataloader)

No. 20 (test) -- acc: 0.0229
No. 40 (test) -- acc: 0.0413
No. 60 (test) -- acc: 0.0596
No. 80 (test) -- acc: 0.1009
No. 100 (test) -- acc: 0.1422
No. 120 (test) -- acc: 0.1835
No. 140 (test) -- acc: 0.2202
No. 160 (test) -- acc: 0.2615
No. 180 (test) -- acc: 0.2844
No. 200 (test) -- acc: 0.3119
No. 218 (test) -- acc: 0.3349
Final accuracy: 0.3349, mean avg precision 0.2251, f1: 0.2560


### Augmentation, no weighted loss

In [12]:
# Restore the checkpoint for the "augmentation, no weighted loss" run
# and evaluate it on the test set.
model = ConvNet()
checkpoint_path = "trained_models/net_aug.pt"
model.load_state_dict(torch.load(checkpoint_path))
# nn.Module.double() casts the parameters to float64 in place and returns the module
model = model.double()
test(model, test_dataloader)

No. 20 (test) -- acc: 0.0550
No. 40 (test) -- acc: 0.0872
No. 60 (test) -- acc: 0.1193
No. 80 (test) -- acc: 0.1422
No. 100 (test) -- acc: 0.1606
No. 120 (test) -- acc: 0.1927
No. 140 (test) -- acc: 0.2202
No. 160 (test) -- acc: 0.2569
No. 180 (test) -- acc: 0.2706
No. 200 (test) -- acc: 0.3028
No. 218 (test) -- acc: 0.3303
Final accuracy: 0.3303, mean avg precision 0.2083, f1: 0.2946


### Augmentation, weighted loss

In [13]:
# Restore the checkpoint for the "augmentation, weighted loss" run
# and evaluate it on the test set.
model = ConvNet()
checkpoint_path = "trained_models/net_aug_weighted.pt"
model.load_state_dict(torch.load(checkpoint_path))
# nn.Module.double() casts the parameters to float64 in place and returns the module
model = model.double()
test(model, test_dataloader)

No. 20 (test) -- acc: 0.0229
No. 40 (test) -- acc: 0.0550
No. 60 (test) -- acc: 0.0963
No. 80 (test) -- acc: 0.1055
No. 100 (test) -- acc: 0.1330
No. 120 (test) -- acc: 0.1789
No. 140 (test) -- acc: 0.2064
No. 160 (test) -- acc: 0.2294
No. 180 (test) -- acc: 0.2569
No. 200 (test) -- acc: 0.2936
No. 218 (test) -- acc: 0.3303
Final accuracy: 0.3303, mean avg precision 0.2303, f1: 0.3154
