## Dreem 2 Sleep Classification challenge 2020
**Student: Felipe Cybis Pereira**

This notebook is the main code for training and producing test data for the challenge.


In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import h5py # Read and write HDF5 files from Python

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# filenames for data we will work with
data_path = "/kaggle/input/notebook-producing-spectrograms/"
file_xtrain = data_path + "Sxx_x_train.h5"
file_xtest = data_path + "Sxx_x_test.h5"
file_ytrain = data_path + "y_train.csv"

data_path_eeg = "/kaggle/input/dreem-2-sleep-classification-challenge-2020/"
file_eeg_xtrain = data_path_eeg + "X_train.h5/X_train.h5"

# training labels
y_data = pd.read_csv(file_ytrain)

# what does the h5 file contains ?
with h5py.File(file_xtrain, "r") as hf:
    fields = list(hf.keys())
    print(fields)

/kaggle/input/dreem-2-sleep-classification-challenge-2020/sample_submission.csv
/kaggle/input/dreem-2-sleep-classification-challenge-2020/y_train.csv
/kaggle/input/dreem-2-sleep-classification-challenge-2020/X_train.h5/X_train.h5
/kaggle/input/dreem-2-sleep-classification-challenge-2020/X_test.h5/X_test.h5
/kaggle/input/notebook-producing-spectrograms/Sxx_x_train.h5
/kaggle/input/notebook-producing-spectrograms/y_train.csv
/kaggle/input/notebook-producing-spectrograms/__results__.html
/kaggle/input/notebook-producing-spectrograms/Sxx_x_test.h5
/kaggle/input/notebook-producing-spectrograms/__notebook__.ipynb
/kaggle/input/notebook-producing-spectrograms/__output__.json
/kaggle/input/notebook-producing-spectrograms/custom.css
['Sxx_1', 'Sxx_2', 'Sxx_3', 'Sxx_4', 'Sxx_5', 'Sxx_6', 'Sxx_7', 'index', 'index_window']


We are going to use the spectrogram data produced by `notebook-producing-spectrograms`. Note that instead of the 'eeg_i' fields we have 'Sxx_i' and the same 'index' and 'index_window' fields.

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

def get_train_validation_dataset(channel_list, ratio=0.2, k_neighbor=0):
    """
    Get train and validation set by splitting the training set. File path for x_data and for y_data are hard coded
    within the function.
        :param channnel_list (list): name of channels to use from the spectrograms available
        :param ratio (default=0.2): ratio to split into training set (1-ratio) and validation set (ratio)
        The splitting is done randomly and subject-wise!
        :param k_neighbor (int): number of epochs to append before and after the main epoch
        Number of epochs to be returned by the data set is (2*k_neighbor+1)
    """
    
    print('Channel being used: ' + str(channel_list))
    with h5py.File(file_xtrain, "r") as fi:
        x_data = np.stack(list(fi[channel][()] for channel in channel_list), axis=0)
        index_window = fi['index_window'][()]
        index = fi['index'][()]
    print('Input data shape: ' + str(x_data.shape))
    y_data = pd.read_csv(file_ytrain)['sleep_stage'].to_numpy()
    
    subject_idx = np.unique(index)
    split = int((1-ratio) * len(subject_idx))
    
    # shuffling subjects
    np.random.shuffle(subject_idx)
    train_subjects, val_subjects = subject_idx[:split], subject_idx[split:]
    print('Training subjects: ' + str(train_subjects))
    print('Validation subjects: ' + str(val_subjects))
    
    train_indices = np.where(np.in1d(index, train_subjects))[0]
    val_indices = np.where(np.in1d(index, val_subjects))[0]
    
    x_train, x_validation = x_data[:,train_indices], x_data[:,val_indices]
    y_train, y_validation = y_data[train_indices], y_data[val_indices]
    idx_window_train, idx_window_validation = index_window[train_indices], index_window[val_indices]
    idx_train, idx_validation = index[train_indices], index[val_indices]
    
    train_ds = SxxEpochDataset(x_data=x_train,
                               y_data=y_train,
                               index_window=idx_window_train,
                               k_neighbor=k_neighbor)
    
    val_ds = SxxEpochDataset(x_data=x_validation,
                             y_data=y_validation,
                             index_window=idx_window_validation,
                             k_neighbor=k_neighbor)
    
    return train_ds, val_ds

def get_test_dataset(channel_list, k_neighbor=0):
    """
    Get test set. File path for x_data is hard coded within the function.
        :param channnel_list (list): name of channels to use from the spectrograms available
        :param k_neighbor (int): number of epochs to append before and after the main epoch
        Number of epochs to be returned by the data set is (2*k_neighbor+1)
    """
    
    print('Channel being used: ' + str(channel_list))
    with h5py.File(file_xtest, "r") as fi:
        x_test = np.stack(list(fi[channel][()] for channel in channel_list), axis=0)
        index_window = fi['index_window'][()]
        index = fi['index'][()]
    print('Input data shape: ' + str(x_test.shape))
    subjects = np.unique(index)
    print('Test subjects: ' + str(subjects))
    
    
    test_ds = SxxEpochDataset(x_data=x_test,
                              y_data=None,
                              index_window=index_window,
                              k_neighbor=k_neighbor)

    
    return test_ds

In [3]:
class SxxEpochDataset(Dataset):
    """Create PyTorch dataset for spectrogram epochs."""

    def __init__(self, x_data, y_data=None, index_window=None, k_neighbor=0):
        """
        Args:
            x_data (numpy array): Numpy array of input data.
            y_data (list of numpy array): Sleep Stages
            index_window (numpy array): Index of epoch in a particular subject
            k_neighbor (int or float): Number of epochs to be concatenated before and after the main epoch.
            Total number of epochs is (2*k_neighbor+1)
        """
        
        self.y_data = y_data
        if self.y_data is None:
            print('Labels not given, dataset will return only x values')
        self.index_window = index_window
        assert self.index_window is not None, 'Intra subject epoch indexing is needed for the right padding'
        
        self.x_data = x_data
        self.num_channels = self.x_data.shape[0]
        self.data_size = self.x_data.shape[1]
        self.freq_size = self.x_data.shape[2]
        self.one_epoch_length = self.x_data.shape[3]
        
        self.k_neighbor = k_neighbor
        
    def __len__(self):
        return self.data_size

    def __getitem__(self, idx):
        
        main_epoch_id = self.index_window[idx]
        
        if idx >= self.k_neighbor and idx <= self.data_size - (self.k_neighbor+1):
            # idx is in the "safe zone" (almost always)
            indices = self.index_window[idx - self.k_neighbor: idx + self.k_neighbor + 1]
            epochs = np.copy(self.x_data[:,idx - self.k_neighbor: idx + self.k_neighbor + 1])
            
            differences_to_check = np.arange(main_epoch_id - self.k_neighbor, main_epoch_id + self.k_neighbor + 1)
            # checking if indices are all good or if needs padding
            check_indices = np.equal(indices,differences_to_check)
            
            # This padding is easier, if there is some gap in self.index_window the data or if data is changing
            # subjects in the sequence, we can just multiply the bad epochs by zero
            if not np.all(check_indices):
                for ii, checked_index in enumerate(check_indices):
                    if not checked_index:
                        epochs[:,ii] = epochs[:,ii]*0 
        
        # If idx is lower than k_neighbors there is not data to fetch before the beggining
        # of the dataset. We must create zero padding for it
        elif idx < self.k_neighbor:
            nb_epochs_to_pad = self.k_neighbor - idx
            epochs_to_pad = np.zeros((self.num_channels, nb_epochs_to_pad, self.freq_size, self.one_epoch_length))
            epochs = np.copy(self.x_data[:,:idx + self.k_neighbor + 1])
            
            epochs = np.concatenate((epochs_to_pad, epochs), axis=1)
            
        # Same thing if idx is too high
        elif idx > self.data_size - (self.k_neighbor+1):
            nb_epochs_to_pad = idx - (self.data_size - (self.k_neighbor+1))
            epochs_to_pad = np.zeros((self.num_channels, nb_epochs_to_pad, self.freq_size, self.one_epoch_length))
            epochs = np.copy(self.x_data[:,idx - self.k_neighbor:])
            
            epochs = np.concatenate((epochs, epochs_to_pad), axis=1)
        
            
        epochs = epochs.swapaxes(2,1).reshape(self.num_channels,self.freq_size,-1)
        
        if self.y_data is not None:
            label = self.y_data[idx]
            return epochs.astype('float32'), label

        return epochs.astype('float32')
            

In [4]:
import torch
import torch.nn as nn

"""
Bottleneck Attention Module (BAM) such as in Park et al. (arXiv:1807.06514)
Inspired by https://medium.com/visionwizard/understanding-attention-modules-cbam-and-bam-a-quick-read-ca8678d1c671
"""

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)
        
        
class ChannelAttentionGate(nn.Module):
    """Channel attention by global average pooling, then encoding and decoding linear layers
    with a reduction rate of (default=16)"""
    def __init__(self, num_channels, reduction_ratio=16):
        super().__init__()
  
        self.gate_c = nn.Sequential(
            nn.AdaptiveAvgPool2d((1,1)),
            Flatten(),
            nn.Linear(num_channels, num_channels // reduction_ratio),
            nn.BatchNorm1d(num_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(num_channels // reduction_ratio, num_channels),
            nn.BatchNorm1d(num_channels),
            nn.ReLU(),
        )
        
    def forward(self, xb):
        # unsqueezing and expanding so it has same dimension as input
        xb = self.gate_c(xb).unsqueeze(2).unsqueeze(3).expand_as(xb)
        return xb
        
class SpatialAttentionGate(nn.Module):
    """Spatial attention then encoding and decoding convolutional layers
    with a reduction rate of (default=16)"""
    def __init__(self, num_channels, reduction_ratio=16, dilation_val=4):
        super().__init__()
        self.gate_s = nn.Sequential(
            nn.Conv2d(num_channels, num_channels // reduction_ratio, kernel_size=1),
            nn.BatchNorm2d(num_channels // reduction_ratio),
            nn.ReLU(),
            nn.Conv2d(num_channels // reduction_ratio, num_channels // reduction_ratio, kernel_size=3, padding=dilation_val, dilation=dilation_val),
            nn.BatchNorm2d(num_channels // reduction_ratio),
            nn.ReLU(),
            nn.Conv2d(num_channels // reduction_ratio, num_channels // reduction_ratio, kernel_size=3, padding=dilation_val, dilation=dilation_val),
            nn.BatchNorm2d(num_channels // reduction_ratio),
            nn.ReLU(),
            nn.Conv2d(num_channels // reduction_ratio, 1, kernel_size=1),
        )
        
    def forward(self, xb):
        # expanding so it has same dimension as input
        xb = self.gate_s(xb).expand_as(xb)
        return xb

class BAM(nn.Module):
    """Bottleneck Attention Module, puts together
    SpatialAttentionGate and ChannelAttentionGate, with a residual learning scheme."""
    def __init__(self, num_channels):
        super().__init__()
        self.channel_attention = ChannelAttentionGate(num_channels=num_channels)
        self.spatial_attention = SpatialAttentionGate(num_channels=num_channels)
        
    def forward(self, xb):
        attention = torch.sigmoid(self.channel_attention(xb) + self.spatial_attention(xb))
        return xb + xb*attention


In [5]:
import torch
import torch.nn as nn
""" 
Creating PyTorch module for the Convolutional Neural Network with optional attention module and k_neighboring epochs.
One spectrogram epoch has dimensions (40,60).
Input dimensions is (40, (2*k_neighbor+1)*60).
There are 3 max_pool2d along the net during the convolutional blocks, kernels being (2,4), (2,3), (2,5).
-> This means that before the fully connected blocks, the data has dimensions (5, (2*k_neighbor+1)).

First block:
        - BatchNorm2d
        - Conv2d(input=channels, output=64)
        - ReLU
        - Bottleneck Attention Module (optional)
        - MaxPool2d
        
Main blocks structure: 
        - BatchNorm2d
        - Conv2d(input=input, output=input*2)
        - ReLU
        - Conv2d(input=input*2, output=input*4)
        - ReLU
        - Bottleneck Attention Module (optional)
        - MaxPool2d
        
Final fully connected blocks:
        - Dropout (50%)
        - Linear
        - LeakyReLU (-0.1)
        - Dropout (50%)
        - Linear

Two fully connected layers with dropout(50%) before each one.

"""

class Dreem_CNN(nn.Module):
    def __init__(self,
                 channels,
                 k_neighbors=0,
                 use_attention=False):
        
        super().__init__()
        
        # number of occipital and frontal channels
        self.num_channels = channels
        
        
        self.k_neighbors = k_neighbors ## 40, (2*k+1)*60
        self.activ_relu = nn.ReLU()
        self.activ_leakyrelu = nn.LeakyReLU(negative_slope=0.1)


        self.conv_bloc_1 = nn.Sequential()
        self.conv_bloc_1.add_module('init_bn', nn.BatchNorm2d(self.num_channels))
        self.conv_bloc_1.add_module('init_conv2d', nn.Conv2d(self.num_channels, 64, kernel_size=(3,3), stride=1, padding=(1,1)))
        self.conv_bloc_1.add_module('init_activ', self.activ_relu)
        if use_attention:
            self.conv_bloc_1.add_module('init_bam', BAM(64))
        self.conv_bloc_1.add_module('init_maxpool2d', nn.MaxPool2d(kernel_size=(2,4)))

        self.conv_bloc_2 = self._make_conv_bloc(init_input=64, max_pool_kernel=(2,3), with_attention=use_attention)

        self.conv_bloc_3 = self._make_conv_bloc(init_input=256, max_pool_kernel=(2,5), with_attention=use_attention)
        
        ## need reshape before entering here
        self.full_conn_bloc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(int(1024*5*(2*self.k_neighbors+1)),100),
            self.activ_leakyrelu,
        )
        
        
        self.classification_conn_bloc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(100,5)
        )
        

    def _make_conv_bloc(self, init_input, max_pool_kernel, with_attention=False):
        conv_bloc = nn.Sequential()
        conv_bloc.add_module('bn2d', nn.BatchNorm2d(init_input))
        conv_bloc.add_module('conv2d_1', nn.Conv2d(init_input, 2*init_input, kernel_size=3, stride=1, padding=(1,1)))
        conv_bloc.add_module('activ', self.activ_relu)
        conv_bloc.add_module('conv2d_2', nn.Conv2d(2*init_input, 2*2*init_input, kernel_size=3, stride=1, padding=(1,1)))
        conv_bloc.add_module('activ', self.activ_relu)
        if with_attention:
            conv_bloc.add_module('attention', BAM(2*2*init_input))
        conv_bloc.add_module('maxpool2d', nn.MaxPool2d(kernel_size=max_pool_kernel))
        
        return conv_bloc
    
    def forward(self, xb):
        
        xb = self.conv_bloc_1(xb)
        xb = self.conv_bloc_2(xb)
        xb = self.conv_bloc_3(xb)
        
        xb = xb.reshape(xb.shape[0],-1) # flatten all dimensions except batch dimension
        xb = self.full_conn_bloc(xb)
        xb = self.classification_conn_bloc(xb)

        return xb
    

In [6]:
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, confusion_matrix, f1_score
"""
Useful functions to be used in the training function.
"""

def loss_val(net, val_loader, criterion):
    """
    Function to compute validation loss (not computed during retropropagation!)
        :param net: pytorch model
        :param val_loader: pytorch dataloader
        :param criterion: loss function
        
        :return val_loss: mean loss in whole dataset
    """
    with torch.no_grad(): # do not forget to remove gradient computing during evaluation !!!
        val_loss = []
        net.eval()
        for inputs, labels in val_loader:

            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = net(inputs)
            
            loss = criterion(outputs, labels)
            val_loss += [loss.item()]
            
    net.train()
    return np.mean(val_loss)

def evaluate_scores(net, dataloader):
    """
    To evaluate validation scores. Uses the evaluate function just below
        :param net: pytorch network model
        :param dataloader: pytorch dataloader
    """
    with torch.no_grad():
        prediction_list = torch.empty(0).to(device)
        true_list = torch.empty(0).to(device)

        net.eval() # set net to evaluation mode (necessary if using batch normalization, for example)
        for data in dataloader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)
            prediction_list = torch.cat([prediction_list, predicted])
            true_list = torch.cat([true_list, labels])

        # Scores
        true_list = true_list.cpu().numpy()
        prediction_list = prediction_list.cpu().numpy()
        scores = evaluate(true_list, prediction_list)
        
    net.train() # reset net to training mode to continue training
    return scores 
    
    
# score function
def evaluate(true, pred):
    """
    Function using sklearn.metrics functions to score predicted labels against true labels.
    f1_score with weighted average is the scoring used in the challenge!
    """
    scores = {'balanced_accuracy': balanced_accuracy_score(true, pred),
            'cohen_kappa': cohen_kappa_score(true, pred),
            'confusion_matrix': confusion_matrix(true, pred),
             'mean_f1_score': f1_score(true,pred,average='weighted')}

    return scores

In [7]:
from sklearn.model_selection import KFold
from torch.utils.data.dataset import Subset
from torch.utils.data import DataLoader
from torch import optim


def train_model(model, 
                batch_size=64,
                n_epoch=15,
                k_fold=3,
                loss_func=nn.CrossEntropyLoss,
                optimizer=optim.AdamW,
                learning_rate=0.001):
    """
    Function to train the model with k-fold cross validation.
        :param model: PyTorch model
        :param batch_size: for batch training, usually set to 32, 62, 128... (default=64)
        :param n_epoch: number of epochs to train (default=15)
        :param k-fold: number of k-fold cross validation (default=3).
        It will subdivide the training dataset in 3 subsets and train model in 2 of them while validating on the third one.
        For each epoch, 3 iterations is done (train:(0,1) validate:(2), train:(1,2) validate:(0), train:(0,2) validate:(1))
        :param loss_func: loss function to be used. If LogSoftmax function is used in the model, we suggest to use NLLLoss.
        If not, CrossEntropyLoss is suggested.
        :param optimizer: optimizer method to be used (default=AdamW (to be used with weight_decay))
        :param learning_rate: starting learning rate (default=0.001)

        :return best_model: the best model according to the mean validation loss (list)
        :return all_val_loss: mean validation loss for each epoch
        :return k_fold_loss: validation loss for each k-fold for each epoch
        :return val_scores: dictionaire with scores for each k-fold for each epoch
        :return fin_val_loss: validation loss for each epoch in unseen data
        :return fin_val_scores: dictionaire with scores for each epoch in unseen data

    Scores used are the following sklearn.metrics functions:
        [balanced_accuracy_score, cohen_kappa_score, confusion_matrix, f1_score]
        
    OBS: This is a tunable function. It is highly recommended to go beyond the parameters mentioned above.
    It is not a "plug-and-play" function, it should be adapted to the case of study!!
    """

    print('Using GPU:', torch.cuda.is_available())
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = model.to(device)
    bs = batch_size
    n_epoch = n_epoch
    k_fold = k_fold
  
    criterion = loss_func()
    new_learning_rate = learning_rate
    optim = optimizer(model.parameters(), lr=new_learning_rate, weight_decay=0.0001)

    k_fold_loss = np.empty((0,3))
    val_scores = dict()
    fin_val_scores = dict()
    all_val_loss = []
    fin_val_loss = []
    best_models = []
    best_all_val_loss = []
    for epoch in range(n_epoch):  # loop over the dataset multiple times
        model.train()
        print("Training mode")
        
        # Adaptative learning rate
        new_learning_rate = learning_rate*np.exp(-(epoch/4))
        print("Learning rate:", new_learning_rate)
        optim = optimizer(model.parameters(), lr=new_learning_rate, weight_decay=0.0001)

        
        # validation losses for this epoch (n=k_fold)
        val_loss = []
        k = 0
        for train_indices, val_indices in KFold(n_splits=k_fold).split(list(range(len(train_ds)))):
            # k-fold cross validation
            # k-fold dataloader (k=3) - Take validation subset for training, to avoid overfit
    
            train_subset = Subset(train_ds, train_indices)
            val_subset = Subset(train_ds, val_indices)

            ##### unbalanced sampler for unbalanced datasets! #####
            y_data = train_subset.dataset.y_data[train_indices]
            
            class_sample_count = [len(y_data[y_data==sleep_class]) for sleep_class in range(5)]
            class_weights = 1/torch.Tensor(class_sample_count)

            # some handmade adjustments 
            class_weights[1] /= 1.6
            class_weights[0] = class_weights[-1]
            class_weights[2] = class_weights[-1]
            class_weights[3] = class_weights[-1]*1.4
            class_weights[-1] *= 1.4

            weights = [class_weights[y_data[i]] for i in range(len(y_data))]
            
            # feed weights to loss criterion and sampler 
            criterion = loss_func(weight=class_weights.to(device))
            sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(y_data))
            #######################################################
            
            # train_dataloader with WeightedRandomSampler!
            train_dataloader = DataLoader(train_subset, batch_size=bs, num_workers=4, sampler=sampler)
            
            running_loss = 0.0
            for i, data in enumerate(train_dataloader, 0):
                
                # get the inputs; data is a tuple of (inputs, labels)
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # zero the parameter gradients
                optim.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward() # retropropagating error
                optim.step() 

                # print statistics
                running_loss += loss.item()
                if i % 100 == 99:
                    print('[%d, %5d] loss: %.4f' %
                      (epoch + 1, i + 1, running_loss / 100))

                    running_loss = 0.0

                
            # k-fold validation dataset: the k-fold data set that has not been used for the last training epoch 
            val_dataloader = DataLoader(val_subset, batch_size=2*bs, num_workers=8)
            val_loss += [loss_val(model, val_dataloader, criterion)]
            
            # scores for the k-fold 
            val_scores['epoch_'+str(epoch)+'k_'+str(k)] = evaluate_scores(model, val_dataloader)

            k += 1

        
        # keeping track of loss for each k-fold validation for each epoch
        k_fold_loss = np.append(k_fold_loss, np.array([val_loss]), axis=0)
        
        # keeping track of mean loss for each epoch
        all_val_loss += [np.round(np.mean(val_loss), 5)]
        print('K-fold validation loss:', all_val_loss)
        
        ######################################################
        # Updating models so it returns the best model related to all_val_loss
        if epoch == 0: # first epoch, save model
            best_model = model.state_dict()
            best_all_val_loss = all_val_loss[-1]
        else: # update the model by the new one if new one is better
            msg = "Model not updated!"
            if all_val_loss[-1] < best_all_val_loss:
                best_model = model.state_dict()
                best_all_val_loss = all_val_loss[-1]
                msg = "Best model updated!"
            print(msg, best_all_val_loss)
        ######################################################
            
        
        # final validation dataset: has not been used for the training
        fin_val_dataloader = DataLoader(val_ds, batch_size=bs, num_workers=8)
        fin_val_loss += [np.round(loss_val(model, fin_val_dataloader, loss_func()),5)]
        print('Final validation loss:', fin_val_loss)
        fin_val_scores['epoch_'+str(epoch)] = evaluate_scores(model, fin_val_dataloader)
        print(fin_val_scores['epoch_'+str(epoch)])

    print('Finished training!')
    return best_model, all_val_loss, k_fold_loss, val_scores, fin_val_loss, fin_val_scores

In [8]:
print('GPU: ', torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

GPU:  True


### Initializing datasets
- Choosing channels
- k-neighbors

In [9]:
occipital = ['Sxx_4', 'Sxx_5', 'Sxx_6', 'Sxx_7']
frontal = ['Sxx_1', 'Sxx_2', 'Sxx_3']
channels = frontal+occipital 
k_neighbor = 2
train_ds, val_ds = get_train_validation_dataset(channel_list=channels, ratio=0.2, k_neighbor=k_neighbor)
test_ds = get_test_dataset(channel_list=channels, k_neighbor=k_neighbor)
print('Datasets ready!')

Channel being used: ['Sxx_1', 'Sxx_2', 'Sxx_3', 'Sxx_4', 'Sxx_5', 'Sxx_6', 'Sxx_7']
Input data shape: (7, 24688, 40, 60)
Training subjects: [22  8  6 16 18 28 10 26 24 14  9 13  3 12 20 23 21  4  7 30  0 27 29 11]
Validation subjects: [25 15 19  1  2  5 17]
Channel being used: ['Sxx_1', 'Sxx_2', 'Sxx_3', 'Sxx_4', 'Sxx_5', 'Sxx_6', 'Sxx_7']
Input data shape: (7, 24980, 40, 60)
Test subjects: [31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60]
Labels not given, dataset will return only x values
Datasets ready!


### Initializing model and calling training function

In [10]:

model = Dreem_CNN(channels=len(channels),
                  k_neighbors=k_neighbor,
                  use_attention=True)

best_model, all_val_loss, k_fold_loss, val_scores, fin_val_loss, fin_val_scores = train_model(model=model, 
                                                                                              n_epoch=15,
                                                                                              batch_size=64,
                                                                                              k_fold=3,
                                                                                              learning_rate=0.001)

Using GPU: True
Training mode
Learning rate: 0.001
[1,   100] loss: 1.9611
[1,   200] loss: 1.0359
[1,   100] loss: 0.9000
[1,   200] loss: 0.7747
[1,   100] loss: 0.7088
[1,   200] loss: 0.6394
K-fold validation loss: [0.85801]
Final validation loss: [0.93391]
{'balanced_accuracy': 0.6495869793920955, 'cohen_kappa': 0.6132011342448727, 'confusion_matrix': array([[ 511,   43,   25,   10,  108],
       [  51,   83,   78,   11,  109],
       [ 116,   13, 1428,  284,  232],
       [  69,    0,  154,  902,   43],
       [  99,    5,  120,   11,  962]]), 'mean_f1_score': 0.7053155416175727}
Training mode
Learning rate: 0.0007788007830714049
[2,   100] loss: 0.7122
[2,   200] loss: 0.6228
[2,   100] loss: 0.6608
[2,   200] loss: 0.6058
[2,   100] loss: 0.5592
[2,   200] loss: 0.5106
K-fold validation loss: [0.85801, 0.70091]
Best model updated! 0.70091
Final validation loss: [0.93391, 0.83572]
{'balanced_accuracy': 0.6988076844520633, 'cohen_kappa': 0.6581736942786134, 'confusion_matrix': ar

In [11]:
import pickle

def build_result_dict():
    results_dict = {
        'model': model,
        'best_models': best_model,
        'channels': channels,
        'all_val_loss': all_val_loss,
        'k_fold_loss': k_fold_loss,
        'val_scores': val_scores,
        'fin_val_loss': fin_val_loss,
        'fin_val_scores': fin_val_scores,
    }
    return results_dict

# saving results
results_dict = build_result_dict()
results_file = open("Dreem_CNN_results.pkl", "wb")
pickle.dump(results_dict, results_file)
results_file.close()

In [12]:
working_dir = os.getcwd()

filename = 'best_model.pth'
torch.save(best_model, os.path.join(working_dir, filename))

## Testing the best model
There is not an actual test, because we don't have the ground truth. It just creates the .csv file for submission.

In [13]:
idx_absolute = np.arange(len(train_ds) + len(val_ds), len(train_ds) + len(val_ds) + len(test_ds))
test_model = Dreem_CNN(channels=len(channels),
                  k_neighbors=k_neighbor,
                  use_attention=True)

filename = 'best_model.pth'
test_model.load_state_dict(torch.load(filename))
test_model.to(device)
with torch.no_grad(): # do not forget to remove gradient computing during evaluation !!!

    prediction_list = torch.empty(0).to(device)
    test_model.eval()

    test_dataloader = DataLoader(test_ds, batch_size=128, num_workers=4)
    for inputs in test_dataloader:
        outputs = test_model(inputs.to(device))
        _, predicted = torch.max(outputs, 1)

        prediction_list = torch.cat([prediction_list, predicted])

    prediction_list = prediction_list.cpu().numpy()

y_p = np.stack((idx_absolute, prediction_list))
y_p = y_p.astype(int)
df = pd.DataFrame(y_p.T, columns = ['index','sleep_stage'])
df.to_csv("y_predict.csv", index=False)