## IMPORTS

In [None]:
# Mount Google Drive (Colab-only API) so the CSV files referenced further down, under
# drive/MyDrive, are reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from torch.utils.data import Dataset
import torch
from torch.nn import Conv2d, BatchNorm2d, ReLU, Linear, Dropout, ModuleList, LogSoftmax, LayerNorm
import datetime
import os
import pandas as pd
from scipy.signal import butter, filtfilt, resample

import numpy as np
import torch
from sklearn.metrics import balanced_accuracy_score
from sklearn.utils import compute_class_weight
from torch.nn import NLLLoss
from torch.optim import RMSprop
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader



In [None]:
# Sanity check: display the name of CUDA device 0 (expected to fail on a runtime without a GPU).
torch.cuda.get_device_name(0)

'Tesla T4'

## PREPROCESSING

### CONSTANTS

In [None]:
# define the constants influencing the preprocessing part

# cutoff frequency handed to butter_bandpass by filter_array
# (presumably Hz, i.e. the same unit as SAMPLING_RATE -- confirm against the data description)
CUTOFF = 25.6

# sampling rate of the raw signals; also used to derive the number of seconds per row
SAMPLING_RATE = 128

# number of samples kept per second of signal after resampling
DESIRED_DOWNSAMPLE_RATE = 64

# number of rows (epochs) per subject in the training files; used to compute the subject limits
SUBJECT_EPOCHS_TRAIN = 21600

# number of rows (epochs) per subject in the test files; used to compute the subject limits
SUBJECT_EPOCHS_TEST = 21600


### PREPROCESSING

### Read and parse the data

In [None]:
# every CSV file lives in one folder of the mounted drive
base_path = os.path.join('drive', 'MyDrive', 'task3_data')

# training signals and their labels
x_train_path_eeg1 = os.path.join(base_path, 'train_eeg1.csv')
x_train_path_eeg2 = os.path.join(base_path, 'train_eeg2.csv')
x_train_path_emg = os.path.join(base_path, 'train_emg.csv')
y_path = os.path.join(base_path, 'train_labels.csv')

# test signals
x_test_path_eeg1 = os.path.join(base_path, 'test_eeg1.csv')
x_test_path_eeg2 = os.path.join(base_path, 'test_eeg2.csv')
x_test_path_emg = os.path.join(base_path, 'test_emg.csv')

In [None]:
def _read_csv(csv_path):
    """Read one CSV file with the C parser, memory-mapped, using the round-trip float converter."""
    return pd.read_csv(csv_path, float_precision = "round_trip", memory_map = True, engine = 'c')


# training frames
df_X_train_eeg1 = _read_csv(x_train_path_eeg1)
df_X_train_eeg2 = _read_csv(x_train_path_eeg2)
df_X_train_emg = _read_csv(x_train_path_emg)

# test frames
df_X_test_eeg1 = _read_csv(x_test_path_eeg1)
df_X_test_eeg2 = _read_csv(x_test_path_eeg2)
df_X_test_emg = _read_csv(x_test_path_emg)

# labels
df_y = _read_csv(y_path)


In [None]:
def _values_without_first_column(frame):
    """Return the frame as a numpy array with its first column dropped."""
    return frame.to_numpy()[:, 1:]


# numpy views of the signals and labels (first column of every file is skipped)
X_train_eeg1 = _values_without_first_column(df_X_train_eeg1)
X_train_eeg2 = _values_without_first_column(df_X_train_eeg2)
X_train_emg = _values_without_first_column(df_X_train_emg)
X_test_eeg1 = _values_without_first_column(df_X_test_eeg1)
X_test_eeg2 = _values_without_first_column(df_X_test_eeg2)
X_test_emg = _values_without_first_column(df_X_test_emg)
y = _values_without_first_column(df_y)

# drop the references to the pandas frames; the names stay defined but point to None
df_X_train_eeg1 = df_X_train_eeg2 = df_X_train_emg = None
df_X_test_eeg1 = df_X_test_eeg2 = df_X_test_emg = None
df_y = None

### Apply filtering and Downsampling

In [None]:
def butter_bandpass(low, fs, order = 4, type = "lowpass"):
    """
    Design a digital Butterworth filter for a signal sampled at ``fs``.

    :param low: critical frequency, in the same unit as ``fs``; must lie below ``fs / 2``
    :param fs: sampling frequency of the signal that will be filtered
    :param order: order of the filter
    :param type: filter type, forwarded to ``scipy.signal.butter`` as ``btype``
    :return: numerator ``b`` and denominator ``a`` coefficients of the filter
    """
    nyq = 0.5 * fs

    # digital designs take the critical frequency as a fraction of the Nyquist frequency
    normalized_cutoff = low / nyq

    # analog = False: the coefficients are consumed by filtfilt, which applies them as a
    # discrete-time filter; analog (s-domain) coefficients would give the wrong response there
    b, a = butter(order, normalized_cutoff, btype = type, analog = False, output = "ba")
    return b, a


def filter_array(data):
    """
    Filter every row of ``data`` forwards and backwards with the filter built from
    CUTOFF and SAMPLING_RATE.

    The rows are overwritten in place and the same ``data`` object is returned.
    """
    coeff_b, coeff_a = butter_bandpass(CUTOFF, SAMPLING_RATE)

    for row_number, row in enumerate(data):
        data[row_number] = filtfilt(coeff_b, coeff_a, row)

    return data


def downsample_signal(data):
    """
    Resample every row of ``data`` to DESIRED_DOWNSAMPLE_RATE samples per second.

    The duration of a row is derived from the length of the first row and SAMPLING_RATE;
    a new array is returned, the input is left untouched.
    """
    duration_seconds = len(data[0]) // SAMPLING_RATE
    target_length = duration_seconds * DESIRED_DOWNSAMPLE_RATE

    return np.array([resample(row, target_length) for row in data])


def preprocess_data(X_train_eeg1, X_train_eeg2, X_train_emg, X_test_eeg1, X_test_eeg2, X_test_emg):
    """
    Filter and then downsample the six signal arrays.

    :return: the six processed arrays, in the same order as the parameters
    """
    signals = (X_train_eeg1, X_train_eeg2, X_train_emg, X_test_eeg1, X_test_eeg2, X_test_emg)

    # first pass: filter every array
    filtered = [filter_array(signal) for signal in signals]

    # second pass: downsample every filtered array
    return tuple(downsample_signal(signal) for signal in filtered)


In [None]:
 # apply filtering
# training signals: filter, then downsample
X_train_eeg1 = downsample_signal(filter_array(X_train_eeg1))
X_train_eeg2 = downsample_signal(filter_array(X_train_eeg2))
X_train_emg = downsample_signal(filter_array(X_train_emg))

# test signals: filter, then downsample
X_test_eeg1 = downsample_signal(filter_array(X_test_eeg1))
X_test_eeg2 = downsample_signal(filter_array(X_test_eeg2))
X_test_emg = downsample_signal(filter_array(X_test_emg))

### Reshape data

In [None]:
# define limits for each subject
# Every sample is built from the epochs index - 2 .. index + 2 (see get_5_epochs), so the first
# two and the last two epochs of each subject are left out. The *_end_* values are inclusive:
# get_subject_data iterates over range(start, end + 1).
first_subject_start_train = 2
first_subject_end_train = SUBJECT_EPOCHS_TRAIN - 3
second_subject_start_train = SUBJECT_EPOCHS_TRAIN + 2
second_subject_end_train = 2 * SUBJECT_EPOCHS_TRAIN - 3
third_subject_start_train = 2 * SUBJECT_EPOCHS_TRAIN + 2
third_subject_end_train = 3 * SUBJECT_EPOCHS_TRAIN - 3
# the test files hold two more subjects, indexed from 0 again
fourth_subject_start_test = 2
fourth_subject_end_test = SUBJECT_EPOCHS_TEST - 3
fifth_subject_start_test = SUBJECT_EPOCHS_TEST + 2
fifth_subject_end_test = 2 * SUBJECT_EPOCHS_TEST - 3

In [None]:
def get_5_epochs(data, index):
    """
    Concatenate the epoch at ``index`` with its two predecessors and its two successors.

    :param data: indexable collection of epochs
    :param index: position of the central epoch
    :return: one array made of the five epochs, in order
    """
    window = [data[index + offset] for offset in (-2, -1, 0, 1, 2)]
    return np.concatenate(window)

def get_subject_data(start, end, eeg1, eeg2, emg, y):
    """
    Build the five-epoch windows of one subject for the three signals.

    :param start: index of the first central epoch
    :param end: index of the last central epoch (inclusive)
    :param eeg1: epochs of the first EEG signal
    :param eeg2: epochs of the second EEG signal
    :param emg: epochs of the EMG signal
    :param y: labels indexed like the epochs, or None when no labels are available
    :return: windows of eeg1, eeg2 and emg, and the labels (an empty array when ``y`` is None)
    """
    central_epochs = range(start, end + 1)

    windows_eeg1 = [get_5_epochs(eeg1, position) for position in central_epochs]
    windows_eeg2 = [get_5_epochs(eeg2, position) for position in central_epochs]
    windows_emg = [get_5_epochs(emg, position) for position in central_epochs]
    labels = [] if y is None else [y[position] for position in central_epochs]

    return np.array(windows_eeg1), np.array(windows_eeg2), np.array(windows_emg), np.array(labels)

In [None]:
# first subject (training file, with labels)
first_eeg_1, first_eeg_2, first_emg, first_y = get_subject_data(
    start = first_subject_start_train,
    end = first_subject_end_train,
    eeg1 = X_train_eeg1,
    eeg2 = X_train_eeg2,
    emg = X_train_emg,
    y = y,
)

# second subject (training file, with labels)
second_eeg_1, second_eeg_2, second_emg, second_y = get_subject_data(
    start = second_subject_start_train,
    end = second_subject_end_train,
    eeg1 = X_train_eeg1,
    eeg2 = X_train_eeg2,
    emg = X_train_emg,
    y = y,
)

# third subject (training file, with labels)
third_eeg_1, third_eeg_2, third_emg, third_y = get_subject_data(
    start = third_subject_start_train,
    end = third_subject_end_train,
    eeg1 = X_train_eeg1,
    eeg2 = X_train_eeg2,
    emg = X_train_emg,
    y = y,
)

# fourth subject (test file, no labels)
fourth_eeg_1, fourth_eeg_2, fourth_emg, _ = get_subject_data(
    start = fourth_subject_start_test,
    end = fourth_subject_end_test,
    eeg1 = X_test_eeg1,
    eeg2 = X_test_eeg2,
    emg = X_test_emg,
    y = None,
)

# fifth subject (test file, no labels)
fifth_eeg_1, fifth_eeg_2, fifth_emg, _ = get_subject_data(
    start = fifth_subject_start_test,
    end = fifth_subject_end_test,
    eeg1 = X_test_eeg1,
    eeg2 = X_test_eeg2,
    emg = X_test_emg,
    y = None,
)

## LEARNING


### DATASET

In [None]:
class CnnDataset(Dataset):
    """
    Dataset pairing every example with its label and its position in the dataset.
    """

    def __init__(self, data, y):
        # examples and labels are stored as given; they are indexed in parallel
        self.data, self.y = data, y

    def __len__(self):
        """Number of examples."""
        return len(self.data)

    def __getitem__(self, item):
        """
        :return: the example wrapped in an extra leading dimension, its label, and ``item`` itself
                 (so that results produced from shuffled batches can be put back in order)
        """
        example = self.data[item]
        label = self.y[item]
        return np.array([example]), label, item


### MODEL

In [None]:

class CNNModel(torch.nn.Module):
    """
    Convolutional classifier: a stack of convolutions along the last (time) axis followed by a
    two-layer fully connected head ending in LogSoftmax.

    The convolutions use kernels of height 1, so the rows of the input (one per signal) are never
    mixed before the fully connected head. The input is presumably shaped
    (batch, 1, number_of_signals, example_length) -- confirm against the dataset in use.
    """

    def __init__(self, number_of_channels, cnn_blocks, example_length, kernel_size, number_of_signals,
                 nn_first_layer_size, nn_second_layer_size, dropout):
        """
        :param number_of_channels: number of feature channels used by every convolution block
        :param cnn_blocks: number of (Conv2d, ReLU, BatchNorm2d) blocks
        :param example_length: length of the last axis of the input
        :param kernel_size: kernel width of the convolutions in the blocks
        :param number_of_signals: size of the second-to-last axis of the input
        :param nn_first_layer_size: output size of the first Linear layer
        :param nn_second_layer_size: output size of the second Linear layer (number of classes)
        :param dropout: probability used by both Dropout layers
        """
        super(CNNModel, self).__init__()

        cnn_layers = []

        # add a BatchNorm before the first layer
        # (track_running_stats = False: no running statistics are kept, so batch statistics are
        # used in eval mode as well)
        cnn_layers.append(
            BatchNorm2d(1, momentum = 0.99, track_running_stats = False)
        )

        # add 1x1 convolution to increase number of channels
        cnn_layers.append(
            Conv2d(
                in_channels = 1,
                out_channels = number_of_channels,
                kernel_size = [1, 1],
                stride = 1
            )
        )

        # add the CNN blocks
        for index in range(cnn_blocks):
            # blocks alternate between stride 1 (even index) and stride 2 (odd index)
            stride = index % 2 + 1

            # the channel count follows number_of_channels so that it always matches the
            # BatchNorm2d layer below
            cnn_layers.append(
                Conv2d(
                    in_channels = number_of_channels,
                    out_channels = number_of_channels,
                    kernel_size = [1, kernel_size],
                    stride = [1, stride]
                )
            )

            cnn_layers.append(
                ReLU()
            )

            # length of the last axis after an unpadded convolution
            example_length = (example_length - kernel_size) // stride + 1

            cnn_layers.append(
                BatchNorm2d(number_of_channels, momentum = 0.99, track_running_stats = False)
            )

        # define nn layers
        nn_layers = []

        # number of features per example once the convolution output is flattened
        nn_size = example_length * number_of_channels * number_of_signals

        nn_layers.append(
            Dropout(
                p = dropout
            )
        )

        # first fully connected layer
        nn_layers.append(
            Linear(
                nn_size,
                nn_first_layer_size
            )
        )

        nn_layers.append(
            Dropout(
                p = dropout
            )
        )

        nn_layers.append(
            ReLU()
        )

        # second fully connected layer
        nn_layers.append(
            Linear(
                nn_first_layer_size,
                nn_second_layer_size
            )
        )

        # log-probabilities over the classes (pairs with NLLLoss)
        nn_layers.append(
            LogSoftmax(dim = 1)
        )

        # register the layers; the registration order (nn_layers first) is part of the
        # state_dict layout, so it is kept as is
        self.nn_layers = ModuleList(nn_layers)
        self.cnn_layers = ModuleList(cnn_layers)

    def forward(self, tensor):
        """
        :param tensor: input batch
        :return: log-probabilities, one row per example
        """

        # pass through each cnn layer
        for layer in self.cnn_layers:
            tensor = layer(tensor)

        # flatten along all dimensions except batch
        tensor = torch.flatten(tensor, start_dim = 1)

        # pass through each nn layer
        for layer in self.nn_layers:
            tensor = layer(tensor)

        return tensor

    def save_model_to_file(self, full_path):
        """
            Saves the parameters of the current model to a file in order to be able to use it later
        """

        # save model
        torch.save(self.state_dict(), full_path)

    def load_model_from_file(self, full_path):
        """
            Load the model parameters from a file written by save_model_to_file and switch the
            model to eval mode.

            NOTE: torch.load unpickles the file; only load files from a trusted source.
        """

        # map_location = 'cpu' lets a file saved from a GPU model be read on a CPU-only machine;
        # load_state_dict then copies the values onto whatever device the parameters live on
        self.load_state_dict(torch.load(full_path, map_location = 'cpu'))

        # necessary step for loading
        self.eval()


### Weight Init

In [None]:
# ==================================== WEIGHT INITIALIZER ===========================================
class WeightInitializer:
    '''
    Utility class for initializing the weights of a network.

    Usage example:
        weightInit = WeightInitializer()
        weightInit.init_weights(model, 'xavier_normal_', {'gain':0.02})

    '''

    def __init__(self, initType = None, kwargs = None):
        '''
        :param initType: name of a function in torch.nn.init, or None to choose it later
        :param kwargs: keyword arguments passed to the initialization function
        '''
        # copy the dict so that instances never share (or mutate) the caller's object
        self.kwargs = dict(kwargs) if kwargs else { }
        self.weightInit = None

        if initType is not None:
            self.weightInit = self._resolve_init(initType)

    @staticmethod
    def _resolve_init(name):
        '''
        Look up an initialization function by name in torch.nn.init.

        :raises NotImplementedError: if torch.nn.init has no attribute with that name
        '''
        if not hasattr(torch.nn.init, name):
            raise NotImplementedError('Init method [%s] does not exist in torch.nn.init' % name)
        return getattr(torch.nn.init, name)

    # ===============================================  INIT WEIGHTS =================================
    def init_weights(self, model, weightInit = None, kwargs = None):
        '''
        Function called for initializing the weights of a model
        :param model: pytorch model
        :param weightInit: init type (must be in torch.nn.init.*); when None, the one given to the
                           constructor is used
        :param kwargs: kwargs to be passed to the initialization function; when empty, the ones
                       given to the constructor are used
        :raises NotImplementedError: if weightInit is not in torch.nn.init
        :raises ValueError: if no init type was given here or to the constructor
        :return:
        '''

        if weightInit is not None:
            self.weightInit = self._resolve_init(weightInit)

        if self.weightInit is None:
            raise ValueError('No init method specified')

        if kwargs:
            self.kwargs = dict(kwargs)

        model.apply(self._init_module)

    # =============================================== INIT MODULES ====================================================
    def _init_module(self, module):
        '''
        Internal function which is applied to every module in a network

        :param module: module to be initialized
        '''

        className = module.__class__.__name__
        weight = getattr(module, 'weight', None)
        bias = getattr(module, 'bias', None)

        # init conv and linear layers
        if hasattr(module, 'weight') and ('Conv' in className or 'Linear' in className):
            self.weightInit(module.weight.data, **self.kwargs)
            # init biases
            if bias is not None:
                torch.nn.init.constant_(bias.data, 0.0)

        # init batch norm weights
        elif 'BatchNorm2d' in className:
            # BatchNorm layer's weight is not a matrix; only normal distribution applies.
            # Both parameters are None when the layer was built with affine = False.
            if weight is not None:
                torch.nn.init.normal_(weight.data, 1.0, 0.02)
            if bias is not None:
                torch.nn.init.constant_(bias.data, 0.0)

    # =============================================== INIT NET =====================================================
    def parallel_net(self, net, gpus = None):
        '''
        Wrap a network in torch.nn.DataParallel.

        :param net: network to wrap
        :param gpus: list of GPU ids; must not be empty
        :return: the wrapped network
        '''
        gpus = [] if gpus is None else gpus

        assert (torch.cuda.is_available()), 'Cuda is not available'
        assert len(gpus) > 0, 'GPU id not specified'
        net = torch.nn.DataParallel(net, gpus)  # multi-GPUs

        return net


### TRAIN ENGINE

In [None]:
class TrainEngine():
    """
    Builds, trains and evaluates a CNNModel on the per-subject data stored in ``data_dict``.

    ``data_dict`` maps a subject name to a dict: the training subjects ('first', 'second',
    'third') provide 'train' (inputs) and 'y' (labels), the test subjects ('fourth', 'fifth')
    provide 'test'. Labels are handled as 1-based: they are shifted by -1 before the loss is
    computed and predictions are shifted by +1.
    """

    def __init__(self, data_dict, batch_size, smoothing_factor, max_norm, learning_rate_delta, warm_up_epochs,
                 normal_epochs, cooldown_epochs, number_of_channels, cnn_blocks, example_length, kernel_size,
                 number_of_signals, nn_first_layer_size, nn_second_layer_size, dropout, use_gpu):
        """
        :param data_dict: per-subject data, see the class description
        :param batch_size: batch size of every DataLoader
        :param smoothing_factor: ``alpha`` of the RMSprop optimizer
        :param max_norm: maximum gradient norm used for gradient clipping
        :param learning_rate_delta: multiplied by ``batch_size`` to obtain ``learning_rate``
        :param warm_up_epochs: number of epochs during which the schedule value increases
        :param normal_epochs: number of epochs during which the schedule value stays constant
        :param cooldown_epochs: number of epochs during which the schedule value decreases
        :param use_gpu: use "cuda:0" when available, otherwise (or when False) the CPU
        The remaining parameters are forwarded unchanged to CNNModel.
        """
        # save parameters
        self.data_dict = data_dict
        self.batch_size = batch_size
        self.smoothing_factor = smoothing_factor
        self.max_norm = max_norm
        self.learning_rate_delta = learning_rate_delta
        self.warm_up_epochs = warm_up_epochs
        self.normal_epochs = normal_epochs
        self.cooldown_epochs = cooldown_epochs
        self.number_of_channels = number_of_channels
        self.cnn_blocks = cnn_blocks
        self.example_length = example_length
        self.kernel_size = kernel_size
        self.number_of_signals = number_of_signals
        self.nn_first_layer_size = nn_first_layer_size
        self.nn_second_layer_size = nn_second_layer_size
        self.dropout = dropout

        # choose the device the model and the batches are moved to
        if use_gpu:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")

        # define learning rate and the per-epoch increments of the warm-up / cool-down phases
        self.learning_rate = self.learning_rate_delta * batch_size
        self.learning_rate_step_warm = self.learning_rate / (self.warm_up_epochs + 1)
        self.learning_rate_step_cold = self.learning_rate / (self.cooldown_epochs + 1)

    def initialize_engine(self):
        """
        Create a fresh model, initialize its weights and create the optimizer and the scheduler.
        """

        # define model
        self.model = CNNModel(
            number_of_channels = self.number_of_channels,
            cnn_blocks = self.cnn_blocks,
            example_length = self.example_length,
            kernel_size = self.kernel_size,
            number_of_signals = self.number_of_signals,
            nn_first_layer_size = self.nn_first_layer_size,
            nn_second_layer_size = self.nn_second_layer_size,
            dropout = self.dropout
        )
        self.model.to(self.device)
        self.model = self.model.float()

        # initialize weights
        weightInit = WeightInitializer()
        weightInit.init_weights(self.model, 'xavier_normal_', { 'gain': 0.02 })

        # define optimizer
        self.optimizer = RMSprop(self.model.parameters(), lr = self.learning_rate_step_warm,
                                 alpha = self.smoothing_factor)

        # define learning rate algorithm
        # NOTE(review): LambdaLR multiplies the optimizer's initial lr by the value returned by
        # lr_lambda, whereas get_learning_rate looks like it returns an absolute learning rate.
        # The effective rate is therefore learning_rate_step_warm * get_learning_rate(epoch).
        # Left unchanged because altering it changes the training dynamics -- confirm the intent.
        self.scheduler = LambdaLR(self.optimizer, lr_lambda = lambda epoch: self.get_learning_rate(epoch))

    def get_labels(self, prediction):
        """
        :param prediction: model output, one row per example
        :return: index of the largest entry of every row, shifted by +1 (1-based class labels)
        """
        return torch.argmax(prediction, dim = 1) + 1

    @staticmethod
    def _concatenate_chunks(chunks):
        """Join per-batch arrays into one float64 array (an empty array when there are no chunks)."""
        # seeding with an empty float array reproduces the dtype obtained when the batches are
        # appended one by one to np.array([])
        return np.concatenate([np.array([])] + list(chunks))

    def _run_training_epoch(self, train_loader):
        """Run one optimization pass over ``train_loader``; return (real labels, predicted labels)."""
        self.model.train()

        real_chunks = []
        predicted_chunks = []

        for batch, real, _ in train_loader:
            # move to device and cast to the types expected by the model and the loss
            batch = batch.to(self.device).float()
            real = real.to(self.device).long()

            self.optimizer.zero_grad()

            prediction = self.model(batch)
            predicted_labels = self.get_labels(prediction)

            # NLLLoss expects 0-based class indices
            loss = self.loss(prediction, real - 1)
            loss.backward()

            # clip the gradient before the update
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_norm)
            self.optimizer.step()

            predicted_chunks.append(predicted_labels.cpu().detach().numpy())
            real_chunks.append(real.cpu().detach().numpy())

        return self._concatenate_chunks(real_chunks), self._concatenate_chunks(predicted_chunks)

    def _predict(self, loader):
        """
        Run the model in eval mode over ``loader`` without tracking gradients.

        :return: real labels, predicted labels, dataset indices and the raw model outputs
                 (log-probabilities; None when the loader yields no batch), all in loader order
        """
        self.model.eval()

        real_chunks = []
        predicted_chunks = []
        item_chunks = []
        probability_chunks = []

        # no gradients are needed for inference; this avoids building the autograd graph
        with torch.no_grad():
            for batch, real, item in loader:
                batch = batch.to(self.device).float()

                prediction = self.model(batch)
                predicted_labels = self.get_labels(prediction)

                predicted_chunks.append(predicted_labels.cpu().numpy())
                real_chunks.append(real.long().cpu().numpy())
                item_chunks.append(item.cpu().numpy())
                probability_chunks.append(prediction.cpu().numpy())

        predicted_probabilities = np.concatenate(probability_chunks) if probability_chunks else None

        return (self._concatenate_chunks(real_chunks), self._concatenate_chunks(predicted_chunks),
                self._concatenate_chunks(item_chunks), predicted_probabilities)

    @staticmethod
    def _save_in_dataset_order(predicted_probabilities, item_array, file_path):
        """Put the model outputs back in dataset order (the loaders shuffle) and save them."""
        predicted_rearranged = [0 for _ in range(len(item_array))]
        for index, item in enumerate(item_array):
            predicted_rearranged[int(item)] = predicted_probabilities[index]
        np.save(file_path, predicted_rearranged)

    def train(self, path):
        """
        Training with cross validation using the LOOCV principle on each subject.

        For every held-out subject a fresh model is trained on the other two subjects; the model
        outputs on the held-out subject after the last epoch are saved to
        ``<path>/<subject>_prediction.npy``.

        :param path: directory the prediction files are written to
        :return: real and predicted labels of the last held-out subject after the last epoch
        """

        for cross in ['first', 'second', 'third']:

            # define loaders
            train = ['first', 'second', 'third']
            train.remove(cross)
            train_loader, cross_loader, class_weights = self.create_loaders(train, cross)

            # define loss
            self.loss = NLLLoss(weight = torch.FloatTensor(class_weights).to(self.device))

            # define engine
            self.initialize_engine()

            # the values collected and printed below are balanced accuracies, not losses;
            # the messages are kept as they are
            train_loss = []
            for epoch in range(self.warm_up_epochs + self.normal_epochs + self.cooldown_epochs):

                print(f'Epoch: {epoch + 1}. Start time: {str(datetime.datetime.now())}')

                real_array, predicted_array = self._run_training_epoch(train_loader)

                train_loss.append(balanced_accuracy_score(real_array, predicted_array))
                print(f'Train loss: {train_loss[-1]}')

                # update learning rate
                self.scheduler.step()

                # evaluate on the held-out subject
                real_array, predicted_array, item_array, predicted_probabilities = self._predict(cross_loader)

                print(f'Cross loss: {balanced_accuracy_score(real_array, predicted_array)}')

            # save the model outputs of the last epoch
            self._save_in_dataset_order(predicted_probabilities, item_array,
                                        os.path.join(path, f'{cross}_prediction.npy'))

        return real_array, predicted_array

    def full_train(self, path):
        """
        Full training of the model, using all the available data.

        Trains on the three training subjects, predicts the two test subjects and saves the model
        outputs, in dataset order, to ``<path>/test_prediction.npy``.

        :param path: directory the prediction file is written to
        :return: model outputs and the matching dataset indices, both in loader (shuffled) order
        """

        # define the training and test set
        train_subjects = ['first', 'second', 'third']
        train_data = np.concatenate([self.data_dict[name]['train'] for name in train_subjects])
        train_y = np.concatenate([self.data_dict[name]['y'] for name in train_subjects])

        test_data = np.concatenate(
            (self.data_dict['fourth']['test'], self.data_dict['fifth']['test'])
        )
        # placeholder labels: the dataset needs a label per example, the values are never used
        test_y = np.zeros(test_data.shape[0], dtype = int)

        dataset_train = CnnDataset(train_data, train_y)
        train_loader = DataLoader(dataset_train, batch_size = self.batch_size, shuffle = True)

        dataset_test = CnnDataset(test_data, test_y)
        test_loader = DataLoader(dataset_test, batch_size = self.batch_size, shuffle = True)

        # classes / y are keyword-only in current scikit-learn releases
        class_weights = compute_class_weight('balanced', classes = np.unique(train_y), y = np.array(train_y))

        # define loss
        self.loss = NLLLoss(weight = torch.FloatTensor(class_weights).to(self.device))

        # define engine
        self.initialize_engine()

        # balanced accuracies per epoch (the printed message keeps its original wording)
        train_loss = []
        for epoch in range(self.warm_up_epochs + self.normal_epochs + self.cooldown_epochs):

            print(f'Epoch: {epoch + 1}. Start time: {str(datetime.datetime.now())}')

            real_array, predicted_array = self._run_training_epoch(train_loader)

            train_loss.append(balanced_accuracy_score(real_array, predicted_array))
            print(f'Train loss: {train_loss[-1]}')

            # update learning rate
            self.scheduler.step()

        # predict the test subjects
        _, _, item_array, predicted_probabilities = self._predict(test_loader)

        # save the model outputs
        self._save_in_dataset_order(predicted_probabilities, item_array,
                                    os.path.join(path, 'test_prediction.npy'))

        return predicted_probabilities, item_array

    def get_learning_rate(self, epoch):
        """
        Schedule value for ``epoch``: linear increase during the warm-up epochs, constant
        ``learning_rate`` during the normal epochs, linear decrease afterwards. The value is
        also printed together with the epoch.
        """

        if epoch < self.warm_up_epochs:
            lr = (epoch + 1) * self.learning_rate_step_warm
        elif epoch < self.warm_up_epochs + self.normal_epochs:
            lr = self.learning_rate
        else:
            epochs = epoch - self.warm_up_epochs - self.normal_epochs
            lr = self.learning_rate - epochs * self.learning_rate_step_cold

        print(lr, epoch)
        return lr

    def create_loaders(self, train, cross):
        """
        :param train: names of the two subjects used for training
        :param cross: name of the held-out subject
        :return: training loader, held-out loader and the 'balanced' class weights computed from
                 the training labels
        """
        train_data = np.concatenate((self.data_dict[train[0]]['train'], self.data_dict[train[1]]['train']))
        train_y = np.concatenate((self.data_dict[train[0]]['y'], self.data_dict[train[1]]['y']))

        cross_data = self.data_dict[cross]['train']
        cross_y = self.data_dict[cross]['y']

        dataset_train = CnnDataset(train_data, train_y)
        train_loader = DataLoader(dataset_train, batch_size = self.batch_size, shuffle = True)

        dataset_cross = CnnDataset(cross_data, cross_y)
        cross_loader = DataLoader(dataset_cross, batch_size = self.batch_size, shuffle = True)

        # classes / y are keyword-only in current scikit-learn releases
        class_weights = compute_class_weight('balanced', classes = np.unique(train_y), y = np.array(train_y))

        return train_loader, cross_loader, class_weights


### TRAINING CONSTANTS

In [None]:
# define constants influencing the learning part of the pipeline

# probability used by the Dropout layers of the model
DROPOUT = 0.2

# output size of the second Linear layer, i.e. the size of the LogSoftmax output
NN_SECOND_LAYER_SIZE = 3

# output size of the first Linear layer
NN_FIRST_LAYER_SIZE = 80

# passed to the model as number_of_signals (three signals are stacked per example: eeg1, eeg2, emg)
NUMBER_OF_SIGNALS = 3

# kernel width of the convolutions in the CNN blocks
KERNEL_SIZE = 5

# number of CNN blocks
CNN_BLOCKS = 8

# passed to the model as number_of_channels
NUMBER_OF_CHANNELS = 64

# number of epochs in the last phase of the learning-rate schedule
COOLDOWN_EPOCHS = 5

# number of epochs in the constant phase of the learning-rate schedule
NORMAL_EPOCHS = 10

# number of epochs in the first phase of the learning-rate schedule
WARM_UP_EPOCHS = 5

# multiplied by the batch size to obtain the engine's learning_rate
LEARNING_RATE_DELTA = 0.00128

# maximum gradient norm used for gradient clipping
MAX_NORM = 0.1

# alpha of the RMSprop optimizer
SMOOTHING_FACTOR = 0.99

# batch size of every DataLoader
BATCH_SIZE = 256

### TRAINING

### Create suitable data format for training

In [None]:
def create_cnn_format(eeg_1, eeg_2, emg):
    """
    Group the three signals example by example.

    :return: array whose entry ``i`` is ``[eeg_1[i], eeg_2[i], emg[i]]``; the number of entries
             is given by ``len(eeg_1)``
    """
    return np.array([
        [eeg_1[position], eeg_2[position], emg[position]]
        for position in range(len(eeg_1))
    ])

In [None]:
# stack the three signals of every subject into the layout consumed by the CNN
first_subject = create_cnn_format(eeg_1 = first_eeg_1, eeg_2 = first_eeg_2, emg = first_emg)
second_subject = create_cnn_format(eeg_1 = second_eeg_1, eeg_2 = second_eeg_2, emg = second_emg)
third_subject = create_cnn_format(eeg_1 = third_eeg_1, eeg_2 = third_eeg_2, emg = third_emg)
fourth_subject = create_cnn_format(eeg_1 = fourth_eeg_1, eeg_2 = fourth_eeg_2, emg = fourth_emg)
fifth_subject = create_cnn_format(eeg_1 = fifth_eeg_1, eeg_2 = fifth_eeg_2, emg = fifth_emg)

In [None]:
# per-subject data consumed by TrainEngine: inputs and labels (first label column) for the
# training subjects, inputs only for the test subjects
data_dict = {
    'first': { 'train': first_subject, 'y': first_y[:, 0] },
    'second': { 'train': second_subject, 'y': second_y[:, 0] },
    'third': { 'train': third_subject, 'y': third_y[:, 0] },
    'fourth': { 'test': fourth_subject },
    'fifth': { 'test': fifth_subject },
}

In [None]:
# drop the references to the per-signal windows; the names stay defined but point to None
first_eeg_1 = first_eeg_2 = first_emg = None
second_eeg_1 = second_eeg_2 = second_emg = None
third_eeg_1 = third_eeg_2 = third_emg = None
fourth_eeg_1 = fourth_eeg_2 = fourth_emg = None
fifth_eeg_1 = fifth_eeg_2 = fifth_emg = None

### Define and train model

In [None]:
# build the training engine from the constants defined above
train_engine = TrainEngine(
    # data and device
    data_dict = data_dict,
    use_gpu = True,
    # optimisation
    batch_size = BATCH_SIZE,
    smoothing_factor = SMOOTHING_FACTOR,
    max_norm = MAX_NORM,
    learning_rate_delta = LEARNING_RATE_DELTA,
    # learning-rate schedule
    warm_up_epochs = WARM_UP_EPOCHS,
    normal_epochs = NORMAL_EPOCHS,
    cooldown_epochs = COOLDOWN_EPOCHS,
    # model architecture
    number_of_channels = NUMBER_OF_CHANNELS,
    cnn_blocks = CNN_BLOCKS,
    example_length = first_subject.shape[-1],
    kernel_size = KERNEL_SIZE,
    number_of_signals = NUMBER_OF_SIGNALS,
    nn_first_layer_size = NN_FIRST_LAYER_SIZE,
    nn_second_layer_size = NN_SECOND_LAYER_SIZE,
    dropout = DROPOUT,
)

# cross-validation: each training subject is held out once
real, predicted = train_engine.train(base_path)

# full training on all three training subjects (no held-out subject), then prediction of the test subjects
predicted_probabilities, item_array = train_engine.full_train(base_path)

In [None]:
# dataset_train = CnnDataset(data_dict['third']['train'], data_dict['third']['y'])
# train_loader = DataLoader(dataset_train, batch_size = 256, shuffle = True)

# model = train_engine.model
# model.to(torch.device("cuda:0"))

# predicted_array = np.array([])
# real_array = np.array([])
# item_array = np.array([])

# for batch, real, item in train_loader:
#   batch = batch.to(torch.device("cuda:0"))

#   # change to float type
#   batch = batch.float()

#   # get model prediction
#   prediction = model(batch)

#   predicted_labels = torch.argmax(prediction, dim = 1) + 1
#   predicted_labels = predicted_labels.cpu().detach().numpy()
#   real = real.cpu().detach().numpy()
#   item = item.cpu().detach().numpy()
#   predicted_array = np.concatenate((predicted_array, predicted_labels))
#   real_array = np.concatenate((real_array, real))
#   item_array = np.concatenate((item_array, item))

# print(balanced_accuracy_score(real_array, predicted_array))
# print(item_array)

# predicted_rearranged = [0 for _ in range(21600 - 4)]
# for index, item in enumerate(item_array):
#   item = int(item)
#   predicted_rearranged[item] = predicted_array[index]

# real = third_y[:,0]
# balanced_accuracy_score(real, predicted_rearranged)

In [None]:
# prediction = np.argmax(predicted_probabilities, axis = 1)

# predicted_rearranged = [0 for _ in range(len(item_array))]
# for index, item in enumerate(item_array):
#   item = int(item)
#   predicted_rearranged[item] = prediction[index]

# prediction = predicted_rearranged
# prediction = np.array(prediction)

# middle = prediction.shape[0] // 2
# fourth_prediction = prediction[:middle]
# fifth_prediction = prediction[middle:]

# final_prediction = np.concatenate((
#     np.array([fourth_prediction[0], fourth_prediction[0]]),
#     fourth_prediction,
#     np.array([fourth_prediction[-1], fourth_prediction[-1]]),
#     np.array([fifth_prediction[0], fifth_prediction[0]]),
#     fifth_prediction,
#     np.array([fifth_prediction[-1], fifth_prediction[-1]])
# ))

# # create submission
# submission = []
# for index in range(len(final_prediction)):
#   submission.append(
#       [
#         index,
#         final_prediction[index] + 1
#       ]
#   )

# export = pd.DataFrame(data = np.array(submission), columns = ['Id','y'])
# export.to_csv(os.path.join(base_path, f'submission_initial.csv'), encoding = 'utf-8', index = False)