In [1]:
import numpy as np
import argparse
import os
import imp
import re
import pickle5 as pickle
import datetime
import random
import math
import logging
import copy
import matplotlib.pyplot as plt
import sklearn
import logging
from sklearn.cluster import KMeans
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.neighbors import kneighbors_graph
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
import torch
from torch import nn
import torch.nn.utils.rnn as rnn_utils
from torch.utils import data
from torch.autograd import Variable
import torch.nn.functional as F
from torch.nn import Parameter

from utils import utils
from utils.readers import InHospitalMortalityReader
from utils.preprocessing import Discretizer, Normalizer
from utils import metrics
from utils import common_utils
from torch.autograd import Function
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

  after removing the cwd from sys.path.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
target_dataset = 'TJ' 
RANDOM_SEED = 43
np.random.seed(RANDOM_SEED) #numpy
random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED) # cpu
torch.cuda.manual_seed(RANDOM_SEED) #gpu
torch.backends.cudnn.deterministic=True # cudnn

# Use CUDA if available
device = torch.device("cuda:7" if torch.cuda.is_available() == True else 'cpu')
# print("available device: {}".format(device))
reverse = False
model_name = 'distcare_adversal'

In [3]:
if reverse:
    file_name = 'log_file' + '_' + model_name + '_' + target_dataset + '_' + 'reverse' + '.log'
else:
    file_name = 'log_file' + '_' + model_name + '_' + target_dataset + '.log'
def get_logger(name, file_name):
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    
    # 以下两行是为了在jupyter notebook 中不重复输出日志
    if logger.root.handlers:
        logger.root.handlers[0].setLevel(logging.WARNING)
 
    handler_stdout = logging.StreamHandler()
    handler_stdout.setLevel(logging.INFO)
    handler_stdout.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    # logger.addHandler(handler_stdout)
 
    handler_file = logging.FileHandler(filename=file_name, mode='w', encoding='utf-8')
    handler_file.setLevel(logging.DEBUG)
    handler_file.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler_file)
 
    return logger

logger = get_logger(__name__,file_name)

logger.debug('这是希望输出的debug内容')
logger.info('这是希望输出的info内容')
logger.warning('这是希望输出的warning内容')

## Get source data

In [4]:
def get_n2n_data(x, y, x_len):
    length = len(x)
    assert length == len(y)
    assert length == len(x_len)
    new_x = []
    new_y = []
    new_x_len = []
    for i in range(length):
        for j in range(len(x[i])):
            new_x.append(x[i][:j+1])
            new_y.append(y[i][j])
            new_x_len.append(j+1)
    return new_x, new_y, new_x_len

In [5]:
source_data_path = './data/Challenge/'
small_part = False
arg_timestep = 1.0
batch_size = 256
epochs = 100
all_x_source = pickle.load(open(source_data_path + 'new_x_front_fill.dat', 'rb'))
all_y_source = pickle.load(open(source_data_path + 'new_y_front_fill.dat', 'rb'))
all_names_source = pickle.load(open(source_data_path + 'new_name.dat', 'rb'))
static_source = pickle.load(open(source_data_path + 'new_demo_front_fill.dat', 'rb'))
mask_x_source = pickle.load(open(source_data_path + 'new_mask_x.dat', 'rb'))
mask_demo_source = pickle.load(open(source_data_path + 'new_mask_demo.dat', 'rb'))
all_x_len_source = [len(i) for i in all_x_source]

if target_dataset == 'PD':
    subset_idx_source = [31, 29, 28, 33, 25, 18, 7, 21, 16, 15, 19, 17, 24, 3, 5, 0]
elif target_dataset == 'TJ':
    subset_idx_source = [27, 29, 18, 16, 26, 33, 28, 31, 32, 15, 11, 25, 21, 20, 9, 17, 30, 19]
elif target_dataset == 'HM':
    subset_idx_source = [0, 1, 2, 3, 5, 9, 11, 12, 13, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

subset_cnt = len(subset_idx_source)
other_idx = []
for i in range(len(all_x_source[0][0])):
    if i not in subset_idx_source:
        other_idx.append(i)

for i in range(len(all_x_source)): #将共同特征移动到最开始，非共同特征移动到末尾
    cur = np.array(all_x_source[i], dtype=float)
    cur_mask = np.array(mask_x_source[i])
    cur_subset = cur[:, subset_idx_source]
    cur_other = cur[:, other_idx]
    cur_mask_subset = cur_mask[:, subset_idx_source]
    cur_mask_other = cur_mask[:, other_idx]
    all_x_source[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
    mask_x_source[i] = np.concatenate((cur_mask_subset, cur_mask_other), axis=1).tolist()


train_num_source =int( len(all_x_source) * 0.8) + 1
logger.info(train_num_source)
dev_num_source =int( len(all_x_source) * 0.1) + 1
logger.info(dev_num_source)
test_num_source =int( len(all_x_source) * 0.1)
logger.info(test_num_source)
assert(train_num_source+dev_num_source+test_num_source == len(all_x_source))

train_x_source = []
train_y_source = []
train_names_source = []
train_static_source = []
train_x_len_source = []
train_mask_x_source = []
for idx in range(train_num_source):
    train_x_source.append(all_x_source[idx])
    train_y_source.append(int(all_y_source[idx][-1]))
    train_names_source.append(all_names_source[idx])
    train_static_source.append(static_source[idx])
    train_x_len_source.append(all_x_len_source[idx])
    train_mask_x_source.append(mask_x_source[idx])

dev_x_source = []
dev_y_source = []
dev_names_source = []
dev_static_source = []
dev_x_len_source = []
dev_mask_x_source = []
for idx in range(train_num_source, train_num_source + dev_num_source):
    dev_x_source.append(all_x_source[idx])
    dev_y_source.append(int(all_y_source[idx][-1]))
    dev_names_source.append(all_names_source[idx])
    dev_static_source.append(static_source[idx])
    dev_x_len_source.append(all_x_len_source[idx])
    dev_mask_x_source.append(mask_x_source[idx])


test_x = []
test_y = []
test_names = []
test_static = []
test_x_len = []
test_mask_x = []
for idx in range(train_num_source + dev_num_source, train_num_source + dev_num_source + test_num_source):
    test_x.append(all_x_source[idx])
    test_y.append(int(all_y_source[idx][-1]))
    test_names.append(all_names_source[idx])
    test_static.append(static_source[idx])
    test_x_len.append(all_x_len_source[idx])
    test_mask_x.append(mask_x_source[idx])


assert(len(train_x_source) == train_num_source)
assert(len(dev_x_source) == dev_num_source)
assert(len(test_x) == test_num_source)

long_x_source = all_x_source
long_y_source = [y[-1] for y in all_y_source]


In [6]:
def get_loss(y_pred, y_true):
    loss = torch.nn.BCELoss()
    return loss(y_pred, y_true)

In [7]:
def get_re_loss(y_pred, y_true):
    loss = torch.nn.MSELoss()
    return loss(y_pred, y_true)

In [8]:
def get_kl_loss(x_pred, x_target):
    loss = torch.nn.KLDivLoss(reduce=True, size_average=True)
    return loss(x_pred, x_target)

In [9]:
def get_wass_dist(x_pred, x_target):
    m1 = torch.mean(x_pred, dim=0)
    m2 = torch.mean(x_target, dim=0)
    v1 = torch.var(x_pred, dim=0)
    v2 = torch.var(x_target, dim=0)
    p1 = torch.sum(torch.pow((m1 - m2), 2))
    p2 = torch.sum(torch.pow(torch.pow(v1, 1/2) - torch.pow(v2, 1/2), 2))
    return torch.pow(p1+p2, 1/2)

In [10]:
def pad_sents(sents, pad_token):
#     print(f'len(pad_token) is {len(pad_token)}')
#     print(f'sents is {sents}')

    sents_padded = []

    max_length = max([len(_) for _ in sents])
    for i in sents:
        padded = list(i) + [pad_token]*(max_length-len(i))
#         print(f'padded is {padded}')
        sents_padded.append(np.array(padded))


    return np.array(sents_padded)

In [11]:
def batch_iter(x, y, lens, batch_size, shuffle=False):
    """ Yield batches of source and target sentences reverse sorted by length (largest to smallest).
    @param data (list of (src_sent, tgt_sent)): list of tuples containing source and target sentence
    @param batch_size (int): batch size
    @param shuffle (boolean): whether to randomly shuffle the dataset
    """
    # batch_num = math.ceil(len(x) / batch_size) # 向下取整
    batch_num = len(x) // batch_size if len(x) % batch_size == 0 else len(x) // batch_size + 1
    # print(f"len(x) is {len(x)}, len(y) is {len(y)}, len(lens) is {len(lens)}, batch_size is {batch_size}, batch_num is {batch_num}")
    index_array = list(range(len(x)))

    if shuffle:
        np.random.shuffle(index_array)

    for i in range(batch_num):
        if (i + 1) * batch_size  < len(x):
            indices = index_array[i * batch_size: (i + 1) * batch_size] #  fetch out all the induces
        else:
            indices = index_array[i * batch_size: ]
        examples = []
        for idx in indices:
            examples.append((x[idx], y[idx],lens[idx]))
       
        examples = sorted(examples, key=lambda e: len(e[0]), reverse=True)
    
        batch_x = [e[0] for e in examples]
        batch_y = [e[1] for e in examples]
        batch_lens = [e[2] for e in examples]

        yield batch_x, batch_y, batch_lens

In [12]:
def length_to_mask(length, max_len=None, dtype=None):
    """length: B.
    return B x max_len.
    If max_len is None, then max of length will be used.
    """
    assert len(length.shape) == 1, 'Length shape should be 1 dimensional.'
    max_len = max_len or length.max().item()
    mask = torch.arange(max_len, device=length.device,
                        dtype=length.dtype).expand(len(length), max_len) < length.unsqueeze(1)
    if dtype is not None:
        mask = torch.as_tensor(mask, dtype=dtype, device=length.device)
    return mask

In [13]:
class SingleAttention(nn.Module):
    def __init__(self, attention_input_dim, attention_hidden_dim, attention_type='add', demographic_dim=12, time_aware=False, use_demographic=False):
        super(SingleAttention, self).__init__()
        
        self.attention_type = attention_type
        self.attention_hidden_dim = attention_hidden_dim
        self.attention_input_dim = attention_input_dim
        self.use_demographic = use_demographic
        self.demographic_dim = demographic_dim
        self.time_aware = time_aware

        # batch_time = torch.arange(0, batch_mask.size()[1], dtype=torch.float32).reshape(1, batch_mask.size()[1], 1)
        # batch_time = batch_time.repeat(batch_mask.size()[0], 1, 1)
        
        if attention_type == 'add':
            if self.time_aware == True:
                # self.Wx = nn.Parameter(torch.randn(attention_input_dim+1, attention_hidden_dim))
                self.Wx = nn.Parameter(torch.randn(attention_input_dim, attention_hidden_dim))
                self.Wtime_aware = nn.Parameter(torch.randn(1, attention_hidden_dim))
                nn.init.kaiming_uniform_(self.Wtime_aware, a=math.sqrt(5))
            else:
                self.Wx = nn.Parameter(torch.randn(attention_input_dim, attention_hidden_dim))
            self.Wt = nn.Parameter(torch.randn(attention_input_dim, attention_hidden_dim))
            self.Wd = nn.Parameter(torch.randn(demographic_dim, attention_hidden_dim))
            self.bh = nn.Parameter(torch.zeros(attention_hidden_dim,))
            self.Wa = nn.Parameter(torch.randn(attention_hidden_dim, 1))
            self.ba = nn.Parameter(torch.zeros(1,))
            
            nn.init.kaiming_uniform_(self.Wd, a=math.sqrt(5))
            nn.init.kaiming_uniform_(self.Wx, a=math.sqrt(5))
            nn.init.kaiming_uniform_(self.Wt, a=math.sqrt(5))
            nn.init.kaiming_uniform_(self.Wa, a=math.sqrt(5))
        elif attention_type == 'mul':
            self.Wa = nn.Parameter(torch.randn(attention_input_dim, attention_input_dim))
            self.ba = nn.Parameter(torch.zeros(1,))
            
            nn.init.kaiming_uniform_(self.Wa, a=math.sqrt(5))
        elif attention_type == 'concat':
            if self.time_aware == True:
                self.Wh = nn.Parameter(torch.randn(2*attention_input_dim+1, attention_hidden_dim))
            else:
                self.Wh = nn.Parameter(torch.randn(2*attention_input_dim, attention_hidden_dim))

            self.Wa = nn.Parameter(torch.randn(attention_hidden_dim, 1))
            self.ba = nn.Parameter(torch.zeros(1,))
            
            nn.init.kaiming_uniform_(self.Wh, a=math.sqrt(5))
            nn.init.kaiming_uniform_(self.Wa, a=math.sqrt(5))
        else:
            raise RuntimeError('Wrong attention type.')
        
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax()
    
    def forward(self, input, demo=None):
 
        batch_size, time_step, input_dim = input.size() # batch_size * time_step * hidden_dim(i)
        time_decays = torch.tensor(range(time_step-1,-1,-1), dtype=torch.float32).unsqueeze(-1).unsqueeze(0).to(device)# 1*t*1
        b_time_decays = time_decays.repeat(batch_size,1,1)# b t 1
        
        if self.attention_type == 'add': #B*T*I  @ H*I
            q = torch.matmul(input[:,-1,:], self.Wt)# b h
            q = torch.reshape(q, (batch_size, 1, self.attention_hidden_dim)) #B*1*H
            if self.time_aware == True:
                # k_input = torch.cat((input, time), dim=-1)
                k = torch.matmul(input, self.Wx)#b t h
                # k = torch.reshape(k, (batch_size, 1, time_step, self.attention_hidden_dim)) #B*1*T*H
                time_hidden = torch.matmul(b_time_decays, self.Wtime_aware)#  b t h
            else:
                k = torch.matmul(input, self.Wx)# b t h
                # k = torch.reshape(k, (batch_size, 1, time_step, self.attention_hidden_dim)) #B*1*T*H
            if self.use_demographic == True:
                d = torch.matmul(demo, self.Wd) #B*H
                d = torch.reshape(d, (batch_size, 1, self.attention_hidden_dim)) # b 1 h
            h = q + k + self.bh # b t h
            if self.time_aware == True:
                h += time_hidden
            h = self.tanh(h) #B*T*H
            e = torch.matmul(h, self.Wa) + self.ba #B*T*1
            e = torch.reshape(e, (batch_size, time_step))# b t
        elif self.attention_type == 'mul':
            e = torch.matmul(input[:,-1,:], self.Wa)#b i
            e = torch.matmul(e.unsqueeze(1), input.permute(0,2,1)).squeeze() + self.ba #b t
        elif self.attention_type == 'concat':
            q = input[:,-1,:].unsqueeze(1).repeat(1,time_step,1)# b t i
            k = input
            c = torch.cat((q, k), dim=-1) #B*T*2I
            if self.time_aware == True:
                c = torch.cat((c, b_time_decays), dim=-1) #B*T*2I+1
            h = torch.matmul(c, self.Wh)
            h = self.tanh(h)
            e = torch.matmul(h, self.Wa) + self.ba #B*T*1
            e = torch.reshape(e, (batch_size, time_step)) # b t 
        
        a = self.softmax(e) #B*T
        v = torch.matmul(a.unsqueeze(1), input).squeeze() #B*I

        return v, a

class FinalAttentionQKV(nn.Module):
    def __init__(self, attention_input_dim, attention_hidden_dim, attention_type='add', dropout=None):
        super(FinalAttentionQKV, self).__init__()
        
        self.attention_type = attention_type
        self.attention_hidden_dim = attention_hidden_dim
        self.attention_input_dim = attention_input_dim


        self.W_q = nn.Linear(attention_input_dim, attention_hidden_dim)
        self.W_k = nn.Linear(attention_input_dim, attention_hidden_dim)
        self.W_v = nn.Linear(attention_input_dim, attention_hidden_dim)

        self.W_out = nn.Linear(attention_hidden_dim, 1)

        self.b_in = nn.Parameter(torch.zeros(1,))
        self.b_out = nn.Parameter(torch.zeros(1,))

        nn.init.kaiming_uniform_(self.W_q.weight, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.W_k.weight, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.W_v.weight, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.W_out.weight, a=math.sqrt(5))

        self.Wh = nn.Parameter(torch.randn(2*attention_input_dim, attention_hidden_dim))
        self.Wa = nn.Parameter(torch.randn(attention_hidden_dim, 1))
        self.ba = nn.Parameter(torch.zeros(1,))
        
        nn.init.kaiming_uniform_(self.Wh, a=math.sqrt(5))
        nn.init.kaiming_uniform_(self.Wa, a=math.sqrt(5))
        
        self.dropout = nn.Dropout(p=dropout)
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, input):
 
        batch_size, time_step, input_dim = input.size() # batch_size * input_dim + 1 * hidden_dim(i)
        input_q = self.W_q(torch.mean(input, dim=1)) # b h
        input_k = self.W_k(input)# b t h
        input_v = self.W_v(input)# b t h

        if self.attention_type == 'add': #B*T*I  @ H*I

            q = torch.reshape(input_q, (batch_size, 1, self.attention_hidden_dim)) #B*1*H
            h = q + input_k + self.b_in # b t h
            h = self.tanh(h) #B*T*H
            e = self.W_out(h) # b t 1
            e = torch.reshape(e, (batch_size, time_step))# b t

        elif self.attention_type == 'mul':
            q = torch.reshape(input_q, (batch_size, self.attention_hidden_dim, 1)) #B*h 1
            e = torch.matmul(input_k, q).squeeze()#b t
            
        elif self.attention_type == 'concat':
            q = input_q.unsqueeze(1).repeat(1,time_step,1)# b t h
            k = input_k
            c = torch.cat((q, k), dim=-1) #B*T*2I
            h = torch.matmul(c, self.Wh)
            h = self.tanh(h)
            e = torch.matmul(h, self.Wa) + self.ba #B*T*1
            e = torch.reshape(e, (batch_size, time_step)) # b t 
        
        a = self.softmax(e) #B*T
        if self.dropout is not None:
            a = self.dropout(a)
        v = torch.matmul(a.unsqueeze(1), input_v).squeeze() #B*I

        return v, a

def clones(module, N):
    "Produce N identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

def tile(a, dim, n_tile):
    init_dim = a.size(dim)
    repeat_idx = [1] * a.dim()
    repeat_idx[dim] = n_tile
    a = a.repeat(*(repeat_idx))
    order_index = torch.LongTensor(np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)])).to(device)
    return torch.index_select(a, dim, order_index).to(device)

class PositionwiseFeedForward(nn.Module): # new added
    "Implements FFN equation."
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(F.relu(self.w_1(x)))), None

    
class PositionalEncoding(nn.Module): # new added / not use anymore
    "Implement the PE function."
    def __init__(self, d_model, dropout, max_len=400):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0., max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0., d_model, 2) * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + Variable(self.pe[:, :x.size(1)], 
                         requires_grad=False)
        return self.dropout(x)

def subsequent_mask(size):
    "Mask out subsequent positions."
    attn_shape = (1, size, size)
    subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(subsequent_mask) == 0 

def attention(query, key, value, mask=None, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)# b h t d_k
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k) # b h t t
    if mask is not None:# 1 1 t t
        scores = scores.masked_fill(mask == 0, -1e9)# b h t t 
    p_attn = F.softmax(scores, dim = -1)# b h t t
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn # b h t v (d_k) 
    
class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0):
        "Take in model size and number of heads."
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, self.d_k * self.h), 3)
        self.final_linear = nn.Linear(d_model, d_model)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
        
    def forward(self, query, key, value, mask=None):
        if mask is not None:
            # Same mask applied to all h heads.
            mask = mask.unsqueeze(1) # 1 1 t t

        nbatches = query.size(0)# b
        input_dim = query.size(1)# i+1
        feature_dim = query.size(-1)# i+1

        #input size -> # batch_size * d_input * hidden_dim
        
        # d_model => h * d_k 
        query, key, value = \
            [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
             for l, x in zip(self.linears, (query, key, value))] # b num_head d_input d_k
        
       
        x, self.attn = attention(query, key, value, mask=mask, 
                                 dropout=self.dropout)# b num_head d_input d_v (d_k) 
        
        x = x.transpose(1, 2).contiguous() \
             .view(nbatches, -1, self.h * self.d_k)# batch_size * d_input * hidden_dim

        #DeCov 
        DeCov_contexts = x.transpose(0, 1).transpose(1, 2) # I+1 H B
#         print(DeCov_contexts.shape)
        Covs = cov(DeCov_contexts[0,:,:])
        DeCov_loss = 0.5 * (torch.norm(Covs, p = 'fro')**2 - torch.norm(torch.diag(Covs))**2 ) 
        for i in range(11 -1):
            Covs = cov(DeCov_contexts[i+1,:,:])
            DeCov_loss += 0.5 * (torch.norm(Covs, p = 'fro')**2 - torch.norm(torch.diag(Covs))**2 ) 


        return self.final_linear(x), DeCov_loss

class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-7):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2

def cov(m, y=None):
    if y is not None:
        m = torch.cat((m, y), dim=0)
    m_exp = torch.mean(m, dim=1)
    x = m - m_exp[:, None]
    cov = 1 / (x.size(1) - 1) * x.mm(x.t())
    return cov

class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note for code simplicity the norm is first as opposed to last.
    """
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Apply residual connection to any sublayer with the same size."
        returned_value = sublayer(self.norm(x))
        return x + self.dropout(returned_value[0]) , returned_value[1]

class distcare_teacher(nn.Module):
    def __init__(self, input_dim, hidden_dim, d_model,  MHD_num_head, d_ff, output_dim, keep_prob=0.6):
        super(distcare_teacher, self).__init__()

        # hyperparameters
        self.input_dim = input_dim  
        self.hidden_dim = hidden_dim  # d_model
        self.d_model = d_model
        self.MHD_num_head = MHD_num_head
        self.d_ff = d_ff
        self.output_dim = output_dim
        self.keep_prob = keep_prob

        # layers
        self.PositionalEncoding = PositionalEncoding(self.d_model, dropout = 0, max_len = 400)

        self.GRUs = clones(nn.GRU(1, self.hidden_dim, batch_first = True), self.input_dim)
        self.LastStepAttentions = clones(SingleAttention(self.hidden_dim, 8, attention_type='concat', demographic_dim=12, time_aware=True, use_demographic=False),self.input_dim)
        
        self.FinalAttentionQKV = FinalAttentionQKV(self.hidden_dim, self.hidden_dim, attention_type='mul',dropout = 1 - self.keep_prob)

        self.MultiHeadedAttention = MultiHeadedAttention(self.MHD_num_head, self.d_model,dropout = 1 - self.keep_prob)
        self.SublayerConnection = SublayerConnection(self.d_model, dropout = 1 - self.keep_prob)

        self.PositionwiseFeedForward = PositionwiseFeedForward(self.d_model, self.d_ff, dropout=0.1)

        self.demo_proj_main = nn.Linear(12, self.hidden_dim)
        self.demo_proj = nn.Linear(12, self.hidden_dim)
        self.Linear = nn.Linear(self.hidden_dim, 1)
        self.output = nn.Linear(self.input_dim, self.output_dim)

        self.dropout = nn.Dropout(p = 1 - self.keep_prob)
        self.tanh=nn.Tanh()
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
        self.relu=nn.ReLU()

    def forward(self, input, lens):
        lens = lens.to('cpu')
        batch_size = input.size(0)
        time_step = input.size(1)
        feature_dim = input.size(2)
        assert(feature_dim == self.input_dim)# input Tensor : 256 * 48 * 76
        assert(self.d_model % self.MHD_num_head == 0)

        GRU_embeded_input = self.GRUs[0](pack_padded_sequence(input[:,:,0].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
        for i in range(feature_dim-1):
            embeded_input = self.GRUs[i+1](pack_padded_sequence(input[:,:,i+1].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
            GRU_embeded_input = torch.cat((GRU_embeded_input, embeded_input), 1)
        posi_input = self.dropout(GRU_embeded_input) # batch_size * d_input * hidden_dim
        contexts = self.Linear(posi_input).squeeze()# b i
        output = self.output(self.dropout(contexts))# b 1
        output = self.sigmoid(output)
          
        return output, None, contexts
    #, self.MultiHeadedAttention.attn




In [14]:
epochs = 150
batch_size = 256
input_dim = 34
hidden_dim = 32
d_model = 32
MHD_num_head = 4
d_ff = 64
output_dim = 1

model = distcare_teacher(input_dim = input_dim, hidden_dim = hidden_dim, d_model=d_model, MHD_num_head=MHD_num_head, d_ff=d_ff, output_dim = output_dim).to(device)
# input_dim, d_model, d_k, d_v, MHD_num_head, d_ff, output_dim
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
teacher_flag = True

RuntimeError: CUDA error: invalid device ordinal
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
# Training Teacher
# If you don't want to train Teacher Model:
# - The pretrained taecher model is in direcrtory './model/', and can be directly loaded, 
# - Simply skip this cell and load the model to validate on Dev Dataset.
# logger.info('Training Teacher')

# total_train_loss = []
# total_valid_loss = []
# global_best = 0
# auroc = []
# auprc = []
# minpse = []
# history = []

# pad_token = np.zeros(input_dim)
# # begin_time = time.time()
# best_auroc = 0
# best_auprc = 0
# best_minpse = 0
    
# if target_dataset == 'TJ':    
#     file_name = './model/pretrained-challenge-front-fill-teacher-2covid'
# elif target_dataset == 'HM':
#     file_name = './model/pretrained-challenge-front-fill-teacher-2spain'
# elif target_dataset == 'PD':  
#     file_name = './model/pretrained-challenge-front-fill-teacher-2pd'

# for each_epoch in range(epochs):

#     epoch_loss = []
#     counter_batch = 0
#     model.train()  

#     for step, (batch_x, batch_y, batch_mask_x, batch_lens) in enumerate(batch_iter(train_x, train_y, train_mask_x, train_x_len, batch_size, shuffle=True)):  
#         optimizer.zero_grad()
#         batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
#         batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
#         batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
#         batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)

# #        masks = length_to_mask(batch_lens).unsqueeze(-1).float()

#         opt, decov_loss, emb = model(batch_x, batch_lens)

#         BCE_Loss = get_loss(opt, batch_y.unsqueeze(-1)) # b t 1
# #             REC_Loss = F.mse_loss(masks * recon, masks * batch_x, reduction='mean').to(device)

#         loss = BCE_Loss #+ 1000 * decov_loss

#         epoch_loss.append(BCE_Loss.cpu().detach().numpy())
#         loss.backward()
#         torch.nn.utils.clip_grad_norm_(model.parameters(), 20)
#         optimizer.step()

#         if step % 20 == 0:
#             print('Epoch %d Batch %d: Train Loss = %.4f'%(each_epoch, step, loss.cpu().detach().numpy()))
#             logger.info('Epoch %d Batch %d: Train Loss = %.4f'%(each_epoch, step, loss.cpu().detach().numpy()))

#     epoch_loss = np.mean(epoch_loss)
#     total_train_loss.append(epoch_loss)

#     #Validation
#     y_true = []
#     y_pred = []
#     with torch.no_grad():
#         model.eval()
#         valid_loss = []
#         valid_true = []
#         valid_pred = []
#         for batch_x, batch_y, batch_mask_x, batch_lens in batch_iter(dev_x, dev_y, dev_mask_x, dev_x_len, batch_size):
#             batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
#             batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
#             batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
#             batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
# #            masks = length_to_mask(batch_lens).unsqueeze(-1).float()


#             opt, decov_loss, emb = model(batch_x, batch_lens)

#             BCE_Loss = get_loss(opt, batch_y.unsqueeze(-1))
# #                 REC_Loss = F.mse_loss(recon, batch_x, reduction='mean').to(device)

#             valid_loss.append(BCE_Loss.cpu().detach().numpy())

#             y_pred += list(opt.cpu().detach().numpy().flatten())
#             y_true += list(batch_y.cpu().numpy().flatten())

#         valid_loss = np.mean(valid_loss)
#         total_valid_loss.append(valid_loss)
#         ret = metrics.print_metrics_binary(y_true, y_pred,verbose = 0)
#         history.append(ret)
#         #print()

#         print('Epoch %d: Loss = %.4f Valid loss = %.4f roc = %.4f'%(each_epoch, total_train_loss[-1], total_valid_loss[-1], ret['auroc']))
#         logger.info('Epoch %d: Loss = %.4f Valid loss = %.4f roc = %.4f'%(each_epoch, total_train_loss[-1], total_valid_loss[-1], ret['auroc']))
#         metrics.print_metrics_binary(y_true, y_pred)

#         cur_auroc = ret['auroc']
#         if cur_auroc > best_auroc:
#             best_auroc = cur_auroc
#             best_auprc = ret['auprc']
#             best_minpse = ret['minpse']
#             state = {
#                 'net': model.state_dict(),
#                 'optimizer': optimizer.state_dict(),
#                 'epoch': each_epoch
#             }
#             torch.save(state, file_name)
#             print('------------ Save best model - AUROC: %.4f ------------'%cur_auroc)       

# print('auroc %.4f'%(best_auroc))
# print('auprc %.4f'%(best_auprc))
# print('minpse %.4f'%(best_minpse))  
# logger.info('auroc %.4f'%(best_auroc))
# logger.info('auprc %.4f'%(best_auprc))
# logger.info('minpse %.4f'%(best_minpse))

In [None]:
if target_dataset == 'PD':    
    file_name = './model/pretrained-challenge-front-fill-teacher-2pd'
elif target_dataset == 'TJ':    
    file_name = './model/pretrained-challenge-front-fill-teacher-2covid'
elif target_dataset == 'HM':
    file_name = './model/pretrained-challenge-front-fill-teacher-2spain'
    
checkpoint = torch.load(file_name, \
                        map_location=torch.device(device=device))
save_epoch = checkpoint['epoch']
# print("last saved model is in epoch {}".format(save_epoch))
# logger.info("last saved model is in epoch {}".format(save_epoch))
model.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
model.eval()

distcare_teacher(
  (PositionalEncoding): PositionalEncoding(
    (dropout): Dropout(p=0, inplace=False)
  )
  (GRUs): ModuleList(
    (0): GRU(1, 32, batch_first=True)
    (1): GRU(1, 32, batch_first=True)
    (2): GRU(1, 32, batch_first=True)
    (3): GRU(1, 32, batch_first=True)
    (4): GRU(1, 32, batch_first=True)
    (5): GRU(1, 32, batch_first=True)
    (6): GRU(1, 32, batch_first=True)
    (7): GRU(1, 32, batch_first=True)
    (8): GRU(1, 32, batch_first=True)
    (9): GRU(1, 32, batch_first=True)
    (10): GRU(1, 32, batch_first=True)
    (11): GRU(1, 32, batch_first=True)
    (12): GRU(1, 32, batch_first=True)
    (13): GRU(1, 32, batch_first=True)
    (14): GRU(1, 32, batch_first=True)
    (15): GRU(1, 32, batch_first=True)
    (16): GRU(1, 32, batch_first=True)
    (17): GRU(1, 32, batch_first=True)
    (18): GRU(1, 32, batch_first=True)
    (19): GRU(1, 32, batch_first=True)
    (20): GRU(1, 32, batch_first=True)
    (21): GRU(1, 32, batch_first=True)
    (22): GRU(1, 32, 

In [None]:
batch_loss = []
y_true = []
y_pred = []
pad_token = np.zeros(34)
with torch.no_grad():
    model.eval()
    for step, (batch_x, batch_y, batch_lens) in enumerate(batch_iter(test_x, test_y, test_x_len, batch_size, shuffle=True)):  
        optimizer.zero_grad()
        batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
        batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()

        opt, decov_loss, emb = model(batch_x, batch_lens)

        BCE_Loss = get_loss(opt, batch_y.unsqueeze(-1))
#             REC_Loss = F.mse_loss(masks * recon, masks * batch_x, reduction='mean').to(device)

        model_loss =  BCE_Loss 

        loss = model_loss
        batch_loss.append(loss.cpu().detach().numpy())
        if step % 20 == 0:
            print('Batch %d: Test Loss = %.4f'%(step, loss.cpu().detach().numpy()))
            logger.info('Batch %d: Test Loss = %.4f'%(step, loss.cpu().detach().numpy()))
        y_pred += list(opt.cpu().detach().numpy().flatten())
        y_true += list(batch_y.cpu().numpy().flatten())

print("\n==>Predicting on test")
print('Test Loss = %.4f'%(np.mean(np.array(batch_loss))))
logger.info("\n==>Predicting on test")
logger.info('Test Loss = %.4f'%(np.mean(np.array(batch_loss))))
y_pred = np.array(y_pred)
y_pred = np.stack([1 - y_pred, y_pred], axis=1)
test_res = metrics.print_metrics_binary(y_true, y_pred)

Batch 0: Test Loss = 0.0983

==>Predicting on test
Test Loss = 0.1233
confusion matrix:
[[3715   21]
 [ 134  163]]
accuracy = 0.9615670442581177
precision class 0 = 0.9651857614517212
precision class 1 = 0.885869562625885
recall class 0 = 0.9943790435791016
recall class 1 = 0.5488215684890747
AUC of ROC = 0.9414762363102834
AUC of PRC = 0.7530980446603559
min(+P, Se) = 0.6868686868686869
f1_score = 0.6777546638356525


## Student Model

In [None]:
pad_token_source = np.zeros(34)
if target_dataset == 'PD':
    pad_token_target = np.zeros(69)
elif target_dataset == 'TJ':
    pad_token_target = np.zeros(75)
elif target_dataset == 'HM':
    pad_token_target = np.zeros(99)

In [None]:
class ReverseLayerF(Function):

    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha

        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha

        return output, None

In [None]:
class distcare_student(nn.Module):
    def __init__(self, input_dim, input_diff_dim, hidden_dim, d_model,  MHD_num_head, d_ff, output_dim, keep_prob=0.5):
        super(distcare_student, self).__init__()

        # hyperparameters
        self.input_dim = input_dim  
        self.input_diff_dim = input_diff_dim
        self.hidden_dim = hidden_dim  # d_model
        self.d_model = d_model
        self.MHD_num_head = MHD_num_head
        self.d_ff = d_ff
        self.output_dim = output_dim
        self.keep_prob = keep_prob

        # layers
        self.PositionalEncoding = PositionalEncoding(self.d_model, dropout = 0, max_len = 400)
        self.GRUs = clones(nn.GRU(1, self.hidden_dim, batch_first = True), self.input_dim)
        self.generalGRUs = clones(nn.GRU(1, self.hidden_dim, batch_first = True), self.input_diff_dim)
        self.LastStepAttentions = clones(SingleAttention(self.hidden_dim, 8, attention_type='concat', demographic_dim=12, time_aware=True, use_demographic=False),self.input_dim)
        
        self.FinalAttentionQKV = FinalAttentionQKV(self.hidden_dim, self.hidden_dim, attention_type='mul',dropout = 1 - self.keep_prob)

        self.MultiHeadedAttention = MultiHeadedAttention(self.MHD_num_head, self.d_model,dropout = 1 - self.keep_prob)
        self.SublayerConnection = SublayerConnection(self.d_model, dropout = 1 - self.keep_prob)

        self.PositionwiseFeedForward = PositionwiseFeedForward(self.d_model, self.d_ff, dropout=0.1)

        self.demo_proj_main = nn.Linear(12, self.hidden_dim)
        self.demo_proj = nn.Linear(12, self.hidden_dim)
        self.Linear = nn.Linear(self.hidden_dim, 1)
        self.output = nn.Linear(self.input_dim + self.input_diff_dim, self.output_dim)

        # adversal方法中的域分类器  
        self.domain_classifier = nn.Sequential()
        self.domain_classifier.add_module('d_fc1', nn.Linear(self.hidden_dim, self.hidden_dim))
        self.domain_classifier.add_module('d_bn1', nn.BatchNorm1d(self.hidden_dim))
        self.domain_classifier.add_module('d_relu1', nn.ReLU(True))
        self.domain_classifier.add_module('d_fc2', nn.Linear(hidden_dim, 2))
        self.domain_classifier.add_module('d_softmax', nn.LogSoftmax(dim=1))

        self.dropout = nn.Dropout(p = 1 - self.keep_prob)
        self.FC_embed = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.tanh=nn.Tanh()
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
        self.relu=nn.ReLU()
        self.to_MMD = nn.Linear(self.hidden_dim, 1)

    def forward(self, input, input_diff, lens, alpha, is_teacher):
        lens = lens.to('cpu')
        batch_size = input.size(0)
        time_step = input.size(1)
        feature_dim = input.size(2)
        feature_dim_diff = input_diff.size(2)
        assert(feature_dim == self.input_dim)# input Tensor : 256 * 48 * 76
        assert(self.d_model % self.MHD_num_head == 0)
        GRU_embeded_input = self.GRUs[0](pack_padded_sequence(input[:,:,0].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
        for i in range(feature_dim-1):
            embeded_input = self.GRUs[i+1](pack_padded_sequence(input[:,:,i+1].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
            GRU_embeded_input = torch.cat((GRU_embeded_input, embeded_input), 1)

        if is_teacher: # 来自源数据集
            General_GRU_embeded_input = self.generalGRUs[0](pack_padded_sequence(input_diff[:,:,0].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
            for i in range(feature_dim_diff - 1):
                general_embeded_input = self.generalGRUs[i + 1](pack_padded_sequence(input_diff[:,:,i].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
                General_GRU_embeded_input = torch.cat((General_GRU_embeded_input,general_embeded_input), 1)
        
            common_input = GRU_embeded_input[:, 0, :]
            for i in range(1, feature_dim):
                common_input = common_input + GRU_embeded_input[:, i, :]  
            # print(f"common_input1.shape is {common_input.shape}")
            common_input = torch.squeeze(common_input, 1) # batch * hidden
            reverse_input = ReverseLayerF.apply(common_input, alpha)
            # print(f"common_input2.shape is {common_input.shape}")
            domain_output = self.domain_classifier(reverse_input)

            posi_input = self.dropout(torch.cat((GRU_embeded_input, General_GRU_embeded_input), 1)) # batch_size * d_input + d_input_diff * hidden_dim
            
            contexts = self.Linear(posi_input).squeeze()# b i
            output = self.output(self.dropout(contexts))# b 1
            output = self.sigmoid(output)
            return output, domain_output, contexts
        else: # 来自目标数据集，主要是为了混淆domain classifier
            common_input = GRU_embeded_input[:, 0, :]
            for i in range(1, feature_dim):
                common_input = common_input + GRU_embeded_input[:, i, :]  
            common_input = torch.squeeze(common_input, 1) # batch * hidden
            reverse_input = ReverseLayerF.apply(common_input, alpha)
            domain_output = self.domain_classifier(reverse_input)
            return domain_output

        

In [None]:
def getSplitData(x, lens, y):
    train_num =int( len(x) * 0.8) + 1
    dev_num =int( len(x) * 0.1) + 1
    test_num = len(x) - train_num - dev_num
    train_x = []
    train_y = []
    train_len = []
    for idx in range(train_num):
        train_x.append(x[idx])
        train_y.append(int(y[idx][-1]))
        train_len.append(lens[idx])

    dev_x = []
    dev_y = []
    dev_len = []
    for idx in range(train_num, train_num + dev_num):
        dev_x.append(x[idx])
        dev_y.append(int(y[idx][-1]))
        dev_len.append(lens[idx])

    test_x = []
    test_y = []
    test_len = []

    for idx in range(train_num + dev_num, train_num + dev_num + test_num):
        test_x.append(x[idx])
        test_y.append(int(y[idx][-1]))
        test_len.append(lens[idx])
    return train_x, train_y, train_len, dev_x, dev_y, dev_len, test_x, test_y, test_len

In [None]:
logger.info("load target data")
if target_dataset == 'PD':
    data_path = './data/PD/'
    all_x_target = pickle.load(open(data_path + 'x.pkl', 'rb'))
    all_time_target = pickle.load(open(data_path + 'y_z.pkl', 'rb'))
    all_x_len_target = [len(i) for i in all_x_target]

    subset_idx_target = [0, 2, 3, 4, 5, 7, 8, 9, 12, 16, 17, 19, 20, 56, 57, 58]
    other_idx_target = list(range(69))
    for i in subset_idx_target:
        other_idx_target.remove(i)
    for i in range(len(all_x_target)):
        cur = np.array(all_x_target[i], dtype=float)
        cur_subset = cur[:, subset_idx_target]
        cur_other = cur[:, other_idx_target]
        all_x_target[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
elif target_dataset == 'TJ':
    data_path = './data/Tongji/'
    all_x_target = pickle.load(open(data_path + 'x.pkl', 'rb'))
    all_y_target = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_time_target = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_x_len_target = [len(i) for i in all_x_target]

    for i in range(len(all_time_target)):
        for j in range(len(all_time_target[i])):
            all_time_target[i][j] = all_time_target[i][j][-1]
            all_y_target[i][j] = all_y_target[i][j][0]

    subset_idx_target = [2, 3, 4, 9, 13, 14, 26, 27, 30, 32, 34, 38, 39, 41, 52, 53, 66, 74]
    other_idx_target = list(range(75))
    for i in subset_idx_target:
        other_idx_target.remove(i)
    for i in range(len(all_x_target)):
        cur = np.array(all_x_target[i], dtype=float)
        cur_subset = cur[:, subset_idx_target]
        cur_other = cur[:, other_idx_target]
        all_x_target[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
elif target_dataset == 'HM':
    data_path = './data/CDSL/'
    all_x_target = pickle.load(open(data_path + 'x.pkl', 'rb'))
    all_y_target = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_time_target = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_x_len_target = [len(i) for i in all_x_target]

    for i in range(len(all_time_target)):
        for j in range(len(all_time_target[i])):
            all_time_target[i][j] = all_time_target[i][j][-1]
            all_y_target[i][j] = all_y_target[i][j][0]

    subset_idx_target = [5, 6, 4, 2, 3, 48, 79, 76, 87, 25, 30, 31, 18, 43, 58, 66, 40, 57, 23, 92, 50, 54, 91, 60, 39, 81]
    other_idx_target= list(range(99))
    for i in subset_idx_target:
        other_idx_target.remove(i)
    for i in range(len(all_x_target)):
        cur = np.array(all_x_target[i], dtype=float)
        cur_subset = cur[:, subset_idx_target]
        cur_other = cur[:, other_idx_target]
    #     tar_all_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
        all_x_target[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
    
if target_dataset == 'PD':
    all_x_target = all_x_target
    all_y_target = all_time_target
elif  target_dataset == 'HM' or target_dataset == 'TJ':
    examples = []
    for idx in range(len(all_x_target)):
        examples.append((all_x_target[idx], all_y_target[idx], all_time_target[idx], all_x_len_target[idx]))
    examples = sorted(examples, key=lambda e: len(e[0]), reverse=True)
    all_x_target = [e[0] for e in examples]
    all_y_target = [e[1] for e in examples]
    all_time_target = [e[2] for e in examples]
    all_x_len_target = [e[3] for e in examples]

num_source = len(all_x_source)
num_target = len(all_x_target)
# print(target_dataset,len(all_x_target), len(all_x_target[0]),len(all_x_target[0][0]))
all_x_target_confuse = []
all_x_len_target_confuse = []
all_y_target_confuse = []
all_x_source_confuse = []
all_x_len_source_confuse = []
all_y_source_confuse = []
repeat_times = 0

if num_source < num_target:
    all_x_target_confuse = all_x_target
    all_y_target_confuse = all_y_target
    all_x_len_target_confuse = all_x_len_target
    while repeat_times * num_source < num_target:
        all_x_source_confuse = all_x_source_confuse + all_x_source
        all_x_len_source_confuse = all_x_len_source_confuse + all_x_len_source
        all_y_source_confuse =  all_y_source_confuse + all_y_source
        repeat_times = repeat_times + 1
    all_x_source_confuse = all_x_source_confuse[:num_target]
    all_x_len_source_confuse = all_x_len_source_confuse[:num_target]
    all_y_source_confuse = all_y_source_confuse[:num_target]
elif num_target < num_source:
    all_x_source_confuse = all_x_source
    all_x_len_source_confuse = all_x_len_source
    all_y_source_confuse = all_y_source
    while repeat_times * num_target < num_source:
        all_x_target_confuse = all_x_target_confuse + all_x_target
        all_x_len_target_confuse = all_x_len_target_confuse + all_x_len_target
        all_y_target_confuse = all_y_target_confuse + all_y_target
        repeat_times = repeat_times + 1
    all_x_target_confuse = all_x_target_confuse[:num_source]
    all_x_len_target_confuse = all_x_len_target_confuse[:num_source]
    all_y_target_confuse = all_y_target_confuse[:num_source]

# print(f"len(all_x_source_confuse) is {len(all_x_source_confuse)}, len(all_x_target_confuse) is {len(all_x_target_confuse)}")

#todo 划分train、dev、test 
# all_x_source_confuse = pad_sents(all_x_source_confuse, pad_token_source)
# all_x_target_confuse = pad_sents(all_x_target_confuse, pad_token_target)
train_x_source_confuse, train_y_source_confuse, train_len_source_confuse, dev_x_source_confuse, dev_y_source_confuse, dev_len_source_confuse, test_x_source_confuse,\
test_y_source_confuse, test_len_source_confuse = getSplitData(all_x_source_confuse, all_x_len_source_confuse, all_y_source_confuse)

train_x_target_confuse, train_y_target_confuse, train_len_target_confuse, dev_x_target_confuse, dev_y_target_confuse, dev_len_target_confuse, test_x_target_confuse,\
test_y_target_confuse, test_len_target_confuse = getSplitData(all_x_target_confuse, all_x_len_target_confuse, all_y_target_confuse)

# long_x_source = all_x_source
# long_y_source = [y[-1] for y in all_y_source]



In [None]:
epochs = 50
batch_size = 256
common_dim = subset_cnt 

diff_dim = input_dim - subset_cnt
hidden_dim = 64
d_model = 64
MHD_num_head = 4
d_ff = 64
output_dim = 1
model_student = distcare_student(input_dim = common_dim, input_diff_dim = diff_dim, hidden_dim = hidden_dim, d_model=d_model, MHD_num_head=MHD_num_head, d_ff=d_ff, output_dim = output_dim).to(device)
optimizer_student = torch.optim.Adam(model_student.parameters(), lr=1e-3)

## Training student model

In [None]:
class MultitaskLoss(nn.Module):
    def __init__(self, task_num=2):
        super(MultitaskLoss, self).__init__()
        self.task_num = task_num
        self.alpha = nn.Parameter(torch.ones((task_num)), requires_grad=True)
        self.bce = nn.BCELoss()
        self.kl = nn.KLDivLoss(reduce=True, size_average=True)

    def forward(self, opt_student, batch_y, emb_student, emb_teacher, tar_source, tar_tar):
        BCE_Loss = self.bce(opt_student, batch_y)
        emb_Loss = self.kl(emb_student, emb_teacher)
        return BCE_Loss * self.alpha[0] + emb_Loss * self.alpha[1]

def get_multitask_loss(opt_student, batch_y, emb_student, emb_teacher):
    mtl = MultitaskLoss(task_num=3)
    return mtl(opt_student, batch_y, emb_student, emb_teacher)

In [None]:
#Generate Teacher model embedding
model.load_state_dict(checkpoint['net'])
optimizer.load_state_dict(checkpoint['optimizer'])
model.eval()

train_teacher_emb = []
batch_loss = []
y_true = []
y_pred = []
pad_token = np.zeros(34)
with torch.no_grad():
    model.eval()
    for step, (batch_x, batch_y, batch_lens) in enumerate(batch_iter(train_x_source_confuse, train_y_source_confuse, train_len_source_confuse, batch_size, shuffle=False)):  
        optimizer.zero_grad()
        batch_x = torch.tensor(pad_sents(batch_x, pad_token_source), dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
        batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()

        masks = length_to_mask(batch_lens).unsqueeze(-1).float()

        opt, decov_loss, emb = model(batch_x, batch_lens)
        train_teacher_emb.append(emb.cpu().detach().numpy())

        BCE_Loss = get_loss(opt, batch_y.unsqueeze(-1))
#             REC_Loss = F.mse_loss(masks * recon, masks * batch_x, reduction='mean').to(device)

        model_loss =  BCE_Loss 
        if step % 20 == 0:
            print('Batch %d: Test Loss = %.4f'%(step, model_loss.cpu().detach().numpy()))
            logger.info('Batch %d: Test Loss = %.4f'%(step, model_loss.cpu().detach().numpy()))

Batch 0: Test Loss = 0.1227
Batch 20: Test Loss = 0.1455
Batch 40: Test Loss = 0.1408
Batch 60: Test Loss = 0.1223
Batch 80: Test Loss = 0.0873
Batch 100: Test Loss = 0.0720
Batch 120: Test Loss = 0.1043


In [None]:
# class MyDataset(Dataset):
#     def __init__(self, data, len, labels):
#         self.data = data
#         self.len = len
#         self.labels = labels

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         sample = self.data[idx]
#         len = self.len[idx]
#         label = self.labels[idx]
#         return sample, len, label

# batch_size = 256
# train_source_dataset = MyDataset(train_x_source_confuse, train_len_source_confuse, train_y_source_confuse)
# train_target_dataset = MyDataset(train_x_target_confuse, train_len_target_confuse, train_y_target_confuse)
# train_source_dataloader = DataLoader(train_source_dataset, batch_size= batch_size)
# train_target_dataloader = DataLoader(train_target_dataset, batch_size=batch_size)
# train_source_data_iter = iter(train_source_dataloader)
# train_target_data_iter = iter(train_target_dataloader)

In [None]:
# Training Student
# If you don't want to train Student Model:
# - The pretrained student model is in direcrtory './model/', and can be directly loaded, 
# - Simply skip this cell and load the model to validate on Dev Dataset.

logger.info('Training Student')

epochs = 30
total_train_loss = []
total_valid_loss = []
global_best = 0
auroc = []
auprc = []
minpse = []
history = []
# begin_time = time.time()
best_auroc = 0
best_auprc = 0
best_minpse = 0
best_total_loss = 0x3f3f3f3f
loss_domain = torch.nn.NLLLoss()
loss_predict = torch.nn.MSELoss()
loss_embed = nn.KLDivLoss(reduce=True, size_average=True)




print(f'len(train_source_iter) is {len(train_x_source_confuse)}, len(train_target_iter) is {len(train_x_target_confuse)}, steps is {len(train_x_source_confuse) // batch_size + 1}')

if target_dataset == 'PD':
    data_str = 'pd'
elif target_dataset == 'TJ':
    data_str = 'covid'
elif target_dataset == 'HM':
    data_str = 'spain'


if teacher_flag:
    file_name = './model/pretrained-challenge-front-fill-2'+ data_str
else: 
    file_name = './model/pretrained-challenge-front-fill-2'+ data_str + '-noteacher'

for each_epoch in range(epochs):
    train_source_iter = batch_iter(train_x_source_confuse, train_y_source_confuse, train_len_source_confuse, batch_size=batch_size)
    dev_source_iter = batch_iter(dev_x_source_confuse, dev_y_source_confuse, dev_len_source_confuse, batch_size=batch_size)
    test_source_iter = batch_iter(test_x_source_confuse, test_y_source_confuse, test_len_source_confuse, batch_size=batch_size)
    train_target_iter = batch_iter(train_x_target_confuse, train_y_target_confuse, train_len_target_confuse, batch_size=batch_size)
    dev_target_iter = batch_iter(dev_x_target_confuse, dev_y_target_confuse, dev_len_target_confuse, batch_size=batch_size)
    test_target_iter = batch_iter(test_x_target_confuse, test_y_target_confuse, test_len_target_confuse, batch_size=batch_size)
    epoch_loss = []
    counter_batch = 0
    model_student.train()  
    model.eval()
    steps = len(train_x_source_confuse) // batch_size + 1 if len(train_x_source_confuse) % batch_size != 0 else len(train_x_source_confuse) // batch_size
    for step in range(steps):
        # -----source_domain--------
        batch_x, batch_y, batch_lens= next(train_source_iter)
        p = float(step + each_epoch * steps) / epochs / steps
        alpha = 2. / (1. + np.exp(-10 * p)) - 1
        optimizer_student.zero_grad()
        batch_x = torch.tensor(pad_sents(batch_x, pad_token_source), dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
        batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
        # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
        # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
        domain_label = torch.zeros(min(batch_size, batch_x.shape[0])).long().to(device)
        opt_student, opt_domain, emb_student = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, alpha, True)
        emb_teacher = torch.tensor(train_teacher_emb[step], dtype=torch.float32).to(device)
        emb_student = F.log_softmax(emb_student, dim=1)
        emb_teacher = F.softmax(emb_teacher.detach(), dim=1)
        err_emb = loss_embed(emb_student, emb_teacher)
        err_predict = loss_predict(opt_student, batch_y)
        err_domain1 = loss_domain(opt_domain, domain_label)
            # loss = get_multitask_loss(opt_student, batch_y.unsqueeze(-1), emb_student, emb_teacher)

        # -----target_domain--------
        batch_x, batch_y, batch_lens = next(train_target_iter)
        p = float(step + each_epoch * len(train_x_source)) / epochs / len(train_x_len_source)
        alpha = 2. / (1. + np.exp(-10 * p)) - 1
        optimizer_student.zero_grad()
        batch_x = torch.tensor(pad_sents(batch_x, pad_token_target), dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
        batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
        # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
        # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
        domain_label = torch.ones(min(batch_size, batch_x.shape[0])).long().to(device)
        opt_domain = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, alpha, False)
        err_domain2 = loss_domain(opt_domain, domain_label)

        # -----common--------
        loss = err_emb + err_predict + err_domain1 + err_domain2
        epoch_loss.append(loss.cpu().detach().numpy())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model_student.parameters(), 20)
        optimizer_student.step()

        if step % 20 == 0:
            print('Epoch %d Batch %d: Train Loss = %.4f'%(each_epoch, step, loss.cpu().detach().numpy()))
            logger.info('Epoch %d Batch %d: Train Loss = %.4f'%(each_epoch, step, loss.cpu().detach().numpy()))

    epoch_loss = np.mean(epoch_loss)
    total_train_loss.append(epoch_loss)


    # dev_source_dataset = MyDataset(dev_x_source_confuse, dev_len_source_confuse, dev_y_source_confuse)
    # dev_target_dataset = MyDataset(dev_x_target_confuse, dev_len_target_confuse, dev_y_target_confuse)
    # dev_source_dataloader = DataLoader(dev_source_dataset, batch_size= batch_size)
    # dev_target_dataloader = DataLoader(dev_target_dataset, batch_size=batch_size)
    #Validation

    y_true = []
    y_pred = []
    with torch.no_grad():
        steps = len(dev_x_source_confuse) // batch_size + 1 if len(dev_x_source_confuse) % batch_size != 0 else len(dev_x_source_confuse) // batch_size
        for step in range(steps):
            # -----source_domain--------
            batch_x, batch_y, batch_lens= next(dev_source_iter)
            p = float(step + each_epoch * steps) / epochs / steps
            alpha = 2. / (1. + np.exp(-10 * p)) - 1
            optimizer_student.zero_grad()
            batch_x = torch.tensor(pad_sents(batch_x, pad_token_source), dtype=torch.float32).to(device)
            batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
            batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
            # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
            # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
            domain_label = torch.zeros(min(batch_size, batch_x.shape[0])).long().to(device)
            opt_student, opt_domain, emb_student = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, alpha, True)
            # emb_teacher = torch.tensor(dev_teacher_emb[step], dtype=torch.float32).to(device)
            emb_student = F.log_softmax(emb_student, dim=1)
            emb_teacher = F.softmax(emb_teacher.detach(), dim=1)
            # err_emb = loss_embed(emb_student, emb_teacher) #todo 是否考虑它
            err_predict = loss_predict(opt_student, batch_y)
            err_domain1 = loss_domain(opt_domain, domain_label)
                # loss = get_multitask_loss(opt_student, batch_y.unsqueeze(-1), emb_student, emb_teacher)

            # -----target_domain--------
            batch_x, batch_y, batch_lens = next(dev_target_iter)
            optimizer_student.zero_grad()
            batch_x = torch.tensor(pad_sents(batch_x, pad_token_target), dtype=torch.float32).to(device)
            batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
            batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
            # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
            # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
            domain_label = torch.ones(min(batch_size, batch_x.shape[0])).long().to(device)
            opt_domain = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, alpha, False)
            err_domain2 = loss_domain(opt_domain, domain_label)

            # -----common--------
            loss = err_predict + err_domain1 + err_domain2
            if loss < best_total_loss:
                best_total_loss = loss
                state = {
                    'net': model_student.state_dict(),
                    'optimizer': optimizer_student.state_dict(),
                    'epoch': each_epoch
                }
                torch.save(state, file_name)
                print('------------ Save best model - TOTAL_LOSS: %.4f ------------'%best_total_loss)
                logger.info('------------ Save best model - TOTAL_LOSS: %.4f ------------'%best_total_loss)




In [None]:
if target_dataset == 'PD':    
    data_str = 'pd'
elif target_dataset == 'TJ':    
    data_str = 'covid'
elif target_dataset == 'HM':
    data_str = 'spain'

if teacher_flag:
    file_name = './model/pretrained-challenge-front-fill-2'+ data_str
else: 
    file_name = './model/pretrained-challenge-front-fill-2'+ data_str + '-noteacher'

checkpoint = torch.load(file_name, \
                        map_location=torch.device("cuda:0" if torch.cuda.is_available() == True else 'cpu') )
save_epoch = checkpoint['epoch']
print("last saved model is in epoch {}".format(save_epoch))
logger.info("last saved model is in epoch {}".format(save_epoch))
model_student.load_state_dict(checkpoint['net'])
optimizer_student.load_state_dict(checkpoint['optimizer'])
model_student.eval()

last saved model is in epoch 20


distcare_student(
  (PositionalEncoding): PositionalEncoding(
    (dropout): Dropout(p=0, inplace=False)
  )
  (GRUs): ModuleList(
    (0): GRU(1, 64, batch_first=True)
    (1): GRU(1, 64, batch_first=True)
    (2): GRU(1, 64, batch_first=True)
    (3): GRU(1, 64, batch_first=True)
    (4): GRU(1, 64, batch_first=True)
    (5): GRU(1, 64, batch_first=True)
    (6): GRU(1, 64, batch_first=True)
    (7): GRU(1, 64, batch_first=True)
    (8): GRU(1, 64, batch_first=True)
    (9): GRU(1, 64, batch_first=True)
    (10): GRU(1, 64, batch_first=True)
    (11): GRU(1, 64, batch_first=True)
    (12): GRU(1, 64, batch_first=True)
    (13): GRU(1, 64, batch_first=True)
    (14): GRU(1, 64, batch_first=True)
    (15): GRU(1, 64, batch_first=True)
    (16): GRU(1, 64, batch_first=True)
    (17): GRU(1, 64, batch_first=True)
  )
  (generalGRUs): ModuleList(
    (0): GRU(1, 64, batch_first=True)
    (1): GRU(1, 64, batch_first=True)
    (2): GRU(1, 64, batch_first=True)
    (3): GRU(1, 64, batch_firs

# evaluate student model

In [None]:
#anchor
batch_loss = []
y_true = []
y_pred = []
with torch.no_grad():
    model_student.eval()
    # print(f"all_x_len_source_confuse.length is {len(all_x_len_source_confuse)}, all_x_source_confuse is {len(all_x_source_confuse)}")
    test_source_iter = batch_iter(test_x_source_confuse, test_y_source_confuse, test_len_source_confuse, batch_size=batch_size, shuffle=True)
    steps = len(test_x_source_confuse) // batch_size + 1 if len(test_x_source_confuse) % batch_size != 0 else len(test_x_source_confuse) // batch_size
    for step in range(steps):
        # -----source_domain--------
        batch_x, batch_y, batch_lens= next(test_source_iter) 
        optimizer_student.zero_grad()
        batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
        batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
        masks = length_to_mask(batch_lens).unsqueeze(-1).float()
        opt, _, _  = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, 1, True)

        BCE_Loss = get_loss(opt, batch_y.unsqueeze(-1))
#             REC_Loss = F.mse_loss(masks * recon, masks * batch_x, reduction='mean').to(device)

        model_loss =  BCE_Loss 

        loss = model_loss
        batch_loss.append(loss.cpu().detach().numpy())
        if step % 20 == 0:
            print('Batch %d: Test Loss = %.4f'%(step, loss.cpu().detach().numpy()))
            logger.info('Batch %d: Test Loss = %.4f'%(step, loss.cpu().detach().numpy()))
        y_pred += list(opt.cpu().detach().numpy().flatten())
        y_true += list(batch_y.cpu().numpy().flatten())

print("\n==>Predicting on test")
print('Test Loss = %.4f'%(np.mean(np.array(batch_loss))))
logger.info("\n==>Predicting on test")
logger.info('Test Loss = %.4f'%(np.mean(np.array(batch_loss))))
y_pred = np.array(y_pred)
y_pred = np.stack([1 - y_pred, y_pred], axis=1)
test_res = metrics.print_metrics_binary(y_true, y_pred)

Batch 0: Test Loss = 0.3369

==>Predicting on test
Test Loss = 0.2589
confusion matrix:
[[3736    0]
 [ 297    0]]
accuracy = 0.9263575673103333
precision class 0 = 0.9263575673103333
precision class 1 = nan
recall class 0 = 1.0
recall class 1 = 0.0
AUC of ROC = 0.8111328308062784
AUC of PRC = 0.38592537778006686
min(+P, Se) = 0.436241610738255
f1_score = nan


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


## Transfer Target Dataset & Model

In [None]:
if target_dataset == 'PD':
    source_common_idx = [31, 29, 28, 33, 25, 18, 7, 21, 16, 15, 19, 17, 24, 3, 5, 0]
    target_common_idx = [0, 2, 3, 4, 5, 7, 8, 9, 12, 16, 17, 19, 20, 56, 57, 58]
    source_data_path = './data/Challenge/'
    source_x = pickle.load(open(source_data_path + 'new_x_front_fill.dat', 'rb'))
    target_data_path = './data/PD/'
    target_x = pickle.load(open(target_data_path + 'x.pkl', 'rb'))
elif target_dataset == 'TJ':
    source_common_idx = [27, 29, 18, 16, 26, 33, 28, 31, 32, 15, 11, 25, 21, 20, 9, 17, 30, 19]
    target_common_idx = [2, 3, 4, 9, 13, 14, 26, 27, 30, 32, 34, 38, 39, 41, 52, 53, 66, 74]
    source_data_path = './data/Challenge/'
    source_x = pickle.load(open(source_data_path + 'new_x_front_fill.dat', 'rb'))
    target_data_path = './data/Tongji/'
    target_x = pickle.load(open(target_data_path + 'x.pkl', 'rb'))

elif target_dataset == 'HM':
    source_common_idx = [0, 1, 2, 3, 5, 9, 11, 12, 13, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
    target_common_idx = [5, 6, 4, 2, 3, 48, 79, 76, 87, 25, 30, 31, 18, 43, 58, 66, 40, 57, 23, 92, 50, 54, 91, 60, 39, 81]
    source_data_path = './data/Challenge/'
    source_x = pickle.load(open(source_data_path + 'new_x_front_fill.dat', 'rb'))
    target_data_path = './data/CDSL/'
    target_x = pickle.load(open(target_data_path + 'x.pkl', 'rb'))

assert(len(source_common_idx) == len(target_common_idx))
common_len = len(source_common_idx)
source_x_diff = []
target_x_diff = []

source_total_len = 34
source_other_idx = list(range(source_total_len))
for i in source_common_idx:
    source_other_idx.remove(i)

if target_dataset == 'PD':
    target_total_len = 69
    target_other_idx = list(range(target_total_len))
    for i in target_common_idx:
        target_other_idx.remove(i)
elif target_dataset == 'TJ':
    target_other_idx = list(range(75))
    target_total_len = 75
    for i in target_common_idx:
        target_other_idx.remove(i)
elif target_dataset == 'HM':
    target_other_idx = list(range(99))
    target_total_len = 99
    for i in target_common_idx:
        target_other_idx.remove(i)

for i in range(len(source_x)):
    cur = np.array(source_x[i], dtype=float)
    cur_subset = cur[:, source_common_idx]
    cur_other = cur[:, source_other_idx]
    source_x_diff.append(cur_other.tolist())
    source_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()

for i in range(len(target_x)):
    cur = np.array(target_x[i], dtype=float)
    cur_subset = cur[:, target_common_idx]
    cur_other = cur[:, target_other_idx]
    target_x_diff.append(cur_other.tolist())
    target_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()


source_max = 0
for i in range(len(source_x_diff)):
    if source_max < len(source_x_diff[i]):
        source_max = len(source_x_diff[i])

source_x_diff_longest = max(list(len(_) for _ in source_x_diff))
source_x_longest = max(list(len(_) for _ in source_x))
source_batch = len(source_x_diff)
source_diff_features = source_total_len - common_len
source_x_diff_ex = torch.zeros((source_batch, source_x_diff_longest, source_diff_features))
source_x_ex = torch.zeros((source_batch, source_x_longest, source_total_len))

for i in range(len(source_x_diff)):
    for j in range(source_x_diff_longest):
        cur_len = len(source_x_diff[i])
        if j < cur_len:
            source_x_diff_ex[i,j,:] = torch.Tensor(source_x_diff[i])[j,:]
        else:
            source_x_diff_ex[i,j,:] = torch.Tensor(source_x_diff[i])[cur_len - 1,:]

for i in range(len(source_x)):
    for j in range(source_x_longest):
        cur_len = len(source_x[i])
        if j < cur_len:
            source_x_ex[i,j,:] = torch.Tensor(source_x[i])[j,:]
        else:
            source_x_ex[i,j,:] = torch.Tensor(source_x[i])[cur_len - 1,:]

target_x_diff_longest = max(list(len(_) for _ in target_x_diff))
target_batch = len(target_x_diff)
target_features = target_total_len - common_len
target_x_diff_ex = torch.zeros((target_batch, target_x_diff_longest, target_features))

for i in range(len(target_x_diff)):
    for j in range(target_x_diff_longest):
        cur_len = len(target_x_diff[i])
        if j < cur_len:
            target_x_diff_ex[i,j,:] = torch.Tensor(target_x_diff[i])[j,:]
        else:
            target_x_diff_ex[i,j,:] = torch.Tensor(target_x_diff[i])[cur_len - 1,:]





In [None]:
print(f'source_x_diff_ex.shape is {np.array(source_x_diff_ex).shape}, max_len is {source_max}')
print(f'source_x_ex.shape is {np.array(source_x_ex.shape)}')
print(f'target_x_diff_ex.shape is {np.array(target_x_diff_ex).shape}')

source_x_diff_ex.shape is (40336, 336, 16), max_len is 336
source_x_ex.shape is [40336   336    34]
target_x_diff_ex.shape is (361, 13, 57)


In [None]:
source_x_diff_mean = np.mean(np.array(source_x_diff_ex), 0)
source_x_mean = np.mean(np.array(source_x_ex), 0 )
target_x_diff_mean = np.mean(np.array(target_x_diff_ex), 0)
source_x_diff_mean.shape, target_x_diff_mean.shape

((336, 16), (13, 57))

In [None]:
from dtw import *
latest_idx = []
for i in range(target_x_diff_mean.shape[1]):
    min_idx = 0
    min_distance = float('inf')
    for j in range(source_x_mean.shape[1]):
        source_feature = source_x_mean[:, j]
        target_feature = target_x_diff_mean[:, i]
        alignment = dtw(source_feature, target_feature)
        distance = alignment.distance
        if min_distance > distance:
            min_distance = distance
            min_idx = j
    latest_idx.append(min_idx)
latest_idx

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



[29,
 1,
 15,
 29,
 21,
 22,
 26,
 15,
 18,
 26,
 1,
 18,
 15,
 15,
 8,
 29,
 30,
 26,
 29,
 9,
 27,
 27,
 20,
 29,
 29,
 28,
 9,
 6,
 26,
 5,
 26,
 29,
 26,
 1,
 29,
 10,
 29,
 15,
 6,
 15,
 22,
 29,
 6,
 29,
 26,
 12,
 26,
 1,
 10,
 32,
 7,
 22,
 10,
 1,
 25,
 5,
 2]

In [None]:
logger.info("Transfer Target Dataset & Model")
# if target_dataset == 'TJ':
#     data_path = './data/Tongji/'
#     all_x = pickle.load(open(data_path + 'x.dat', 'rb'))
#     all_y = pickle.load(open(data_path + 'y.dat', 'rb'))
#     all_time = pickle.load(open(data_path + 'time_all.dat', 'rb'))
#     all_x_len = [len(i) for i in all_x]

#     tar_subset_idx = [0,1,2,7,11,12,24,25,28,30,32,36,37,39,50,51,65,73]
#     tar_other_idx = list(range(74))
#     for i in tar_subset_idx:
#         tar_other_idx.remove(i)
#     for i in range(len(all_x)):
#         cur = np.array(all_x[i], dtype=float)
#         cur_subset = cur[:, tar_subset_idx]
#         cur_other = cur[:, tar_other_idx]
#         all_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
#     print(all_x[0])
#     print(len(all_x[0][0]))
#     logger.info(all_x[0])
#     logger.info(len(all_x[0][0]))
    
# elif target_dataset == 'HM':
#     data_path = './data/Spain/'
#     all_x = pickle.load(open(data_path + 'x.dat', 'rb'))
#     all_y = pickle.load(open(data_path + 'y.dat', 'rb'))
#     all_time = pickle.load(open(data_path + 'time_all.dat', 'rb'))
#     all_x_len = [len(i) for i in all_x]
    
#     tar_subset_idx = [39, 35, 23, 47, 55, 51, 22, 53, 25, 15, 43, 65, 1, 2, 48, 12, 26, 44, 49]
#     tar_other_idx = list(range(66))
#     for i in tar_subset_idx:
#         tar_other_idx.remove(i)
#     for i in range(len(all_x)):
#         cur = np.array(all_x[i], dtype=float)
#         cur_subset = cur[:, tar_subset_idx]
#         cur_other = cur[:, tar_other_idx]
#         all_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
#     print(all_x[0])
#     print(len(all_x[0][0]))
#     print(all_y[0])
#     logger.info(all_x[0])
#     logger.info(len(all_x[0][0]))
#     logger.info(all_y[0])
if target_dataset == 'TJ':
    data_path = './data/Tongji/'
    all_x = pickle.load(open(data_path + 'x.pkl', 'rb'))
    all_y = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_time = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_x_len = [len(i) for i in all_x]

    for i in range(len(all_time)):
        for j in range(len(all_time[i])):
            all_time[i][j] = all_time[i][j][-1]
            all_y[i][j] = all_y[i][j][0]

    tar_subset_idx = [2, 3, 4, 9, 13, 14, 26, 27, 30, 32, 34, 38, 39, 41, 52, 53, 66, 74]
    tar_other_idx = list(range(75))
    for i in tar_subset_idx:
        tar_other_idx.remove(i)
    for i in range(len(all_x)):
        cur = np.array(all_x[i], dtype=float)
        cur_subset = cur[:, tar_subset_idx]
        cur_other = cur[:, tar_other_idx]
        all_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
elif target_dataset == 'HM':
    data_path = './data/CDSL/'
    all_x = pickle.load(open(data_path + 'x.pkl', 'rb'))
    all_y = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_time = pickle.load(open(data_path + 'y.pkl', 'rb'))
    all_x_len = [len(i) for i in all_x]

    for i in range(len(all_time)):
        for j in range(len(all_time[i])):
            all_time[i][j] = all_time[i][j][-1]
            all_y[i][j] = all_y[i][j][0]

    tar_subset_idx = [5, 6, 4, 2, 3, 48, 79, 76, 87, 25, 30, 31, 18, 43, 58, 66, 40, 57, 23, 92, 50, 54, 91, 60, 39, 81]
    tar_other_idx = list(range(99))
    for i in tar_subset_idx:
        tar_other_idx.remove(i)
    for i in range(len(all_x)):
        cur = np.array(all_x[i], dtype=float)
        cur_subset = cur[:, tar_subset_idx]
        cur_other = cur[:, tar_other_idx]
        all_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()
    
print(all_x[0])
print(len(all_x[0][0]))
print(len(all_x))
logger.info(all_x[0])
logger.info(len(all_x[0][0]))
logger.info(len(all_x))

[[-0.43249781572948887, 0.7734225027254102, 0.11121635247274919, -1.1510759266125783, -0.5775173789921769, -0.894979488306686, 0.6418248141325145, -1.1001333024577382, -0.09626591113773565, 0.07837976568684282, 0.27346416059268763, -0.29216681084789653, -0.20501606288246071, -0.44880109579951655, -0.6600819004124745, -0.5342137257388455, -0.16268638728171492, 1.013400735854239, 1.0, 1.098202336859675, -0.6863245135763378, -0.4670502544450039, -0.41420989908626493, -0.2941064849292665, 0.2534399947977164, 0.6689953308348103, -0.2780944554864925, 1.6237612679911448, -0.016555502218216205, -0.411288788529273, -0.5683812105284773, -0.9512610100808374, -0.9403624977548863, 0.651423723145103, -0.43323556246024986, 0.6921334800493779, 2.9116903104103184, 0.2360670258723392, -0.2674665445678642, 0.4444334033508993, -0.21164668014859825, -0.16135240337065737, -0.7969887859671778, -0.8495988149987506, -0.705221190542043, -1.0534078933590525, 1.3058941843049534, -0.3135827086480343, -0.3167900638

In [None]:
long_x = all_x
long_y = all_y
long_y_kfold = [each[-1] for each in all_y]
long_time = all_time

In [None]:
# def get_n2n_data(x, y, x_len):
#     length = len(x)
#     assert length == len(y)
#     assert length == len(x_len)
#     new_x = []
#     new_y = []
#     new_x_len = []
#     for i in range(length):
#         for j in range(len(x[i])):
#             new_x.append(x[i][:j+1])
#             new_y.append(y[i][j])
#             new_x_len.append(j+1)
#     return new_x, new_y, new_x_len
def get_n2n_data(x, y, x_len, outcome=None):
    length = len(x)
    assert length == len(y)
    assert length == len(outcome)
    assert length == len(x_len)
    new_x = []
    new_y = []
    new_outcome = []
    new_x_len = []
    for i in range(length):
        for j in range(len(x[i])):
            new_x.append(x[i][:j+1])
            new_y.append(y[i][j])
            new_outcome.append(outcome[i][j])
            new_x_len.append(j+1)
    return new_x, new_y, new_x_len, new_outcome

In [None]:
class distcare_target(nn.Module):
    def __init__(self, input_dim, hidden_dim, d_model,  MHD_num_head, d_ff, output_dim, keep_prob=0.5):
        super(distcare_target, self).__init__()

        # hyperparameters
        self.input_dim = input_dim  
        self.hidden_dim = hidden_dim  # d_model
        self.d_model = d_model
        self.MHD_num_head = MHD_num_head
        self.d_ff = d_ff
        self.output_dim = output_dim
        self.keep_prob = keep_prob

        # layers
        self.PositionalEncoding = PositionalEncoding(self.d_model, dropout = 0, max_len = 400)

        self.GRUs = clones(nn.GRU(1, self.hidden_dim, batch_first = True), self.input_dim)
        
        self.LastStepAttentions = clones(SingleAttention(self.hidden_dim, 16, attention_type='concat', demographic_dim=12, time_aware=True, use_demographic=False),self.input_dim)
        
        self.FinalAttentionQKV = FinalAttentionQKV(self.hidden_dim, self.hidden_dim, attention_type='mul',dropout = 1 - self.keep_prob)

        self.MultiHeadedAttention = MultiHeadedAttention(self.MHD_num_head, self.d_model,dropout = 1 - self.keep_prob)
        self.SublayerConnection = SublayerConnection(self.d_model, dropout = 1 - self.keep_prob)

        self.PositionwiseFeedForward = PositionwiseFeedForward(self.d_model, self.d_ff, dropout=0.1)

        self.demo_proj_main = nn.Linear(12, self.hidden_dim)
        self.demo_proj = nn.Linear(12, self.hidden_dim)
        self.output = nn.Linear(self.hidden_dim, self.output_dim)

        self.dropout = nn.Dropout(p = 1 - self.keep_prob)
        self.FC_embed = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.tanh=nn.Tanh()
        self.MLP = nn.Sequential(
            nn.Linear(self.hidden_dim, 8),
            nn.ReLU(),
            nn.Linear(8, self.output_dim)
        )
        self.MLP_outcome = nn.Sequential(
            nn.Linear(self.hidden_dim, 8),
            nn.ReLU(),
            nn.Linear(8, self.output_dim)
        )
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
        self.relu=nn.ReLU()

    def forward(self, input, lens):
        lens = lens.to('cpu')
        # input shape [batch_size, timestep, feature_dim]
#         demo_main = self.tanh(self.demo_proj_main(demo_input)).unsqueeze(1)# b hidden_dim
        
        batch_size = input.size(0)
        time_step = input.size(1)
        feature_dim = input.size(2)
        assert(feature_dim == self.input_dim)# input Tensor : 256 * 48 * 76
        assert(self.d_model % self.MHD_num_head == 0)

        # Initialization
        #cur_hs = Variable(torch.zeros(batch_size, self.hidden_dim).unsqueeze(0))

        # forward
        # GRU_embeded_input = self.GRUs[0](input[:,:,0].unsqueeze(-1), Variable(torch.zeros(batch_size, self.hidden_dim).unsqueeze(0)).to(device))[0] # b t h
        # Attention_embeded_input = self.LastStepAttentions[0](GRU_embeded_input)[0].unsqueeze(1)# b 1 h
        # for i in range(feature_dim-1):
        #     embeded_input = self.GRUs[i+1](input[:,:,i+1].unsqueeze(-1), Variable(torch.zeros(batch_size, self.hidden_dim).unsqueeze(0)).to(device))[0] # b 1 h
        #     embeded_input = self.LastStepAttentions[i+1](embeded_input)[0].unsqueeze(1)# b 1 h
        #     Attention_embeded_input = torch.cat((Attention_embeded_input, embeded_input), 1)# b i h

        # Attention_embeded_input = torch.cat((Attention_embeded_input, demo_main), 1)# b i+1 h
        # posi_input = self.dropout(Attention_embeded_input) # batch_size * d_input+1 * hidden_dim

#         input = pack_padded_sequence(input, lens, batch_first=True)
        
        GRU_embeded_input = self.GRUs[0](pack_padded_sequence(input[:,:,0].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
#         print(GRU_embeded_input.shape)
        for i in range(feature_dim-1):
            embeded_input = self.GRUs[i+1](pack_padded_sequence(input[:,:,i+1].unsqueeze(-1), lens, batch_first=True))[1].squeeze().unsqueeze(1) # b 1 h
            GRU_embeded_input = torch.cat((GRU_embeded_input, embeded_input), 1)
        

#         GRU_embeded_input = torch.cat((GRU_embeded_input, demo_main), 1)# b i+1 h
        posi_input = self.dropout(GRU_embeded_input) # batch_size * d_input * hidden_dim


        #mask = subsequent_mask(time_step).to(device) # 1 t t 下三角 N to 1任务不用mask
        contexts = self.SublayerConnection(posi_input, lambda x: self.MultiHeadedAttention(posi_input, posi_input, posi_input, None))# # batch_size * d_input * hidden_dim
    
        DeCov_loss = contexts[1]
        contexts = contexts[0]

        contexts = self.SublayerConnection(contexts, lambda x: self.PositionwiseFeedForward(contexts))[0]# # batch_size * d_input * hidden_dim
        #contexts = contexts.view(batch_size, feature_dim * self.hidden_dim)#
        # contexts = torch.matmul(self.Wproj, contexts) + self.bproj
        # contexts = contexts.squeeze()
        # demo_key = self.demo_proj(demo_input)# b hidden_dim
        # demo_key = self.relu(demo_key)
        # input_dim_scores = torch.matmul(contexts, demo_key.unsqueeze(-1)).squeeze() # b i
        # input_dim_scores = self.dropout(self.sigmoid(input_dim_scores)).unsqueeze(1)# b i
        
        # weighted_contexts = torch.matmul(input_dim_scores, contexts).squeeze()
#         print(contexts.shape)

        weighted_contexts = self.FinalAttentionQKV(contexts)[0]
        #output_embed = self.FC_embed(weighted_contexts)
        output = self.MLP(self.dropout(weighted_contexts))# b 1
        outcome = self.MLP_outcome(self.dropout(weighted_contexts))# b 1
        outcome = F.sigmoid(outcome)
        if self.output_dim != 1:
            output = F.softmax(output, dim=1)
#         print(weighted_contexts.shape)
          
        return output, DeCov_loss, weighted_contexts, outcome
    #, self.MultiHeadedAttention.attn


In [None]:
def transfer_gru_dict(pretrain_dict, model_dict, latest_idx, common_len):
    state_dict = {}
    
    for k, v in model_dict.items():
        model_point_position1 = k.find('.')
        model_module_name = k[:model_point_position1]
        if "GRUs" == model_module_name:
            model_point_position2 = k.find('.', model_point_position1+1)
            model_module_idx = int(k[model_point_position1 + 1: model_point_position2])
            print(f'model_module_idx is {model_module_idx}')
            if model_module_idx < common_len:
                state_dict[k] = pretrain_dict[k]
            else:
                diff_idx = model_module_idx - common_len
                target_module_idx = int(str(latest_idx[diff_idx]))
                if target_module_idx < common_len:
                    target_module_name = "GRUs"
                    target_k = target_module_name +'.' + str(target_module_idx) + '.' + k[model_point_position2+1:]
                    state_dict[k] = pretrain_dict[target_k]
                else:
                    target_module_name = "generalGRUs"
                    target_k = target_module_name +'.' + str(target_module_idx - common_len) + '.' + k[model_point_position2+1:]
                    state_dict[k] = pretrain_dict[target_k]
    return state_dict

In [None]:
if target_dataset == 'PD':
    input_dim = 69
elif target_dataset == 'TJ':
    input_dim = 75
elif target_dataset == 'HM':
    input_dim = 99
    
cell = 'GRU'
hidden_dim = 64
d_model = 64
MHD_num_head = 4
d_ff = 64
output_dim = 1

In [None]:
def ckd_batch_iter(x, y, lens, batch_size, shuffle=False, outcome=None):
    """ Yield batches of source and target sentences reverse sorted by length (largest to smallest).
    @param data (list of (src_sent, tgt_sent)): list of tuples containing source and target sentence
    @param batch_size (int): batch size
    @param shuffle (boolean): whether to randomly shuffle the dataset
    """
    batch_num = math.ceil(len(x) / batch_size) # 向下取整
    index_array = list(range(len(x)))

    if shuffle:
        np.random.shuffle(index_array)

    for i in range(batch_num):
        indices = index_array[i * batch_size: (i + 1) * batch_size] #  fetch out all the induces
        
        examples = []
        for idx in indices:
            examples.append((x[idx], y[idx],  lens[idx], outcome[idx]))
       
        examples = sorted(examples, key=lambda e: len(e[0]), reverse=True)
    
        batch_x = [e[0] for e in examples]
        batch_y = [e[1] for e in examples]
#         batch_name = [e[2] for e in examples]
        batch_lens = [e[2] for e in examples]
        batch_outcome = [e[3] for e in examples]
       

        yield batch_x, batch_y, batch_lens, batch_outcome

In [None]:
class TargetMultitaskLoss(nn.Module):
    def __init__(self, task_num=2):
        super(TargetMultitaskLoss, self).__init__()
        self.task_num = task_num
        self.alpha = nn.Parameter(torch.ones((task_num)), requires_grad=True)
        self.mse = nn.MSELoss()
        self.bce = nn.BCELoss()

    def forward(self, opt_student, los, outcome, outcome_y):
        MSE_Loss = self.mse(opt_student, los)
        BCE_Loss = self.bce(outcome, outcome_y)
        return MSE_Loss * self.alpha[0] + BCE_Loss * self.alpha[1]

def get_target_multitask_loss(opt_student, los, outcome, outcome_y):
    mtl = TargetMultitaskLoss(task_num=2)
    return mtl(opt_student, los, outcome, outcome_y)

def reverse_los(y, los_info):
    return y * los_info["los_std"] + los_info["los_mean"]

In [None]:
los_info = pickle.load(open(data_path + 'los_info.pkl', 'rb'))
print(los_info)
logger.info(los_info)

{'los_mean': 6.927731092436975, 'los_std': 5.1253246527009555, 'los_median': 6.0, 'large_los': 26.649999999999977, 'threshold': 4.9562737642585555}


In [None]:
if target_dataset == 'TJ':
    n_splits = 10
    epochs = 150
elif target_dataset == 'HM':
    n_splits = 3
    epochs = 20

transfer_flag = True
kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)

if target_dataset == 'TJ':    
    data_str = 'covid'
elif target_dataset == 'HM':
    data_str = 'spain'

if teacher_flag:
    file_name = './model/pretrained-challenge-front-fill-2'+ data_str
else: 
    file_name = './model/pretrained-challenge-front-fill-2'+ data_str + '-noteacher'


batch_size = 256

fold_count = 0
total_train_loss = []
total_valid_loss = []

global_best = 10000
mse = []
mad = []
mape = []
auroc = []
auprc = []
kappa = []
history = []

pad_token = np.zeros(input_dim)
# begin_time = time.time()

for train, test in kfold.split(long_x, long_y_kfold):
    
    model = distcare_target(input_dim = input_dim,output_dim=output_dim, d_model=d_model, MHD_num_head=MHD_num_head, d_ff=d_ff, hidden_dim=hidden_dim).to(device)
    
    if transfer_flag:
        checkpoint = torch.load(file_name, \
                        map_location=torch.device("cuda:0" if torch.cuda.is_available() == True else 'cpu'))
        pretrain_dict = checkpoint['net']
        model_dict = model.state_dict()
        pretrain_dict = transfer_gru_dict(pretrain_dict, model_dict,latest_idx, common_len)
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    fold_count += 1
#     print(train)

    
    train_x = [long_x[i] for i in train]
    train_y = [long_time[i] for i in train]
    train_outcome = [long_y[i] for i in train]
    train_x_len = [all_x_len[i] for i in train]
    #train_static = [long_static[i] for i in train]
    
    train_x, train_y, train_x_len, train_outcome = get_n2n_data(train_x, train_y, train_x_len, outcome=train_outcome)
    if len(train_x) % 256 == 1:
        print(len(train_x))
        print('wrong squeeze!')
    
    test_x = [long_x[i] for i in test]
    test_y = [long_time[i] for i in test]
    test_outcome = [long_y[i] for i in test]
    test_x_len = [all_x_len[i] for i in test]
    #test_static = [long_static[i] for i in test]
    
    test_x, test_y, test_x_len, test_outcome = get_n2n_data(test_x, test_y, test_x_len, outcome=test_outcome)
    
    if not os.path.exists('./model/'+data_str):
        os.mkdir('./model/'+data_str)
        
    if transfer_flag:
        target_file_name = './model/'+data_str+'/distcare-trans-'+str(n_splits)+'-fold-LOS-regression' + str(fold_count)#4114
    else:
        target_file_name = './model/'+data_str+'/distcare-no-trans-'+str(n_splits)+'-fold-LOS-regression' + str(fold_count)#4114
    
    fold_train_loss = []
    fold_valid_loss = []
    best_mse = 10000
    best_mad = 0
    best_auroc = 0
    beat_auprc = 0
    best_mape = 0
    best_kappa = 0
    
    for each_epoch in range(epochs):
       
        
        epoch_loss = []
        counter_batch = 0
        model.train()  
        
        for step, (batch_x, batch_y, batch_lens, batch_outcome) in enumerate(ckd_batch_iter(train_x, train_y, train_x_len, batch_size, shuffle=True, outcome=train_outcome)):  
            optimizer.zero_grad()
            batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
            batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
            batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
            batch_outcome = torch.tensor(batch_outcome, dtype=torch.float32).to(device)

            masks = length_to_mask(batch_lens).unsqueeze(-1).float()

            opt, decov_loss, emb, outcome = model(batch_x, batch_lens)

#             MSE_Loss = get_re_loss(opt, batch_y.unsqueeze(-1))
            pred_loss = get_target_multitask_loss(opt, batch_y.unsqueeze(-1), outcome, batch_outcome.unsqueeze(-1))

            model_loss = pred_loss + 1e7*decov_loss

            loss = model_loss

            epoch_loss.append(pred_loss.cpu().detach().numpy())
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 20)
            optimizer.step()
            
            if step % 50 == 0:
                print('Fold %d Epoch %d Batch %d: Train Loss = %.4f'%(fold_count,each_epoch, step, loss.cpu().detach().numpy()))
                logger.info('Fold %d Epoch %d Batch %d: Train Loss = %.4f'%(fold_count,each_epoch, step, loss.cpu().detach().numpy()))
            
        epoch_loss = np.mean(epoch_loss)
        fold_train_loss.append(epoch_loss)

        #Validation
        y_true = []
        y_pred = []
        y_pred_flatten = []
        y_true_flatten = []
        outcome_pred_flatten = []
        outcome_true_flatten = []
        with torch.no_grad():
            model.eval()
            valid_loss = []
            valid_true = []
            valid_pred = []
            for batch_x, batch_y, batch_lens, batch_outcome in ckd_batch_iter(test_x, test_y, test_x_len, batch_size, outcome=test_outcome):
                batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
                batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
                batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
                batch_outcome = torch.tensor(batch_outcome, dtype=torch.float32).to(device)
                masks = length_to_mask(batch_lens).unsqueeze(-1).float()
               
                opt, decov_loss, emb, outcome = model(batch_x, batch_lens)
                
#                 MSE_Loss = get_re_loss(opt, batch_y.unsqueeze(-1))
                pred_loss = get_target_multitask_loss(opt, batch_y.unsqueeze(-1), outcome, batch_outcome.unsqueeze(-1))
                
                valid_loss.append(pred_loss.cpu().detach().numpy())

                y_pred_flatten += [reverse_los(x, los_info) for x in list(opt.cpu().detach().numpy().flatten())]
                y_true_flatten += [reverse_los(x, los_info) for x in list(batch_y.cpu().numpy().flatten())]
                outcome_pred_flatten += list(outcome.cpu().detach().numpy().flatten())
                outcome_true_flatten += list(batch_outcome.cpu().numpy().flatten())
            

            valid_loss = np.mean(valid_loss)
            fold_valid_loss.append(valid_loss)
            ret = metrics.print_metrics_regression(y_true_flatten, y_pred_flatten, verbose=0)
            ret_outcome = metrics.print_metrics_binary(outcome_true_flatten, outcome_pred_flatten, verbose=0)
            history.append((ret, ret_outcome))
            #print()

            if each_epoch % 10 == 0:
                print('Fold %d, epoch %d: Loss = %.4f Valid loss = %.4f MSE = %.4f AUROC = %.4f' % (
                    fold_count, each_epoch, fold_train_loss[-1], fold_valid_loss[-1], ret['mse'], ret_outcome['auroc']), flush=True)
                logger.info('Fold %d, epoch %d: Loss = %.4f Valid loss = %.4f MSE = %.4f AUROC = %.4f' % (
                    fold_count, each_epoch, fold_train_loss[-1], fold_valid_loss[-1], ret['mse'], ret_outcome['auroc']))
                # metrics.print_metrics_regression(y_true_flatten, y_pred_flatten)
                
            cur_mse = ret['mse']
            if cur_mse < best_mse:
                print('------------ Save FOLD-BEST model - MSE: %.4f ------------' % cur_mse, flush=True)
                logger.info('------------ Save FOLD-BEST model - MSE: %.4f ------------' % cur_mse)
                metrics.print_metrics_regression(y_true_flatten, y_pred_flatten)
                best_mse = cur_mse
                best_mad = ret['mad']
                best_auroc = ret_outcome['auroc']
                best_auprc = ret_outcome['auprc']
                state = {
                    'net': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'epoch': each_epoch
                }
                torch.save(state, target_file_name + '_' + str(fold_count))

                if cur_mse < global_best:
                    global_best = cur_mse
                    state = {
                        'net': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'epoch': each_epoch
                    }
                    torch.save(state, target_file_name)
                    print('------------ Save best model - MSE: %.4f ------------' % cur_mse, flush=True)
                    logger.info('------------ Save best model - MSE: %.4f ------------' % cur_mse)

        print('Fold %d, mse = %.4f, mad = %.4f' % (fold_count, ret['mse'], ret['mad']), flush=True)
        logger.info('Fold %d, mse = %.4f, mad = %.4f' % (fold_count, ret['mse'], ret['mad']))

    mse.append(best_mse)
    mad.append(best_mad)
    auroc.append(best_auroc)
    auprc.append(best_auprc)
    total_train_loss.append(fold_train_loss)
    total_valid_loss.append(fold_valid_loss)


print('mse %.4f(%.4f)' % (np.mean(mse), np.std(mse)))
print('mad %.4f(%.4f)' % (np.mean(mad), np.std(mad)))
print('auroc %.4f(%.4f)' % (np.mean(auroc), np.std(auroc)))
print('auprc %.4f(%.4f)' % (np.mean(auprc), np.std(auprc)))
logger.info('mse %.4f(%.4f)' % (np.mean(mse), np.std(mse)))
logger.info('mad %.4f(%.4f)' % (np.mean(mad), np.std(mad)))
logger.info('auroc %.4f(%.4f)' % (np.mean(auroc), np.std(auroc)))
logger.info('auprc %.4f(%.4f)' % (np.mean(auprc), np.std(auprc)))

model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 10
model_module_idx is 10
model_module_idx is 10
model_module_idx is 10
model_module_idx is 11
model



Fold 1 Epoch 0 Batch 0: Train Loss = 296339.7812
Fold 1, epoch 0: Loss = 2.5172 Valid loss = 2.0982 MSE = 36.7359 AUROC = 0.5607
------------ Save FOLD-BEST model - MSE: 36.7359 ------------


  prec0 = cf[0][0] / (cf[0][0] + cf[1][0])


Custom bins confusion matrix:
[[  0 107]
 [  0  76]]
Mean absolute deviation (MAD) = 4.980107665202091
Mean squared error (MSE) = 36.73589934031104
Mean absolute percentage error (MAPE) = 732.289965156289
Cohen kappa score = 0.0
------------ Save best model - MSE: 36.7359 ------------
Fold 1, mse = 36.7359, mad = 4.9801




Fold 1 Epoch 1 Batch 0: Train Loss = 34347.4414
------------ Save FOLD-BEST model - MSE: 34.8208 ------------
Custom bins confusion matrix:
[[58 49]
 [16 60]]
Mean absolute deviation (MAD) = 4.745702247877841
Mean squared error (MSE) = 34.82084713250918
Mean absolute percentage error (MAPE) = 661.6999503994583
Cohen kappa score = 0.3119106843292647
------------ Save best model - MSE: 34.8208 ------------
Fold 1, mse = 34.8208, mad = 4.7457




Fold 1 Epoch 2 Batch 0: Train Loss = 8215.6104
Fold 1, mse = 35.3273, mad = 4.8426




Fold 1 Epoch 3 Batch 0: Train Loss = 4077.0901
------------ Save FOLD-BEST model - MSE: 34.8060 ------------


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Custom bins confusion matrix:
[[  6 101]
 [  3  73]]
Mean absolute deviation (MAD) = 4.806338186283997
Mean squared error (MSE) = 34.80599440512324
Mean absolute percentage error (MAPE) = 698.3228943040973
Cohen kappa score = 0.013988187752564607
------------ Save best model - MSE: 34.8060 ------------
Fold 1, mse = 34.8060, mad = 4.8063




Fold 1 Epoch 4 Batch 0: Train Loss = 1823.4424
------------ Save FOLD-BEST model - MSE: 33.8486 ------------


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Custom bins confusion matrix:
[[54 53]
 [10 66]]
Mean absolute deviation (MAD) = 4.691308952026364
Mean squared error (MSE) = 33.84863006291425
Mean absolute percentage error (MAPE) = 669.3819174127128
Cohen kappa score = 0.34483150537023355
------------ Save best model - MSE: 33.8486 ------------
Fold 1, mse = 33.8486, mad = 4.6913




Fold 1 Epoch 5 Batch 0: Train Loss = 950.2600
Fold 1, mse = 35.0779, mad = 4.8663


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 6 Batch 0: Train Loss = 452.7533
Fold 1, mse = 34.4798, mad = 4.8138


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 7 Batch 0: Train Loss = 257.9988
Fold 1, mse = 36.6186, mad = 5.0749


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 8 Batch 0: Train Loss = 119.7934
Fold 1, mse = 33.8923, mad = 4.7730


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 9 Batch 0: Train Loss = 67.9862
------------ Save FOLD-BEST model - MSE: 33.7874 ------------


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Custom bins confusion matrix:
[[44 63]
 [ 8 68]]
Mean absolute deviation (MAD) = 4.76608129019457
Mean squared error (MSE) = 33.78735471923503
Mean absolute percentage error (MAPE) = 709.0758417754615
Cohen kappa score = 0.2769213645723191
------------ Save best model - MSE: 33.7874 ------------
Fold 1, mse = 33.7874, mad = 4.7661




Fold 1 Epoch 10 Batch 0: Train Loss = 31.4822
Fold 1, epoch 10: Loss = 2.3096 Valid loss = 2.0001 MSE = 35.1433 AUROC = 0.6483
Fold 1, mse = 35.1433, mad = 4.9379


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 11 Batch 0: Train Loss = 15.0275
Fold 1, mse = 34.5276, mad = 4.8630


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 12 Batch 0: Train Loss = 9.6656
------------ Save FOLD-BEST model - MSE: 32.9686 ------------


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Custom bins confusion matrix:
[[59 48]
 [14 62]]
Mean absolute deviation (MAD) = 4.67400261406755
Mean squared error (MSE) = 32.96863098032762
Mean absolute percentage error (MAPE) = 686.2828785518894
Cohen kappa score = 0.3448435154174847
------------ Save best model - MSE: 32.9686 ------------
Fold 1, mse = 32.9686, mad = 4.6740




Fold 1 Epoch 13 Batch 0: Train Loss = 6.0909
Fold 1, mse = 34.5306, mad = 4.8817


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 14 Batch 0: Train Loss = 3.9481
Fold 1, mse = 33.8692, mad = 4.8119


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 15 Batch 0: Train Loss = 3.4487
Fold 1, mse = 34.7255, mad = 4.9079


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 16 Batch 0: Train Loss = 3.0803
Fold 1, mse = 35.4013, mad = 4.9824


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 17 Batch 0: Train Loss = 2.9274
Fold 1, mse = 33.4381, mad = 4.7661


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 18 Batch 0: Train Loss = 2.7050
Fold 1, mse = 33.8838, mad = 4.8122


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 19 Batch 0: Train Loss = 2.7903
Fold 1, mse = 34.8803, mad = 4.9206


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 20 Batch 0: Train Loss = 2.7726
Fold 1, epoch 20: Loss = 2.2193 Valid loss = 1.8984 MSE = 33.3624 AUROC = 0.8761
Fold 1, mse = 33.3624, mad = 4.7504


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 21 Batch 0: Train Loss = 2.4990
Fold 1, mse = 34.9860, mad = 4.9160


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 22 Batch 0: Train Loss = 2.4408
Fold 1, mse = 35.0340, mad = 4.9177


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 1 Epoch 23 Batch 0: Train Loss = 2.4354
------------ Save FOLD-BEST model - MSE: 32.5878 ------------
Custom bins confusion matrix:
[[53 54]
 [ 9 67]]
Mean absolute deviation (MAD) = 4.632330927947478
Mean squared error (MSE) = 32.587816782490265
Mean absolute percentage error (MAPE) = 712.5752674223722
Cohen kappa score = 0.3471317741661476
------------ Save best model - MSE: 32.5878 ------------
Fold 1, mse = 32.5878, mad = 4.6323




Fold 1 Epoch 24 Batch 0: Train Loss = 2.4373
Fold 1, mse = 35.5565, mad = 4.9256




Fold 1 Epoch 25 Batch 0: Train Loss = 2.2463
------------ Save FOLD-BEST model - MSE: 32.3848 ------------
Custom bins confusion matrix:
[[50 57]
 [ 7 69]]
Mean absolute deviation (MAD) = 4.597266989602002
Mean squared error (MSE) = 32.384821035597284
Mean absolute percentage error (MAPE) = 711.0843838255745
Cohen kappa score = 0.34253957561468507
------------ Save best model - MSE: 32.3848 ------------
Fold 1, mse = 32.3848, mad = 4.5973




Fold 1 Epoch 26 Batch 0: Train Loss = 2.3386
Fold 1, mse = 33.5734, mad = 4.7478




Fold 1 Epoch 27 Batch 0: Train Loss = 2.6647
------------ Save FOLD-BEST model - MSE: 31.1586 ------------
Custom bins confusion matrix:
[[56 51]
 [ 8 68]]
Mean absolute deviation (MAD) = 4.467344779142547
Mean squared error (MSE) = 31.15864374173218
Mean absolute percentage error (MAPE) = 680.9736239823505
Cohen kappa score = 0.3864295050292663
------------ Save best model - MSE: 31.1586 ------------
Fold 1, mse = 31.1586, mad = 4.4673




Fold 1 Epoch 28 Batch 0: Train Loss = 2.4134
Fold 1, mse = 34.0094, mad = 4.7738




Fold 1 Epoch 29 Batch 0: Train Loss = 2.1182
------------ Save FOLD-BEST model - MSE: 31.1204 ------------
Custom bins confusion matrix:
[[56 51]
 [ 8 68]]
Mean absolute deviation (MAD) = 4.437923105677044
Mean squared error (MSE) = 31.120386364507148
Mean absolute percentage error (MAPE) = 680.2451831562217
Cohen kappa score = 0.3864295050292663
------------ Save best model - MSE: 31.1204 ------------
Fold 1, mse = 31.1204, mad = 4.4379




Fold 1 Epoch 30 Batch 0: Train Loss = 2.2712
Fold 1, epoch 30: Loss = 2.1956 Valid loss = 1.8882 MSE = 34.7393 AUROC = 0.9279
Fold 1, mse = 34.7393, mad = 4.8661




Fold 1 Epoch 31 Batch 0: Train Loss = 2.5368
Fold 1, mse = 31.8554, mad = 4.5708




Fold 1 Epoch 32 Batch 0: Train Loss = 2.1263
------------ Save FOLD-BEST model - MSE: 30.1063 ------------
Custom bins confusion matrix:
[[73 34]
 [21 55]]
Mean absolute deviation (MAD) = 4.351808810669797
Mean squared error (MSE) = 30.10631433670309
Mean absolute percentage error (MAPE) = 614.3883939722488
Cohen kappa score = 0.3961120777584448
------------ Save best model - MSE: 30.1063 ------------
Fold 1, mse = 30.1063, mad = 4.3518
Fold 1 Epoch 33 Batch 0: Train Loss = 2.3269




Fold 1, mse = 35.8048, mad = 4.8862




Fold 1 Epoch 34 Batch 0: Train Loss = 2.1830
Fold 1, mse = 31.5686, mad = 4.4372




Fold 1 Epoch 35 Batch 0: Train Loss = 2.1822
Fold 1, mse = 32.0065, mad = 4.4946




Fold 1 Epoch 36 Batch 0: Train Loss = 2.2408
Fold 1, mse = 32.0846, mad = 4.5187




Fold 1 Epoch 37 Batch 0: Train Loss = 2.0945
Fold 1, mse = 30.7180, mad = 4.3755




Fold 1 Epoch 38 Batch 0: Train Loss = 1.8456
Fold 1, mse = 33.1884, mad = 4.5827




Fold 1 Epoch 39 Batch 0: Train Loss = 2.2294
Fold 1, mse = 30.8866, mad = 4.3262




Fold 1 Epoch 40 Batch 0: Train Loss = 2.2251
Fold 1, epoch 40: Loss = 1.9587 Valid loss = 1.6343 MSE = 32.3297 AUROC = 0.9647
Fold 1, mse = 32.3297, mad = 4.4498




Fold 1 Epoch 41 Batch 0: Train Loss = 2.2289
Fold 1, mse = 31.5433, mad = 4.4360




Fold 1 Epoch 42 Batch 0: Train Loss = 2.1549
------------ Save FOLD-BEST model - MSE: 29.6221 ------------
Custom bins confusion matrix:
[[67 40]
 [14 62]]
Mean absolute deviation (MAD) = 4.262070082788894
Mean squared error (MSE) = 29.62209211476328
Mean absolute percentage error (MAPE) = 651.6074726678871
Cohen kappa score = 0.4210896309314587
------------ Save best model - MSE: 29.6221 ------------
Fold 1, mse = 29.6221, mad = 4.2621




Fold 1 Epoch 43 Batch 0: Train Loss = 2.1761
Fold 1, mse = 30.4037, mad = 4.3362




Fold 1 Epoch 44 Batch 0: Train Loss = 2.1844
Fold 1, mse = 31.0113, mad = 4.3468




Fold 1 Epoch 45 Batch 0: Train Loss = 1.7935
Fold 1, mse = 29.6631, mad = 4.2107




Fold 1 Epoch 46 Batch 0: Train Loss = 2.1005
Fold 1, mse = 30.5665, mad = 4.3239




Fold 1 Epoch 47 Batch 0: Train Loss = 2.1559
------------ Save FOLD-BEST model - MSE: 29.1026 ------------
Custom bins confusion matrix:
[[69 38]
 [17 59]]
Mean absolute deviation (MAD) = 4.1653594706648205
Mean squared error (MSE) = 29.10255858180802
Mean absolute percentage error (MAPE) = 625.1738836113066
Cohen kappa score = 0.40496600650310377
------------ Save best model - MSE: 29.1026 ------------
Fold 1, mse = 29.1026, mad = 4.1654




Fold 1 Epoch 48 Batch 0: Train Loss = 1.9094
Fold 1, mse = 29.8115, mad = 4.2404




Fold 1 Epoch 49 Batch 0: Train Loss = 2.0986
Fold 1, mse = 30.4150, mad = 4.2933




Fold 1 Epoch 50 Batch 0: Train Loss = 1.7977
Fold 1, epoch 50: Loss = 1.8247 Valid loss = 1.3380 MSE = 28.4855 AUROC = 0.9801
------------ Save FOLD-BEST model - MSE: 28.4855 ------------
Custom bins confusion matrix:
[[71 36]
 [21 55]]
Mean absolute deviation (MAD) = 4.099659344783411
Mean squared error (MSE) = 28.48552618608126
Mean absolute percentage error (MAPE) = 622.2055683381168
Cohen kappa score = 0.37647199473967363
------------ Save best model - MSE: 28.4855 ------------
Fold 1, mse = 28.4855, mad = 4.0997




Fold 1 Epoch 51 Batch 0: Train Loss = 1.9922
Fold 1, mse = 28.8606, mad = 4.1358




Fold 1 Epoch 52 Batch 0: Train Loss = 2.0764
------------ Save FOLD-BEST model - MSE: 27.9927 ------------
Custom bins confusion matrix:
[[77 30]
 [22 54]]
Mean absolute deviation (MAD) = 4.036291085299569
Mean squared error (MSE) = 27.99266405752327
Mean absolute percentage error (MAPE) = 604.2384602822151
Cohen kappa score = 0.4236918604651163
------------ Save best model - MSE: 27.9927 ------------
Fold 1, mse = 27.9927, mad = 4.0363




Fold 1 Epoch 53 Batch 0: Train Loss = 1.7015
Fold 1, mse = 29.8527, mad = 4.2634




Fold 1 Epoch 54 Batch 0: Train Loss = 1.8673
Fold 1, mse = 28.3102, mad = 4.0640




Fold 1 Epoch 55 Batch 0: Train Loss = 1.7207
Fold 1, mse = 30.4277, mad = 4.3205




Fold 1 Epoch 56 Batch 0: Train Loss = 1.7341
------------ Save FOLD-BEST model - MSE: 27.9745 ------------
Custom bins confusion matrix:
[[82 25]
 [27 49]]
Mean absolute deviation (MAD) = 4.004756734167037
Mean squared error (MSE) = 27.97454867323088
Mean absolute percentage error (MAPE) = 560.7401485867317
Cohen kappa score = 0.41266510307369464
------------ Save best model - MSE: 27.9745 ------------
Fold 1, mse = 27.9745, mad = 4.0048




Fold 1 Epoch 57 Batch 0: Train Loss = 1.4107
Fold 1, mse = 30.2421, mad = 4.3671




Fold 1 Epoch 58 Batch 0: Train Loss = 1.8520
Fold 1, mse = 28.9380, mad = 4.1541




Fold 1 Epoch 59 Batch 0: Train Loss = 1.9085
Fold 1, mse = 29.3007, mad = 4.2251




Fold 1 Epoch 60 Batch 0: Train Loss = 1.9698
Fold 1, epoch 60: Loss = 1.7613 Valid loss = 1.4011 MSE = 29.8007 AUROC = 0.9848
Fold 1, mse = 29.8007, mad = 4.3032




Fold 1 Epoch 61 Batch 0: Train Loss = 1.5149
------------ Save FOLD-BEST model - MSE: 27.5164 ------------
Custom bins confusion matrix:
[[85 22]
 [31 45]]
Mean absolute deviation (MAD) = 3.9778136038261453
Mean squared error (MSE) = 27.516449507385943
Mean absolute percentage error (MAPE) = 567.7488967572148
Cohen kappa score = 0.3932436659368158
------------ Save best model - MSE: 27.5164 ------------
Fold 1, mse = 27.5164, mad = 3.9778




Fold 1 Epoch 62 Batch 0: Train Loss = 1.6950
Fold 1, mse = 28.2621, mad = 4.1079




Fold 1 Epoch 63 Batch 0: Train Loss = 1.5631
Fold 1, mse = 28.1353, mad = 4.1134




Fold 1 Epoch 64 Batch 0: Train Loss = 1.5934
Fold 1, mse = 29.2031, mad = 4.2826




Fold 1 Epoch 65 Batch 0: Train Loss = 1.6754
Fold 1, mse = 27.7778, mad = 3.9895




Fold 1 Epoch 66 Batch 0: Train Loss = 1.7933
------------ Save FOLD-BEST model - MSE: 27.4474 ------------
Custom bins confusion matrix:
[[82 25]
 [26 50]]
Mean absolute deviation (MAD) = 3.995333157227029
Mean squared error (MSE) = 27.447359706743093
Mean absolute percentage error (MAPE) = 584.2971542371408
Cohen kappa score = 0.4250600628349658
------------ Save best model - MSE: 27.4474 ------------
Fold 1, mse = 27.4474, mad = 3.9953




Fold 1 Epoch 67 Batch 0: Train Loss = 1.6654
Fold 1, mse = 27.5034, mad = 4.1482




Fold 1 Epoch 68 Batch 0: Train Loss = 1.7634
------------ Save FOLD-BEST model - MSE: 26.0526 ------------
Custom bins confusion matrix:
[[73 34]
 [17 59]]
Mean absolute deviation (MAD) = 3.9831365977886635
Mean squared error (MSE) = 26.052576994708346
Mean absolute percentage error (MAPE) = 638.8158946330589
Cohen kappa score = 0.4441665177773807
------------ Save best model - MSE: 26.0526 ------------
Fold 1, mse = 26.0526, mad = 3.9831




Fold 1 Epoch 69 Batch 0: Train Loss = 1.2311
Fold 1, mse = 26.0790, mad = 4.0208




Fold 1 Epoch 70 Batch 0: Train Loss = 1.6258
Fold 1, epoch 70: Loss = 1.5596 Valid loss = 1.1267 MSE = 24.8122 AUROC = 0.9881
------------ Save FOLD-BEST model - MSE: 24.8122 ------------
Custom bins confusion matrix:
[[78 29]
 [21 55]]
Mean absolute deviation (MAD) = 3.8872458372698158
Mean squared error (MSE) = 24.81215697348162
Mean absolute percentage error (MAPE) = 595.276137139608
Cohen kappa score = 0.44585755813953487
------------ Save best model - MSE: 24.8122 ------------
Fold 1, mse = 24.8122, mad = 3.8872




Fold 1 Epoch 71 Batch 0: Train Loss = 1.5665
Fold 1, mse = 25.1103, mad = 3.8652




Fold 1 Epoch 72 Batch 0: Train Loss = 1.5312
Fold 1, mse = 25.6569, mad = 3.8572




Fold 1 Epoch 73 Batch 0: Train Loss = 1.6537
Fold 1, mse = 26.0243, mad = 3.9801




Fold 1 Epoch 74 Batch 0: Train Loss = 1.8320
Fold 1, mse = 26.4498, mad = 3.9782




Fold 1 Epoch 75 Batch 0: Train Loss = 1.5148
Fold 1, mse = 24.9812, mad = 3.8266




Fold 1 Epoch 76 Batch 0: Train Loss = 1.4707
Fold 1, mse = 27.1944, mad = 3.8262
Fold 1 Epoch 77 Batch 0: Train Loss = 1.4439




Fold 1, mse = 28.0590, mad = 3.9459
Fold 1 Epoch 78 Batch 0: Train Loss = 1.5796




Fold 1, mse = 28.4784, mad = 4.1452
Fold 1 Epoch 79 Batch 0: Train Loss = 1.5257




Fold 1, mse = 26.2044, mad = 3.9161




Fold 1 Epoch 80 Batch 0: Train Loss = 1.5028
Fold 1, epoch 80: Loss = 1.4825 Valid loss = 1.1163 MSE = 25.4171 AUROC = 0.9899
Fold 1, mse = 25.4171, mad = 3.8296




Fold 1 Epoch 81 Batch 0: Train Loss = 1.6673
Fold 1, mse = 24.8942, mad = 3.8244




Fold 1 Epoch 82 Batch 0: Train Loss = 1.3661
Fold 1, mse = 28.5682, mad = 4.2261




Fold 1 Epoch 83 Batch 0: Train Loss = 1.6000
Fold 1, mse = 24.9191, mad = 3.7869




Fold 1 Epoch 84 Batch 0: Train Loss = 1.5351
Fold 1, mse = 26.3059, mad = 3.9459




Fold 1 Epoch 85 Batch 0: Train Loss = 1.5136
Fold 1, mse = 26.9889, mad = 3.9934




Fold 1 Epoch 86 Batch 0: Train Loss = 1.4021
------------ Save FOLD-BEST model - MSE: 24.7141 ------------
Custom bins confusion matrix:
[[75 32]
 [18 58]]
Mean absolute deviation (MAD) = 3.79117239455682
Mean squared error (MSE) = 24.714077928855225
Mean absolute percentage error (MAPE) = 596.9404432930271
Cohen kappa score = 0.4520301832554797
------------ Save best model - MSE: 24.7141 ------------
Fold 1, mse = 24.7141, mad = 3.7912




Fold 1 Epoch 87 Batch 0: Train Loss = 1.4929
------------ Save FOLD-BEST model - MSE: 23.7691 ------------
Custom bins confusion matrix:
[[72 35]
 [12 64]]
Mean absolute deviation (MAD) = 3.732939619986895
Mean squared error (MSE) = 23.769140994140784
Mean absolute percentage error (MAPE) = 605.0675293309577
Cohen kappa score = 0.4933733875242976
------------ Save best model - MSE: 23.7691 ------------
Fold 1, mse = 23.7691, mad = 3.7329




Fold 1 Epoch 88 Batch 0: Train Loss = 1.4156
Fold 1, mse = 23.9649, mad = 3.7823




Fold 1 Epoch 89 Batch 0: Train Loss = 1.6541
------------ Save FOLD-BEST model - MSE: 23.4127 ------------
Custom bins confusion matrix:
[[78 29]
 [17 59]]
Mean absolute deviation (MAD) = 3.6963134494293146
Mean squared error (MSE) = 23.41274876004446
Mean absolute percentage error (MAPE) = 594.2069561035305
Cohen kappa score = 0.4939889396489541
------------ Save best model - MSE: 23.4127 ------------
Fold 1, mse = 23.4127, mad = 3.6963




Fold 1 Epoch 90 Batch 0: Train Loss = 1.4252
Fold 1, epoch 90: Loss = 1.3787 Valid loss = 1.0127 MSE = 23.5413 AUROC = 0.9945
Fold 1, mse = 23.5413, mad = 3.6885




Fold 1 Epoch 91 Batch 0: Train Loss = 1.2912
------------ Save FOLD-BEST model - MSE: 22.9594 ------------
Custom bins confusion matrix:
[[75 32]
 [13 63]]
Mean absolute deviation (MAD) = 3.686306268037318
Mean squared error (MSE) = 22.95944621017649
Mean absolute percentage error (MAPE) = 606.9997656590848
Cohen kappa score = 0.511362962083902
------------ Save best model - MSE: 22.9594 ------------
Fold 1, mse = 22.9594, mad = 3.6863




Fold 1 Epoch 92 Batch 0: Train Loss = 1.1303
------------ Save FOLD-BEST model - MSE: 22.2117 ------------
Custom bins confusion matrix:
[[85 22]
 [18 58]]
Mean absolute deviation (MAD) = 3.608130736327931
Mean squared error (MSE) = 22.211746972428237
Mean absolute percentage error (MAPE) = 576.1643711688106
Cohen kappa score = 0.553331706126434
------------ Save best model - MSE: 22.2117 ------------
Fold 1, mse = 22.2117, mad = 3.6081




Fold 1 Epoch 93 Batch 0: Train Loss = 1.2131
Fold 1, mse = 24.6974, mad = 3.6866
Fold 1 Epoch 94 Batch 0: Train Loss = 1.6028




Fold 1, mse = 26.5975, mad = 3.8010
Fold 1 Epoch 95 Batch 0: Train Loss = 1.3727




Fold 1, mse = 23.8616, mad = 3.6720




Fold 1 Epoch 96 Batch 0: Train Loss = 1.2746
Fold 1, mse = 22.6833, mad = 3.6171




Fold 1 Epoch 97 Batch 0: Train Loss = 1.5955
Fold 1, mse = 23.0650, mad = 3.6240




Fold 1 Epoch 98 Batch 0: Train Loss = 1.2705
Fold 1, mse = 22.9537, mad = 3.5914




Fold 1 Epoch 99 Batch 0: Train Loss = 1.2766
Fold 1, mse = 22.8788, mad = 3.6285




Fold 1 Epoch 100 Batch 0: Train Loss = 1.3173
Fold 1, epoch 100: Loss = 1.2635 Valid loss = 0.9408 MSE = 22.0148 AUROC = 0.9942
------------ Save FOLD-BEST model - MSE: 22.0148 ------------
Custom bins confusion matrix:
[[85 22]
 [20 56]]
Mean absolute deviation (MAD) = 3.5147696037432645
Mean squared error (MSE) = 22.014832908455574
Mean absolute percentage error (MAPE) = 541.7930681380575
Cohen kappa score = 0.5292171995589856
------------ Save best model - MSE: 22.0148 ------------
Fold 1, mse = 22.0148, mad = 3.5148




Fold 1 Epoch 101 Batch 0: Train Loss = 1.3448
Fold 1, mse = 22.5316, mad = 3.5907




Fold 1 Epoch 102 Batch 0: Train Loss = 1.3453
Fold 1, mse = 23.4427, mad = 3.5691




Fold 1 Epoch 103 Batch 0: Train Loss = 1.3062
Fold 1, mse = 37.6964, mad = 4.9498




Fold 1 Epoch 104 Batch 0: Train Loss = 1.5037
Fold 1, mse = 22.5592, mad = 3.6137




Fold 1 Epoch 105 Batch 0: Train Loss = 1.2548
Fold 1, mse = 23.4844, mad = 3.6991




Fold 1 Epoch 106 Batch 0: Train Loss = 1.5335
Fold 1, mse = 24.9427, mad = 3.8544




Fold 1 Epoch 107 Batch 0: Train Loss = 1.2126
Fold 1, mse = 24.9032, mad = 3.8478




Fold 1 Epoch 108 Batch 0: Train Loss = 1.3764
Fold 1, mse = 22.5029, mad = 3.5589




Fold 1 Epoch 109 Batch 0: Train Loss = 1.4720
Fold 1, mse = 22.9660, mad = 3.6111




Fold 1 Epoch 110 Batch 0: Train Loss = 1.1144
Fold 1, epoch 110: Loss = 1.2884 Valid loss = 0.9992 MSE = 23.4010 AUROC = 0.9946
Fold 1, mse = 23.4010, mad = 3.5631




Fold 1 Epoch 111 Batch 0: Train Loss = 1.4316
Fold 1, mse = 22.7375, mad = 3.5562
Fold 1 Epoch 112 Batch 0: Train Loss = 1.2794




Fold 1, mse = 28.0109, mad = 4.0305




Fold 1 Epoch 113 Batch 0: Train Loss = 1.5419
Fold 1, mse = 22.0291, mad = 3.5451




Fold 1 Epoch 114 Batch 0: Train Loss = 1.4030
Fold 1, mse = 24.8525, mad = 3.7968




Fold 1 Epoch 115 Batch 0: Train Loss = 1.2263
Fold 1, mse = 24.2840, mad = 3.7346




Fold 1 Epoch 116 Batch 0: Train Loss = 1.0349
Fold 1, mse = 23.0239, mad = 3.5582




Fold 1 Epoch 117 Batch 0: Train Loss = 1.1806
Fold 1, mse = 33.1320, mad = 4.4758




Fold 1 Epoch 118 Batch 0: Train Loss = 1.0580
Fold 1, mse = 23.9945, mad = 3.6719




Fold 1 Epoch 119 Batch 0: Train Loss = 1.4135
Fold 1, mse = 23.1500, mad = 3.5420




Fold 1 Epoch 120 Batch 0: Train Loss = 1.3142
Fold 1, epoch 120: Loss = 1.2067 Valid loss = 0.9773 MSE = 23.2082 AUROC = 0.9964
Fold 1, mse = 23.2082, mad = 3.5354




Fold 1 Epoch 121 Batch 0: Train Loss = 1.2646
Fold 1, mse = 23.0755, mad = 3.5998




Fold 1 Epoch 122 Batch 0: Train Loss = 1.2427
Fold 1, mse = 23.7074, mad = 3.6909




Fold 1 Epoch 123 Batch 0: Train Loss = 1.0817
Fold 1, mse = 23.9498, mad = 3.6886




Fold 1 Epoch 124 Batch 0: Train Loss = 1.0949
Fold 1, mse = 27.3054, mad = 3.9370




Fold 1 Epoch 125 Batch 0: Train Loss = 1.1221
Fold 1, mse = 23.4958, mad = 3.6704




Fold 1 Epoch 126 Batch 0: Train Loss = 1.3755
Fold 1, mse = 22.7890, mad = 3.6124




Fold 1 Epoch 127 Batch 0: Train Loss = 1.2722
Fold 1, mse = 23.3474, mad = 3.6252




Fold 1 Epoch 128 Batch 0: Train Loss = 0.9585
Fold 1, mse = 22.3237, mad = 3.4962




Fold 1 Epoch 129 Batch 0: Train Loss = 1.2055
Fold 1, mse = 22.8680, mad = 3.5351




Fold 1 Epoch 130 Batch 0: Train Loss = 1.1879
Fold 1, epoch 130: Loss = 1.1433 Valid loss = 0.9983 MSE = 23.7305 AUROC = 0.9964
Fold 1, mse = 23.7305, mad = 3.6196




Fold 1 Epoch 131 Batch 0: Train Loss = 1.1285
Fold 1, mse = 23.2734, mad = 3.5819




Fold 1 Epoch 132 Batch 0: Train Loss = 1.0700
Fold 1, mse = 23.0595, mad = 3.5659




Fold 1 Epoch 133 Batch 0: Train Loss = 1.3569
Fold 1, mse = 23.1217, mad = 3.4478




Fold 1 Epoch 134 Batch 0: Train Loss = 1.2505
Fold 1, mse = 22.3433, mad = 3.4793




Fold 1 Epoch 135 Batch 0: Train Loss = 1.1716
Fold 1, mse = 26.5006, mad = 3.8299




Fold 1 Epoch 136 Batch 0: Train Loss = 0.8965
Fold 1, mse = 24.8288, mad = 3.6993




Fold 1 Epoch 137 Batch 0: Train Loss = 1.1204
Fold 1, mse = 26.7255, mad = 3.8896




Fold 1 Epoch 138 Batch 0: Train Loss = 0.9684
Fold 1, mse = 23.3525, mad = 3.5339




Fold 1 Epoch 139 Batch 0: Train Loss = 1.4345
Fold 1, mse = 22.5656, mad = 3.4737




Fold 1 Epoch 140 Batch 0: Train Loss = 0.9871
Fold 1, epoch 140: Loss = 1.0970 Valid loss = 1.1757 MSE = 28.4542 AUROC = 0.9960
Fold 1, mse = 28.4542, mad = 3.9231




Fold 1 Epoch 141 Batch 0: Train Loss = 1.2503
Fold 1, mse = 22.7338, mad = 3.4603




Fold 1 Epoch 142 Batch 0: Train Loss = 1.1074
Fold 1, mse = 23.7386, mad = 3.4564




Fold 1 Epoch 143 Batch 0: Train Loss = 1.1577
------------ Save FOLD-BEST model - MSE: 20.4020 ------------
Custom bins confusion matrix:
[[87 20]
 [17 59]]
Mean absolute deviation (MAD) = 3.3882477341991835
Mean squared error (MSE) = 20.402014043678193
Mean absolute percentage error (MAPE) = 499.36120746920795
Cohen kappa score = 0.5860487864522834
------------ Save best model - MSE: 20.4020 ------------
Fold 1, mse = 20.4020, mad = 3.3882




Fold 1 Epoch 144 Batch 0: Train Loss = 1.2202
Fold 1, mse = 22.3006, mad = 3.5938




Fold 1 Epoch 145 Batch 0: Train Loss = 1.1432
Fold 1, mse = 21.4735, mad = 3.4806




Fold 1 Epoch 146 Batch 0: Train Loss = 0.8854
Fold 1, mse = 22.2037, mad = 3.4684
Fold 1 Epoch 147 Batch 0: Train Loss = 1.1716




Fold 1, mse = 21.7768, mad = 3.4547




Fold 1 Epoch 148 Batch 0: Train Loss = 0.9464
Fold 1, mse = 22.6045, mad = 3.5771




Fold 1 Epoch 149 Batch 0: Train Loss = 1.0601
Fold 1, mse = 24.9376, mad = 3.8158
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 10
model_module_id



Fold 2 Epoch 0 Batch 0: Train Loss = 172655.0938
Fold 2, epoch 0: Loss = 2.4442 Valid loss = 2.3788 MSE = 44.3037 AUROC = 0.6078
------------ Save FOLD-BEST model - MSE: 44.3037 ------------
Custom bins confusion matrix:
[[66 41  0]
 [15 58  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 5.042908963758486
Mean squared error (MSE) = 44.3036545128857
Mean absolute percentage error (MAPE) = 506.237152462784
Cohen kappa score = 0.3886222222222222
Fold 2, mse = 44.3037, mad = 5.0429




Fold 2 Epoch 1 Batch 0: Train Loss = 21093.5898
Fold 2, mse = 44.5993, mad = 5.1844




Fold 2 Epoch 2 Batch 0: Train Loss = 7026.3374
------------ Save FOLD-BEST model - MSE: 42.3972 ------------
Custom bins confusion matrix:
[[69 38  0]
 [18 55  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.883968402264292
Mean squared error (MSE) = 42.397177191061104
Mean absolute percentage error (MAPE) = 492.47630404230136
Cohen kappa score = 0.38136355459615034
Fold 2, mse = 42.3972, mad = 4.8840


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 3 Batch 0: Train Loss = 3609.3608
Fold 2, mse = 42.4411, mad = 5.0624


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 4 Batch 0: Train Loss = 1417.0157
------------ Save FOLD-BEST model - MSE: 40.9855 ------------
Custom bins confusion matrix:
[[67 40  0]
 [16 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.844107270120936
Mean squared error (MSE) = 40.98554534077342
Mean absolute percentage error (MAPE) = 510.3177990721524
Cohen kappa score = 0.3862216669641264
Fold 2, mse = 40.9855, mad = 4.8441


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 5 Batch 0: Train Loss = 832.9252
Fold 2, mse = 41.2072, mad = 4.9590


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 6 Batch 0: Train Loss = 391.5498
------------ Save FOLD-BEST model - MSE: 40.7501 ------------
Custom bins confusion matrix:
[[58 49  0]
 [11 62  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.900570070447645
Mean squared error (MSE) = 40.75005741908632
Mean absolute percentage error (MAPE) = 532.594686774458
Cohen kappa score = 0.3607202825545712
Fold 2, mse = 40.7501, mad = 4.9006


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 7 Batch 0: Train Loss = 231.0954
------------ Save FOLD-BEST model - MSE: 40.0814 ------------
Custom bins confusion matrix:
[[68 39  0]
 [16 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.795881565550305
Mean squared error (MSE) = 40.081418302786936
Mean absolute percentage error (MAPE) = 512.8705434606045
Cohen kappa score = 0.3958035288507391
Fold 2, mse = 40.0814, mad = 4.7959


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 8 Batch 0: Train Loss = 104.3355
Fold 2, mse = 40.8623, mad = 4.9834


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 9 Batch 0: Train Loss = 58.2447
Fold 2, mse = 40.8604, mad = 5.0164


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 10 Batch 0: Train Loss = 30.8005
Fold 2, epoch 10: Loss = 2.2675 Valid loss = 2.1816 MSE = 40.6544 AUROC = 0.6694
Fold 2, mse = 40.6544, mad = 5.0282


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 2 Epoch 11 Batch 0: Train Loss = 16.6512
------------ Save FOLD-BEST model - MSE: 39.5041 ------------
Custom bins confusion matrix:
[[60 47  0]
 [12 61  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.8252535577694795
Mean squared error (MSE) = 39.50405682468253
Mean absolute percentage error (MAPE) = 529.1214774587678
Cohen kappa score = 0.36757512229210343
Fold 2, mse = 39.5041, mad = 4.8253




Fold 2 Epoch 12 Batch 0: Train Loss = 9.1676
Fold 2, mse = 39.5529, mad = 4.8109




Fold 2 Epoch 13 Batch 0: Train Loss = 5.9280
Fold 2, mse = 40.0256, mad = 5.0225




Fold 2 Epoch 14 Batch 0: Train Loss = 3.9688
Fold 2, mse = 43.5617, mad = 5.4547




Fold 2 Epoch 15 Batch 0: Train Loss = 3.4186
------------ Save FOLD-BEST model - MSE: 38.5791 ------------
Custom bins confusion matrix:
[[64 43  0]
 [14 59  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.7276338231790636
Mean squared error (MSE) = 38.57913592868879
Mean absolute percentage error (MAPE) = 515.7829872852742
Cohen kappa score = 0.3815246848120655
Fold 2, mse = 38.5791, mad = 4.7276




Fold 2 Epoch 16 Batch 0: Train Loss = 3.2606
Fold 2, mse = 39.9755, mad = 4.9932




Fold 2 Epoch 17 Batch 0: Train Loss = 2.4281
Fold 2, mse = 39.0921, mad = 4.8418




Fold 2 Epoch 18 Batch 0: Train Loss = 2.4775
Fold 2, mse = 41.0720, mad = 5.1625




Fold 2 Epoch 19 Batch 0: Train Loss = 2.8197
------------ Save FOLD-BEST model - MSE: 38.1362 ------------
Custom bins confusion matrix:
[[60 47  0]
 [11 62  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.7220951377565665
Mean squared error (MSE) = 38.13624314913447
Mean absolute percentage error (MAPE) = 511.14582985060827
Cohen kappa score = 0.37930834059866314
Fold 2, mse = 38.1362, mad = 4.7221




Fold 2 Epoch 20 Batch 0: Train Loss = 2.2692
Fold 2, epoch 20: Loss = 2.2112 Valid loss = 2.2183 MSE = 42.3600 AUROC = 0.7315
Fold 2, mse = 42.3600, mad = 5.3267
Fold 2 Epoch 21 Batch 0: Train Loss = 2.5887




Fold 2, mse = 38.4518, mad = 4.7390




Fold 2 Epoch 22 Batch 0: Train Loss = 2.5483
Fold 2, mse = 39.7972, mad = 5.0050




Fold 2 Epoch 23 Batch 0: Train Loss = 2.3845
Fold 2, mse = 38.5662, mad = 4.8687




Fold 2 Epoch 24 Batch 0: Train Loss = 2.2433
Fold 2, mse = 40.6644, mad = 5.1296




Fold 2 Epoch 25 Batch 0: Train Loss = 2.6564
Fold 2, mse = 38.6138, mad = 4.8469




Fold 2 Epoch 26 Batch 0: Train Loss = 2.4027
Fold 2, mse = 40.8451, mad = 5.1505




Fold 2 Epoch 27 Batch 0: Train Loss = 2.1623
Fold 2, mse = 38.5838, mad = 4.8432




Fold 2 Epoch 28 Batch 0: Train Loss = 2.0047
Fold 2, mse = 38.5019, mad = 4.8477




Fold 2 Epoch 29 Batch 0: Train Loss = 2.1649
Fold 2, mse = 41.8510, mad = 5.2865




Fold 2 Epoch 30 Batch 0: Train Loss = 2.2911
Fold 2, epoch 30: Loss = 2.0417 Valid loss = 1.9042 MSE = 36.9749 AUROC = 0.8466
------------ Save FOLD-BEST model - MSE: 36.9749 ------------
Custom bins confusion matrix:
[[66 41  0]
 [15 58  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.599579312265214
Mean squared error (MSE) = 36.97491332008014
Mean absolute percentage error (MAPE) = 488.5481479189946
Cohen kappa score = 0.3886222222222222
Fold 2, mse = 36.9749, mad = 4.5996




Fold 2 Epoch 31 Batch 0: Train Loss = 2.4777
Fold 2, mse = 37.9795, mad = 4.7770




Fold 2 Epoch 32 Batch 0: Train Loss = 1.8305
Fold 2, mse = 40.6170, mad = 5.1276




Fold 2 Epoch 33 Batch 0: Train Loss = 2.0527
Fold 2, mse = 37.4526, mad = 4.7067




Fold 2 Epoch 34 Batch 0: Train Loss = 2.1757
Fold 2, mse = 39.0306, mad = 4.9299




Fold 2 Epoch 35 Batch 0: Train Loss = 2.1695
------------ Save FOLD-BEST model - MSE: 36.8313 ------------
Custom bins confusion matrix:
[[61 46  0]
 [11 62  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.643988178391534
Mean squared error (MSE) = 36.831292801600796
Mean absolute percentage error (MAPE) = 497.23604378463347
Cohen kappa score = 0.3886559515490333
Fold 2, mse = 36.8313, mad = 4.6440




Fold 2 Epoch 36 Batch 0: Train Loss = 2.1251
Fold 2, mse = 39.2916, mad = 4.9824




Fold 2 Epoch 37 Batch 0: Train Loss = 1.8879
------------ Save FOLD-BEST model - MSE: 36.6040 ------------
Custom bins confusion matrix:
[[59 48  0]
 [10 63  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.645237050075293
Mean squared error (MSE) = 36.604042694761475
Mean absolute percentage error (MAPE) = 499.487633014679
Cohen kappa score = 0.3816802732904869
Fold 2, mse = 36.6040, mad = 4.6452




Fold 2 Epoch 38 Batch 0: Train Loss = 1.8510
Fold 2, mse = 38.4331, mad = 4.9076




Fold 2 Epoch 39 Batch 0: Train Loss = 1.9848
Fold 2, mse = 37.0875, mad = 4.7085




Fold 2 Epoch 40 Batch 0: Train Loss = 2.1013
Fold 2, epoch 40: Loss = 1.9690 Valid loss = 1.7911 MSE = 36.4589 AUROC = 0.9158
------------ Save FOLD-BEST model - MSE: 36.4589 ------------
Custom bins confusion matrix:
[[59 48  0]
 [ 9 64  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.63593676150182
Mean squared error (MSE) = 36.45894808850071
Mean absolute percentage error (MAPE) = 499.60753053799874
Cohen kappa score = 0.39331946370781323
Fold 2, mse = 36.4589, mad = 4.6359




Fold 2 Epoch 41 Batch 0: Train Loss = 1.8107
Fold 2, mse = 38.4128, mad = 4.9035




Fold 2 Epoch 42 Batch 0: Train Loss = 2.4119
------------ Save FOLD-BEST model - MSE: 35.9901 ------------
Custom bins confusion matrix:
[[62 45  0]
 [16 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.549159426208631
Mean squared error (MSE) = 35.99008225210154
Mean absolute percentage error (MAPE) = 483.8584854825454
Cohen kappa score = 0.3388712147991044
Fold 2, mse = 35.9901, mad = 4.5492




Fold 2 Epoch 43 Batch 0: Train Loss = 1.8954
Fold 2, mse = 36.8635, mad = 4.7042




Fold 2 Epoch 44 Batch 0: Train Loss = 1.7495
Fold 2, mse = 36.0121, mad = 4.5846




Fold 2 Epoch 45 Batch 0: Train Loss = 1.5104
Fold 2, mse = 37.7509, mad = 4.8011




Fold 2 Epoch 46 Batch 0: Train Loss = 1.9796
Fold 2, mse = 36.4023, mad = 4.6428




Fold 2 Epoch 47 Batch 0: Train Loss = 2.0926
------------ Save FOLD-BEST model - MSE: 35.7974 ------------
Custom bins confusion matrix:
[[65 42  0]
 [15 58  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.543353349410191
Mean squared error (MSE) = 35.79743814801541
Mean absolute percentage error (MAPE) = 488.9831877351917
Cohen kappa score = 0.37911048024603744
Fold 2, mse = 35.7974, mad = 4.5434




Fold 2 Epoch 48 Batch 0: Train Loss = 1.5432
Fold 2, mse = 37.1150, mad = 4.6998




Fold 2 Epoch 49 Batch 0: Train Loss = 1.8267
------------ Save FOLD-BEST model - MSE: 35.5387 ------------
Custom bins confusion matrix:
[[68 39  0]
 [14 59  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.495458961765872
Mean squared error (MSE) = 35.53870449741035
Mean absolute percentage error (MAPE) = 474.5233953372429
Cohen kappa score = 0.4196651229070182
Fold 2, mse = 35.5387, mad = 4.4955




Fold 2 Epoch 50 Batch 0: Train Loss = 1.9349
Fold 2, epoch 50: Loss = 1.8127 Valid loss = 1.6859 MSE = 35.4927 AUROC = 0.9512
------------ Save FOLD-BEST model - MSE: 35.4927 ------------
Custom bins confusion matrix:
[[65 42  0]
 [12 61  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.571582110343703
Mean squared error (MSE) = 35.49271991982418
Mean absolute percentage error (MAPE) = 491.28176110975363
Cohen kappa score = 0.41465279002763566
Fold 2, mse = 35.4927, mad = 4.5716




Fold 2 Epoch 51 Batch 0: Train Loss = 1.9702
------------ Save FOLD-BEST model - MSE: 35.3583 ------------
Custom bins confusion matrix:
[[69 38  0]
 [14 59  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.517391948970295
Mean squared error (MSE) = 35.35831885210695
Mean absolute percentage error (MAPE) = 480.39278058876096
Cohen kappa score = 0.42929383068594207
Fold 2, mse = 35.3583, mad = 4.5174




Fold 2 Epoch 52 Batch 0: Train Loss = 1.6151
Fold 2, mse = 35.5998, mad = 4.6040




Fold 2 Epoch 53 Batch 0: Train Loss = 1.9030
------------ Save FOLD-BEST model - MSE: 35.2974 ------------
Custom bins confusion matrix:
[[75 32  0]
 [18 55  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.381240431060265
Mean squared error (MSE) = 35.29737100530267
Mean absolute percentage error (MAPE) = 454.7296083790374
Cohen kappa score = 0.4398325141088658
Fold 2, mse = 35.2974, mad = 4.3812




Fold 2 Epoch 54 Batch 0: Train Loss = 1.6149
Fold 2, mse = 35.4285, mad = 4.5301




Fold 2 Epoch 55 Batch 0: Train Loss = 1.7258
Fold 2, mse = 35.3524, mad = 4.4179




Fold 2 Epoch 56 Batch 0: Train Loss = 1.7917
Fold 2, mse = 35.6376, mad = 4.6686




Fold 2 Epoch 57 Batch 0: Train Loss = 1.9505
------------ Save FOLD-BEST model - MSE: 34.7369 ------------
Custom bins confusion matrix:
[[81 26  0]
 [19 54  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.289099263824696
Mean squared error (MSE) = 34.73690208650627
Mean absolute percentage error (MAPE) = 437.3341039044026
Cohen kappa score = 0.4875677006400788
Fold 2, mse = 34.7369, mad = 4.2891




Fold 2 Epoch 58 Batch 0: Train Loss = 1.9385
Fold 2, mse = 35.9662, mad = 4.6601




Fold 2 Epoch 59 Batch 0: Train Loss = 1.7922
Fold 2, mse = 35.6602, mad = 4.5001




Fold 2 Epoch 60 Batch 0: Train Loss = 1.6125
Fold 2, epoch 60: Loss = 1.7285 Valid loss = 1.6062 MSE = 35.3321 AUROC = 0.9707
Fold 2, mse = 35.3321, mad = 4.4289




Fold 2 Epoch 61 Batch 0: Train Loss = 1.4851
Fold 2, mse = 36.0455, mad = 4.6337




Fold 2 Epoch 62 Batch 0: Train Loss = 1.5817
Fold 2, mse = 35.3843, mad = 4.3416




Fold 2 Epoch 63 Batch 0: Train Loss = 1.5147
Fold 2, mse = 34.8826, mad = 4.4921




Fold 2 Epoch 64 Batch 0: Train Loss = 1.7151
Fold 2, mse = 34.8762, mad = 4.3510




Fold 2 Epoch 65 Batch 0: Train Loss = 1.3503
Fold 2, mse = 35.0034, mad = 4.5367




Fold 2 Epoch 66 Batch 0: Train Loss = 1.5531
Fold 2, mse = 35.2501, mad = 4.4847




Fold 2 Epoch 67 Batch 0: Train Loss = 1.3920
------------ Save FOLD-BEST model - MSE: 34.5403 ------------
Custom bins confusion matrix:
[[71 36  0]
 [16 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.4194399482200915
Mean squared error (MSE) = 34.54028139303608
Mean absolute percentage error (MAPE) = 465.84176337293866
Cohen kappa score = 0.4247766384841397
Fold 2, mse = 34.5403, mad = 4.4194




Fold 2 Epoch 68 Batch 0: Train Loss = 1.5457
Fold 2, mse = 34.5577, mad = 4.3615




Fold 2 Epoch 69 Batch 0: Train Loss = 1.5277
Fold 2, mse = 36.5204, mad = 4.5839




Fold 2 Epoch 70 Batch 0: Train Loss = 1.5328
Fold 2, epoch 70: Loss = 1.5694 Valid loss = 1.6095 MSE = 36.3323 AUROC = 0.9837
Fold 2, mse = 36.3323, mad = 4.5522




Fold 2 Epoch 71 Batch 0: Train Loss = 1.4984
Fold 2, mse = 35.7242, mad = 4.5421




Fold 2 Epoch 72 Batch 0: Train Loss = 1.4417
Fold 2, mse = 36.6573, mad = 4.6250




Fold 2 Epoch 73 Batch 0: Train Loss = 1.5225
Fold 2, mse = 36.0938, mad = 4.5178




Fold 2 Epoch 74 Batch 0: Train Loss = 1.4163
Fold 2, mse = 34.8368, mad = 4.3060




Fold 2 Epoch 75 Batch 0: Train Loss = 1.5786
------------ Save FOLD-BEST model - MSE: 33.8557 ------------
Custom bins confusion matrix:
[[86 21  0]
 [23 50  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.219170386501629
Mean squared error (MSE) = 33.85574111217557
Mean absolute percentage error (MAPE) = 414.37888250107795
Cohen kappa score = 0.48937370697761895
Fold 2, mse = 33.8557, mad = 4.2192




Fold 2 Epoch 76 Batch 0: Train Loss = 1.9061
Fold 2, mse = 36.3879, mad = 4.5943
Fold 2 Epoch 77 Batch 0: Train Loss = 1.6062




Fold 2, mse = 38.3517, mad = 4.7125




Fold 2 Epoch 78 Batch 0: Train Loss = 1.4664
Fold 2, mse = 36.5198, mad = 4.6004




Fold 2 Epoch 79 Batch 0: Train Loss = 1.4439
Fold 2, mse = 35.9441, mad = 4.5829




Fold 2 Epoch 80 Batch 0: Train Loss = 1.4590
Fold 2, epoch 80: Loss = 1.4672 Valid loss = 1.5237 MSE = 35.9203 AUROC = 0.9903
Fold 2, mse = 35.9203, mad = 4.6059




Fold 2 Epoch 81 Batch 0: Train Loss = 1.4625
Fold 2, mse = 34.6574, mad = 4.2856




Fold 2 Epoch 82 Batch 0: Train Loss = 1.1757
Fold 2, mse = 36.0051, mad = 4.2329




Fold 2 Epoch 83 Batch 0: Train Loss = 1.8654
Fold 2, mse = 35.6177, mad = 4.6469




Fold 2 Epoch 84 Batch 0: Train Loss = 1.3056
Fold 2, mse = 34.4222, mad = 4.5068




Fold 2 Epoch 85 Batch 0: Train Loss = 1.4217
Fold 2, mse = 34.8787, mad = 4.4301




Fold 2 Epoch 86 Batch 0: Train Loss = 1.2864
Fold 2, mse = 34.0404, mad = 4.3532




Fold 2 Epoch 87 Batch 0: Train Loss = 1.2565
Fold 2, mse = 34.3643, mad = 4.1727




Fold 2 Epoch 88 Batch 0: Train Loss = 1.3911
Fold 2, mse = 34.8394, mad = 4.5023




Fold 2 Epoch 89 Batch 0: Train Loss = 1.4057
Fold 2, mse = 33.9726, mad = 4.4184




Fold 2 Epoch 90 Batch 0: Train Loss = 1.2796
Fold 2, epoch 90: Loss = 1.3242 Valid loss = 1.4618 MSE = 33.9049 AUROC = 0.9883
Fold 2, mse = 33.9049, mad = 4.2050




Fold 2 Epoch 91 Batch 0: Train Loss = 1.1797
Fold 2, mse = 34.3478, mad = 4.4498




Fold 2 Epoch 92 Batch 0: Train Loss = 1.2745
Fold 2, mse = 34.4668, mad = 4.3581




Fold 2 Epoch 93 Batch 0: Train Loss = 1.4569
Fold 2, mse = 35.4309, mad = 4.4921




Fold 2 Epoch 94 Batch 0: Train Loss = 1.4370
Fold 2, mse = 34.0031, mad = 4.4593




Fold 2 Epoch 95 Batch 0: Train Loss = 1.2984
Fold 2, mse = 34.8009, mad = 4.1853




Fold 2 Epoch 96 Batch 0: Train Loss = 1.2084
------------ Save FOLD-BEST model - MSE: 33.5198 ------------
Custom bins confusion matrix:
[[82 25  0]
 [24 49  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.197056836175334
Mean squared error (MSE) = 33.51981425703964
Mean absolute percentage error (MAPE) = 413.9161188418694
Cohen kappa score = 0.4361370716510903
Fold 2, mse = 33.5198, mad = 4.1971




Fold 2 Epoch 97 Batch 0: Train Loss = 1.2098
Fold 2, mse = 34.3122, mad = 4.4048




Fold 2 Epoch 98 Batch 0: Train Loss = 1.4849
Fold 2, mse = 34.7485, mad = 4.4485




Fold 2 Epoch 99 Batch 0: Train Loss = 1.4789
Fold 2, mse = 34.6080, mad = 4.4706




Fold 2 Epoch 100 Batch 0: Train Loss = 1.2187
Fold 2, epoch 100: Loss = 1.3276 Valid loss = 1.3859 MSE = 32.6526 AUROC = 0.9911
------------ Save FOLD-BEST model - MSE: 32.6526 ------------
Custom bins confusion matrix:
[[78 29  0]
 [20 53  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.2234053516659245
Mean squared error (MSE) = 32.65264078094233
Mean absolute percentage error (MAPE) = 428.46643684734164
Cohen kappa score = 0.4452617383842099
Fold 2, mse = 32.6526, mad = 4.2234




Fold 2 Epoch 101 Batch 0: Train Loss = 1.5141
Fold 2, mse = 33.0948, mad = 4.2091




Fold 2 Epoch 102 Batch 0: Train Loss = 1.3682
Fold 2, mse = 33.4067, mad = 4.1097




Fold 2 Epoch 103 Batch 0: Train Loss = 1.2861
Fold 2, mse = 32.9089, mad = 4.0603




Fold 2 Epoch 104 Batch 0: Train Loss = 1.3292
------------ Save FOLD-BEST model - MSE: 32.6410 ------------
Custom bins confusion matrix:
[[82 25  0]
 [22 51  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.163910395640811
Mean squared error (MSE) = 32.64100928347163
Mean absolute percentage error (MAPE) = 417.2160459141008
Cohen kappa score = 0.4609084139985108
Fold 2, mse = 32.6410, mad = 4.1639




Fold 2 Epoch 105 Batch 0: Train Loss = 1.1470
Fold 2, mse = 33.9037, mad = 4.3861




Fold 2 Epoch 106 Batch 0: Train Loss = 1.2486
Fold 2, mse = 44.8744, mad = 5.3275




Fold 2 Epoch 107 Batch 0: Train Loss = 1.4911
Fold 2, mse = 33.5729, mad = 4.2216




Fold 2 Epoch 108 Batch 0: Train Loss = 1.2058
------------ Save FOLD-BEST model - MSE: 32.3778 ------------
Custom bins confusion matrix:
[[86 21  0]
 [24 49  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.0931984231657585
Mean squared error (MSE) = 32.37783589805823
Mean absolute percentage error (MAPE) = 398.7777330098667
Cohen kappa score = 0.47694433974117345
Fold 2, mse = 32.3778, mad = 4.0932




Fold 2 Epoch 109 Batch 0: Train Loss = 1.2479
Fold 2, mse = 33.0117, mad = 4.2831




Fold 2 Epoch 110 Batch 0: Train Loss = 1.2851
Fold 2, epoch 110: Loss = 1.2469 Valid loss = 1.4645 MSE = 34.7138 AUROC = 0.9900
Fold 2, mse = 34.7138, mad = 4.4513




Fold 2 Epoch 111 Batch 0: Train Loss = 1.3177
Fold 2, mse = 32.4188, mad = 4.1010




Fold 2 Epoch 112 Batch 0: Train Loss = 1.2705
------------ Save FOLD-BEST model - MSE: 31.7669 ------------
Custom bins confusion matrix:
[[82 25  0]
 [22 51  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.132629665995029
Mean squared error (MSE) = 31.766947930261875
Mean absolute percentage error (MAPE) = 405.95779117511216
Cohen kappa score = 0.4609084139985108
Fold 2, mse = 31.7669, mad = 4.1326




Fold 2 Epoch 113 Batch 0: Train Loss = 1.1259
Fold 2, mse = 33.3440, mad = 4.2508




Fold 2 Epoch 114 Batch 0: Train Loss = 1.2667
Fold 2, mse = 35.4252, mad = 4.3844




Fold 2 Epoch 115 Batch 0: Train Loss = 1.2093
------------ Save FOLD-BEST model - MSE: 31.4062 ------------
Custom bins confusion matrix:
[[81 26  0]
 [18 55  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.1640694980795
Mean squared error (MSE) = 31.406237066986165
Mean absolute percentage error (MAPE) = 414.09259739359055
Cohen kappa score = 0.4997236042012162
Fold 2, mse = 31.4062, mad = 4.1641




Fold 2 Epoch 116 Batch 0: Train Loss = 1.2808
Fold 2, mse = 32.8298, mad = 4.0939




Fold 2 Epoch 117 Batch 0: Train Loss = 1.1495
Fold 2, mse = 35.8772, mad = 4.6209




Fold 2 Epoch 118 Batch 0: Train Loss = 1.4948
Fold 2, mse = 33.2026, mad = 4.3682




Fold 2 Epoch 119 Batch 0: Train Loss = 1.1315
Fold 2, mse = 34.9159, mad = 4.4545




Fold 2 Epoch 120 Batch 0: Train Loss = 1.1785
Fold 2, epoch 120: Loss = 1.1800 Valid loss = 1.3553 MSE = 32.2586 AUROC = 0.9901
Fold 2, mse = 32.2586, mad = 4.1636
Fold 2 Epoch 121 Batch 0: Train Loss = 1.1329




Fold 2, mse = 32.7206, mad = 4.2371
Fold 2 Epoch 122 Batch 0: Train Loss = 1.0954




------------ Save FOLD-BEST model - MSE: 30.9338 ------------
Custom bins confusion matrix:
[[81 26  0]
 [20 53  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.053684989280747
Mean squared error (MSE) = 30.93379678434462
Mean absolute percentage error (MAPE) = 398.6558690207465
Cohen kappa score = 0.47536231884057967
Fold 2, mse = 30.9338, mad = 4.0537




Fold 2 Epoch 123 Batch 0: Train Loss = 1.5239
Fold 2, mse = 36.5067, mad = 4.8640




Fold 2 Epoch 124 Batch 0: Train Loss = 1.2858
Fold 2, mse = 32.3906, mad = 4.0065




Fold 2 Epoch 125 Batch 0: Train Loss = 1.2248
Fold 2, mse = 36.3425, mad = 4.6497




Fold 2 Epoch 126 Batch 0: Train Loss = 1.2608
Fold 2, mse = 31.3593, mad = 4.0415




Fold 2 Epoch 127 Batch 0: Train Loss = 1.1363
------------ Save FOLD-BEST model - MSE: 30.8532 ------------
Custom bins confusion matrix:
[[80 27  0]
 [19 54  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.041696720549267
Mean squared error (MSE) = 30.85323296671485
Mean absolute percentage error (MAPE) = 404.3162292176769
Cohen kappa score = 0.47748909772127024
Fold 2, mse = 30.8532, mad = 4.0417




Fold 2 Epoch 128 Batch 0: Train Loss = 1.0681
Fold 2, mse = 35.4425, mad = 4.6076




Fold 2 Epoch 129 Batch 0: Train Loss = 1.2087
Fold 2, mse = 31.6376, mad = 3.9214




Fold 2 Epoch 130 Batch 0: Train Loss = 1.1587
Fold 2, epoch 130: Loss = 1.3182 Valid loss = 1.4388 MSE = 33.0711 AUROC = 0.9864
Fold 2, mse = 33.0711, mad = 4.1560




Fold 2 Epoch 131 Batch 0: Train Loss = 1.1031
Fold 2, mse = 32.4060, mad = 4.2423




Fold 2 Epoch 132 Batch 0: Train Loss = 1.0501
Fold 2, mse = 31.6188, mad = 3.9745




Fold 2 Epoch 133 Batch 0: Train Loss = 1.3722
Fold 2, mse = 30.9741, mad = 4.0034




Fold 2 Epoch 134 Batch 0: Train Loss = 1.1821
Fold 2, mse = 32.4070, mad = 4.2302




Fold 2 Epoch 135 Batch 0: Train Loss = 1.2008
Fold 2, mse = 31.3365, mad = 3.9588




Fold 2 Epoch 136 Batch 0: Train Loss = 1.0656
------------ Save FOLD-BEST model - MSE: 30.4324 ------------
Custom bins confusion matrix:
[[84 23  0]
 [21 52  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 3.9799900856434625
Mean squared error (MSE) = 30.432350014600594
Mean absolute percentage error (MAPE) = 391.746420944372
Cohen kappa score = 0.493564633463906
Fold 2, mse = 30.4324, mad = 3.9800




Fold 2 Epoch 137 Batch 0: Train Loss = 1.0387
Fold 2, mse = 31.0032, mad = 3.8982




Fold 2 Epoch 138 Batch 0: Train Loss = 0.9918
Fold 2, mse = 34.6882, mad = 4.5398




Fold 2 Epoch 139 Batch 0: Train Loss = 1.0651
Fold 2, mse = 31.2664, mad = 4.0060




Fold 2 Epoch 140 Batch 0: Train Loss = 1.0551
Fold 2, epoch 140: Loss = 1.1014 Valid loss = 1.3241 MSE = 30.5605 AUROC = 0.9885
Fold 2, mse = 30.5605, mad = 4.0228




Fold 2 Epoch 141 Batch 0: Train Loss = 1.0575
------------ Save FOLD-BEST model - MSE: 30.1616 ------------
Custom bins confusion matrix:
[[89 18  0]
 [22 51  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 3.869544302470698
Mean squared error (MSE) = 30.16159667645393
Mean absolute percentage error (MAPE) = 376.15371632948006
Cohen kappa score = 0.5328297135662574
Fold 2, mse = 30.1616, mad = 3.8695




Fold 2 Epoch 142 Batch 0: Train Loss = 1.0250
Fold 2, mse = 32.5394, mad = 4.0739




Fold 2 Epoch 143 Batch 0: Train Loss = 1.2851
Fold 2, mse = 30.4633, mad = 4.0667




Fold 2 Epoch 144 Batch 0: Train Loss = 0.8542
Fold 2, mse = 30.4063, mad = 3.9477
Fold 2 Epoch 145 Batch 0: Train Loss = 1.0462




Fold 2, mse = 30.8299, mad = 3.8612




Fold 2 Epoch 146 Batch 0: Train Loss = 1.1996
------------ Save FOLD-BEST model - MSE: 29.9896 ------------
Custom bins confusion matrix:
[[76 31  0]
 [13 60  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.165160964678719
Mean squared error (MSE) = 29.989596178232997
Mean absolute percentage error (MAPE) = 419.1899523837326
Cohen kappa score = 0.5096622719884414
Fold 2, mse = 29.9896, mad = 4.1652




Fold 2 Epoch 147 Batch 0: Train Loss = 1.3062
Fold 2, mse = 31.3068, mad = 4.0670




Fold 2 Epoch 148 Batch 0: Train Loss = 0.8960
Fold 2, mse = 30.6085, mad = 3.9110




Fold 2 Epoch 149 Batch 0: Train Loss = 1.3186
Fold 2, mse = 30.2978, mad = 4.0408
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 10
model_module_id



Fold 3 Epoch 0 Batch 0: Train Loss = 370943.0312
Fold 3, epoch 0: Loss = 2.4536 Valid loss = 2.5440 MSE = 48.0360 AUROC = 0.5466
------------ Save FOLD-BEST model - MSE: 48.0360 ------------
Custom bins confusion matrix:
[[54 48]
 [14 55]]
Mean absolute deviation (MAD) = 5.326424823684332
Mean squared error (MSE) = 48.03604436123168
Mean absolute percentage error (MAPE) = 644.7164658579242
Cohen kappa score = 0.3024082116067903
Fold 3, mse = 48.0360, mad = 5.3264


  prec0 = cf[0][0] / (cf[0][0] + cf[1][0])


Fold 3 Epoch 1 Batch 0: Train Loss = 33365.4141
Fold 3, mse = 48.1411, mad = 5.4226


  prec0 = cf[0][0] / (cf[0][0] + cf[1][0])


Fold 3 Epoch 2 Batch 0: Train Loss = 8520.5449
------------ Save FOLD-BEST model - MSE: 47.3760 ------------
Custom bins confusion matrix:
[[37 65]
 [ 8 61]]
Mean absolute deviation (MAD) = 5.3283274727396215
Mean squared error (MSE) = 47.37604184042629
Mean absolute percentage error (MAPE) = 656.3263703388303
Cohen kappa score = 0.21771009588268475
Fold 3, mse = 47.3760, mad = 5.3283




Fold 3 Epoch 3 Batch 0: Train Loss = 4228.6621
Fold 3, mse = 47.4892, mad = 5.4173




Fold 3 Epoch 4 Batch 0: Train Loss = 1629.8640
------------ Save FOLD-BEST model - MSE: 46.7042 ------------
Custom bins confusion matrix:
[[39 63]
 [ 8 61]]
Mean absolute deviation (MAD) = 5.2889208534725904
Mean squared error (MSE) = 46.70416756625951
Mean absolute percentage error (MAPE) = 658.0144305000878
Cohen kappa score = 0.23598263167830846
Fold 3, mse = 46.7042, mad = 5.2889




Fold 3 Epoch 5 Batch 0: Train Loss = 902.8235
Fold 3, mse = 46.9501, mad = 5.4082




Fold 3 Epoch 6 Batch 0: Train Loss = 461.1207
Fold 3, mse = 48.4442, mad = 5.6774




Fold 3 Epoch 7 Batch 0: Train Loss = 255.4033
------------ Save FOLD-BEST model - MSE: 46.4918 ------------
Custom bins confusion matrix:
[[33 69]
 [ 7 62]]
Mean absolute deviation (MAD) = 5.3285937698725245
Mean squared error (MSE) = 46.491760433338484
Mean absolute percentage error (MAPE) = 680.7587484008676
Cohen kappa score = 0.19389653889095648
Fold 3, mse = 46.4918, mad = 5.3286


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 8 Batch 0: Train Loss = 113.9199
------------ Save FOLD-BEST model - MSE: 45.9323 ------------
Custom bins confusion matrix:
[[62 40]
 [14 55]]
Mean absolute deviation (MAD) = 5.142492061642645
Mean squared error (MSE) = 45.932347414700715
Mean absolute percentage error (MAPE) = 622.439784694091
Cohen kappa score = 0.38167938931297707
Fold 3, mse = 45.9323, mad = 5.1425


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 9 Batch 0: Train Loss = 63.3793
Fold 3, mse = 47.0681, mad = 5.5155


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 10 Batch 0: Train Loss = 35.0977
Fold 3, epoch 10: Loss = 2.3011 Valid loss = 2.4483 MSE = 47.0925 AUROC = 0.5985
Fold 3, mse = 47.0925, mad = 5.5292


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 11 Batch 0: Train Loss = 19.7962
------------ Save FOLD-BEST model - MSE: 45.2788 ------------
Custom bins confusion matrix:
[[60 42]
 [14 55]]
Mean absolute deviation (MAD) = 5.1317864926038865
Mean squared error (MSE) = 45.27881444620349
Mean absolute percentage error (MAPE) = 638.9702833142002
Cohen kappa score = 0.36160000000000003
Fold 3, mse = 45.2788, mad = 5.1318


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 12 Batch 0: Train Loss = 10.7819
Fold 3, mse = 46.0672, mad = 5.3892


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 13 Batch 0: Train Loss = 5.7767
------------ Save FOLD-BEST model - MSE: 44.5219 ------------
Custom bins confusion matrix:
[[50 52]
 [10 59]]
Mean absolute deviation (MAD) = 5.121653378993198
Mean squared error (MSE) = 44.52185384791103
Mean absolute percentage error (MAPE) = 657.529247679315
Cohen kappa score = 0.3143189755529686
Fold 3, mse = 44.5219, mad = 5.1217


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 14 Batch 0: Train Loss = 4.2780
Fold 3, mse = 45.7483, mad = 5.3988


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 3 Epoch 15 Batch 0: Train Loss = 3.5161
Fold 3, mse = 44.6441, mad = 5.1677




Fold 3 Epoch 16 Batch 0: Train Loss = 2.8297
Fold 3, mse = 45.5564, mad = 5.3699




Fold 3 Epoch 17 Batch 0: Train Loss = 2.8210
------------ Save FOLD-BEST model - MSE: 44.2145 ------------
Custom bins confusion matrix:
[[45 57]
 [ 9 60]]
Mean absolute deviation (MAD) = 5.109968506145012
Mean squared error (MSE) = 44.214474639657446
Mean absolute percentage error (MAPE) = 669.2566615107086
Cohen kappa score = 0.2793103448275861
Fold 3, mse = 44.2145, mad = 5.1100




Fold 3 Epoch 18 Batch 0: Train Loss = 2.5746
Fold 3, mse = 45.7973, mad = 5.4217




Fold 3 Epoch 19 Batch 0: Train Loss = 2.6152
Fold 3, mse = 44.2569, mad = 5.1335




Fold 3 Epoch 20 Batch 0: Train Loss = 2.2818
Fold 3, epoch 20: Loss = 2.1785 Valid loss = 2.3507 MSE = 45.5869 AUROC = 0.6753
Fold 3, mse = 45.5869, mad = 5.3896




Fold 3 Epoch 21 Batch 0: Train Loss = 2.4364
Fold 3, mse = 44.5108, mad = 5.1888




Fold 3 Epoch 22 Batch 0: Train Loss = 2.6718
Fold 3, mse = 45.2486, mad = 5.3372




Fold 3 Epoch 23 Batch 0: Train Loss = 2.1491
Fold 3, mse = 44.6044, mad = 5.2101




Fold 3 Epoch 24 Batch 0: Train Loss = 2.2029
Fold 3, mse = 45.1864, mad = 5.2939




Fold 3 Epoch 25 Batch 0: Train Loss = 2.4282
Fold 3, mse = 44.5399, mad = 5.1726




Fold 3 Epoch 26 Batch 0: Train Loss = 1.9972
Fold 3, mse = 45.8692, mad = 5.4232
Fold 3 Epoch 27 Batch 0: Train Loss = 2.2184




------------ Save FOLD-BEST model - MSE: 43.9952 ------------
Custom bins confusion matrix:
[[45 57]
 [ 9 60]]
Mean absolute deviation (MAD) = 5.068303358562506
Mean squared error (MSE) = 43.99516373052275
Mean absolute percentage error (MAPE) = 669.9580427106773
Cohen kappa score = 0.2793103448275861
Fold 3, mse = 43.9952, mad = 5.0683




Fold 3 Epoch 28 Batch 0: Train Loss = 2.6918
Fold 3, mse = 44.7753, mad = 5.2392
Fold 3 Epoch 29 Batch 0: Train Loss = 2.2518




Fold 3, mse = 44.1396, mad = 5.1053
Fold 3 Epoch 30 Batch 0: Train Loss = 2.0354




Fold 3, epoch 30: Loss = 2.0145 Valid loss = 2.2688 MSE = 45.0920 AUROC = 0.7943
Fold 3, mse = 45.0920, mad = 5.2643




Fold 3 Epoch 31 Batch 0: Train Loss = 1.8774
Fold 3, mse = 44.0412, mad = 5.0627
Fold 3 Epoch 32 Batch 0: Train Loss = 1.9186




Fold 3, mse = 45.4948, mad = 5.3339




Fold 3 Epoch 33 Batch 0: Train Loss = 2.0692
------------ Save FOLD-BEST model - MSE: 43.0109 ------------
Custom bins confusion matrix:
[[57 45]
 [11 58]]
Mean absolute deviation (MAD) = 4.876521256673038
Mean squared error (MSE) = 43.01094550746842
Mean absolute percentage error (MAPE) = 628.5412617712245
Cohen kappa score = 0.36991709435452036
Fold 3, mse = 43.0109, mad = 4.8765




Fold 3 Epoch 34 Batch 0: Train Loss = 2.2666
Fold 3, mse = 45.6833, mad = 5.3266




Fold 3 Epoch 35 Batch 0: Train Loss = 2.1822
Fold 3, mse = 43.6218, mad = 4.9386




Fold 3 Epoch 36 Batch 0: Train Loss = 1.8198
Fold 3, mse = 45.6334, mad = 5.2344
Fold 3 Epoch 37 Batch 0: Train Loss = 2.1230




Fold 3, mse = 44.6881, mad = 5.1176




Fold 3 Epoch 38 Batch 0: Train Loss = 2.2319
Fold 3, mse = 43.5180, mad = 4.9921




Fold 3 Epoch 39 Batch 0: Train Loss = 1.8340
Fold 3, mse = 44.5275, mad = 5.1325




Fold 3 Epoch 40 Batch 0: Train Loss = 2.2906
Fold 3, epoch 40: Loss = 1.9079 Valid loss = 2.0612 MSE = 43.3937 AUROC = 0.9075
Fold 3, mse = 43.3937, mad = 4.8707




Fold 3 Epoch 41 Batch 0: Train Loss = 2.1099
------------ Save FOLD-BEST model - MSE: 42.3045 ------------
Custom bins confusion matrix:
[[61 41]
 [12 57]]
Mean absolute deviation (MAD) = 4.6951909685456625
Mean squared error (MSE) = 42.304478457143496
Mean absolute percentage error (MAPE) = 573.7161994824933
Cohen kappa score = 0.39712632209139886
Fold 3, mse = 42.3045, mad = 4.6952




Fold 3 Epoch 42 Batch 0: Train Loss = 1.9915
Fold 3, mse = 43.8277, mad = 5.0286




Fold 3 Epoch 43 Batch 0: Train Loss = 2.2160
Fold 3, mse = 42.5785, mad = 4.7406




Fold 3 Epoch 44 Batch 0: Train Loss = 2.0721
Fold 3, mse = 45.5375, mad = 5.2163




Fold 3 Epoch 45 Batch 0: Train Loss = 1.8571
------------ Save FOLD-BEST model - MSE: 41.8098 ------------
Custom bins confusion matrix:
[[71 31]
 [13 56]]
Mean absolute deviation (MAD) = 4.624832782728344
Mean squared error (MSE) = 41.80977691860192
Mean absolute percentage error (MAPE) = 552.284753029347
Cohen kappa score = 0.4871165644171779
Fold 3, mse = 41.8098, mad = 4.6248




Fold 3 Epoch 46 Batch 0: Train Loss = 1.9255
Fold 3, mse = 41.9246, mad = 4.7157




Fold 3 Epoch 47 Batch 0: Train Loss = 1.6938
Fold 3, mse = 43.8297, mad = 5.0844




Fold 3 Epoch 48 Batch 0: Train Loss = 1.9558
------------ Save FOLD-BEST model - MSE: 41.4123 ------------
Custom bins confusion matrix:
[[66 36]
 [12 57]]
Mean absolute deviation (MAD) = 4.637580646366266
Mean squared error (MSE) = 41.41229906853123
Mean absolute percentage error (MAPE) = 568.4735530050925
Cohen kappa score = 0.44794188861985473
Fold 3, mse = 41.4123, mad = 4.6376




Fold 3 Epoch 49 Batch 0: Train Loss = 1.9751
Fold 3, mse = 42.6861, mad = 4.7890
Fold 3 Epoch 50 Batch 0: Train Loss = 1.8752




Fold 3, epoch 50: Loss = 1.7301 Valid loss = 2.0086 MSE = 41.7787 AUROC = 0.9328
Fold 3, mse = 41.7787, mad = 4.6684
Fold 3 Epoch 51 Batch 0: Train Loss = 1.7473




Fold 3, mse = 41.5819, mad = 4.6621
Fold 3 Epoch 52 Batch 0: Train Loss = 1.8422




------------ Save FOLD-BEST model - MSE: 41.2660 ------------
Custom bins confusion matrix:
[[64 38]
 [11 58]]
Mean absolute deviation (MAD) = 4.656846441342342
Mean squared error (MSE) = 41.265956232591094
Mean absolute percentage error (MAPE) = 563.979697335685
Cohen kappa score = 0.44016837041491286
Fold 3, mse = 41.2660, mad = 4.6568




Fold 3 Epoch 53 Batch 0: Train Loss = 1.7979
Fold 3, mse = 42.0484, mad = 4.7247




Fold 3 Epoch 54 Batch 0: Train Loss = 1.8528
Fold 3, mse = 41.4544, mad = 4.5903




Fold 3 Epoch 55 Batch 0: Train Loss = 1.8490
Fold 3, mse = 42.7129, mad = 4.7483




Fold 3 Epoch 56 Batch 0: Train Loss = 1.6933
Fold 3, mse = 42.6361, mad = 4.7437




Fold 3 Epoch 57 Batch 0: Train Loss = 1.8616
Fold 3, mse = 41.9501, mad = 4.6983




Fold 3 Epoch 58 Batch 0: Train Loss = 1.8588
Fold 3, mse = 42.0363, mad = 4.6993




Fold 3 Epoch 59 Batch 0: Train Loss = 1.7072
------------ Save FOLD-BEST model - MSE: 40.8345 ------------
Custom bins confusion matrix:
[[77 25]
 [20 49]]
Mean absolute deviation (MAD) = 4.54013252221099
Mean squared error (MSE) = 40.83446608185083
Mean absolute percentage error (MAPE) = 494.5828047498076
Cohen kappa score = 0.4596587318306299
Fold 3, mse = 40.8345, mad = 4.5401




Fold 3 Epoch 60 Batch 0: Train Loss = 1.4646
Fold 3, epoch 60: Loss = 1.6268 Valid loss = 1.8615 MSE = 40.2524 AUROC = 0.9571
------------ Save FOLD-BEST model - MSE: 40.2524 ------------
Custom bins confusion matrix:
[[69 33]
 [12 57]]
Mean absolute deviation (MAD) = 4.533454327109091
Mean squared error (MSE) = 40.25241329541019
Mean absolute percentage error (MAPE) = 533.8251564954523
Cohen kappa score = 0.4789762340036563
Fold 3, mse = 40.2524, mad = 4.5335




Fold 3 Epoch 61 Batch 0: Train Loss = 1.7512
Fold 3, mse = 41.6822, mad = 4.7088




Fold 3 Epoch 62 Batch 0: Train Loss = 1.7702
Fold 3, mse = 41.2230, mad = 4.5635




Fold 3 Epoch 63 Batch 0: Train Loss = 1.7924
Fold 3, mse = 40.8062, mad = 4.6334




Fold 3 Epoch 64 Batch 0: Train Loss = 1.7688
------------ Save FOLD-BEST model - MSE: 40.0337 ------------
Custom bins confusion matrix:
[[73 29]
 [14 55]]
Mean absolute deviation (MAD) = 4.593520116122473
Mean squared error (MSE) = 40.033731187855
Mean absolute percentage error (MAPE) = 562.429438679403
Cohen kappa score = 0.49536751080914143
Fold 3, mse = 40.0337, mad = 4.5935




Fold 3 Epoch 65 Batch 0: Train Loss = 1.5399
Fold 3, mse = 40.3775, mad = 4.6066




Fold 3 Epoch 66 Batch 0: Train Loss = 1.6482
Fold 3, mse = 40.7829, mad = 4.5503




Fold 3 Epoch 67 Batch 0: Train Loss = 1.9914
Fold 3, mse = 41.1138, mad = 4.5949




Fold 3 Epoch 68 Batch 0: Train Loss = 1.3818
------------ Save FOLD-BEST model - MSE: 39.9566 ------------
Custom bins confusion matrix:
[[67 35]
 [10 59]]
Mean absolute deviation (MAD) = 4.569057594779495
Mean squared error (MSE) = 39.95656088002158
Mean absolute percentage error (MAPE) = 564.0328268489807
Cohen kappa score = 0.48359170525468087
Fold 3, mse = 39.9566, mad = 4.5691




Fold 3 Epoch 69 Batch 0: Train Loss = 1.8427
Fold 3, mse = 39.9881, mad = 4.5503




Fold 3 Epoch 70 Batch 0: Train Loss = 1.6354
Fold 3, epoch 70: Loss = 1.5308 Valid loss = 1.7685 MSE = 40.4016 AUROC = 0.9776
Fold 3, mse = 40.4016, mad = 4.5471




Fold 3 Epoch 71 Batch 0: Train Loss = 1.4382
Fold 3, mse = 40.3218, mad = 4.5480




Fold 3 Epoch 72 Batch 0: Train Loss = 1.6368
------------ Save FOLD-BEST model - MSE: 39.7262 ------------
Custom bins confusion matrix:
[[79 23]
 [18 51]]
Mean absolute deviation (MAD) = 4.464689300378826
Mean squared error (MSE) = 39.726180789594075
Mean absolute percentage error (MAPE) = 514.586877392249
Cohen kappa score = 0.5076890667790184
Fold 3, mse = 39.7262, mad = 4.4647




Fold 3 Epoch 73 Batch 0: Train Loss = 1.4999
------------ Save FOLD-BEST model - MSE: 39.6370 ------------
Custom bins confusion matrix:
[[76 26]
 [14 55]]
Mean absolute deviation (MAD) = 4.528233654924719
Mean squared error (MSE) = 39.63701191007732
Mean absolute percentage error (MAPE) = 552.3807728496181
Cohen kappa score = 0.527363184079602
Fold 3, mse = 39.6370, mad = 4.5282




Fold 3 Epoch 74 Batch 0: Train Loss = 1.2484
Fold 3, mse = 40.1764, mad = 4.5760




Fold 3 Epoch 75 Batch 0: Train Loss = 1.3261
Fold 3, mse = 40.5628, mad = 4.7036




Fold 3 Epoch 76 Batch 0: Train Loss = 1.7324
Fold 3, mse = 40.0982, mad = 4.6784




Fold 3 Epoch 77 Batch 0: Train Loss = 1.3128
------------ Save FOLD-BEST model - MSE: 39.0390 ------------
Custom bins confusion matrix:
[[78 24]
 [16 53]]
Mean absolute deviation (MAD) = 4.484659608350293
Mean squared error (MSE) = 39.0390186786169
Mean absolute percentage error (MAPE) = 528.8988445951513
Cohen kappa score = 0.5230125523012552
Fold 3, mse = 39.0390, mad = 4.4847




Fold 3 Epoch 78 Batch 0: Train Loss = 1.3509
Fold 3, mse = 40.6652, mad = 4.6025




Fold 3 Epoch 79 Batch 0: Train Loss = 1.5879
Fold 3, mse = 39.5857, mad = 4.5161




Fold 3 Epoch 80 Batch 0: Train Loss = 1.6282
Fold 3, epoch 80: Loss = 1.4424 Valid loss = 1.7269 MSE = 39.8691 AUROC = 0.9817
Fold 3, mse = 39.8691, mad = 4.6779




Fold 3 Epoch 81 Batch 0: Train Loss = 1.6455
Fold 3, mse = 40.2702, mad = 4.6669




Fold 3 Epoch 82 Batch 0: Train Loss = 1.6353
Fold 3, mse = 40.3278, mad = 4.5355




Fold 3 Epoch 83 Batch 0: Train Loss = 1.6548
Fold 3, mse = 41.2997, mad = 4.5945




Fold 3 Epoch 84 Batch 0: Train Loss = 1.2928
Fold 3, mse = 39.5110, mad = 4.7936




Fold 3 Epoch 85 Batch 0: Train Loss = 1.7386
Fold 3, mse = 39.8033, mad = 4.5514




Fold 3 Epoch 86 Batch 0: Train Loss = 1.4983
Fold 3, mse = 40.4141, mad = 4.5718




Fold 3 Epoch 87 Batch 0: Train Loss = 1.3329
Fold 3, mse = 39.3213, mad = 4.6655




Fold 3 Epoch 88 Batch 0: Train Loss = 1.3803
------------ Save FOLD-BEST model - MSE: 38.3916 ------------
Custom bins confusion matrix:
[[71 31]
 [10 59]]
Mean absolute deviation (MAD) = 4.5304459445733425
Mean squared error (MSE) = 38.391628249773945
Mean absolute percentage error (MAPE) = 580.9720080347884
Cohen kappa score = 0.5252894576477758
Fold 3, mse = 38.3916, mad = 4.5304




Fold 3 Epoch 89 Batch 0: Train Loss = 1.6746
Fold 3, mse = 40.5120, mad = 4.6032




Fold 3 Epoch 90 Batch 0: Train Loss = 1.5291
Fold 3, epoch 90: Loss = 1.3855 Valid loss = 1.7423 MSE = 39.4217 AUROC = 0.9841
Fold 3, mse = 39.4217, mad = 4.4564




Fold 3 Epoch 91 Batch 0: Train Loss = 1.4389
Fold 3, mse = 42.1376, mad = 5.0273




Fold 3 Epoch 92 Batch 0: Train Loss = 1.4161
Fold 3, mse = 39.1207, mad = 4.4741




Fold 3 Epoch 93 Batch 0: Train Loss = 1.2434
Fold 3, mse = 40.1443, mad = 4.5403




Fold 3 Epoch 94 Batch 0: Train Loss = 1.1687
Fold 3, mse = 39.4233, mad = 4.4309




Fold 3 Epoch 95 Batch 0: Train Loss = 1.3655
Fold 3, mse = 38.9079, mad = 4.3872




Fold 3 Epoch 96 Batch 0: Train Loss = 1.2078
Fold 3, mse = 41.2136, mad = 4.5731




Fold 3 Epoch 97 Batch 0: Train Loss = 1.4311
Fold 3, mse = 39.5988, mad = 4.5239




Fold 3 Epoch 98 Batch 0: Train Loss = 1.2458
Fold 3, mse = 39.1764, mad = 4.4682




Fold 3 Epoch 99 Batch 0: Train Loss = 1.4560
------------ Save FOLD-BEST model - MSE: 37.9940 ------------
Custom bins confusion matrix:
[[68 34]
 [ 5 64]]
Mean absolute deviation (MAD) = 4.537075539968363
Mean squared error (MSE) = 37.99403815700836
Mean absolute percentage error (MAPE) = 587.7510036539608
Cohen kappa score = 0.5563759728597086
Fold 3, mse = 37.9940, mad = 4.5371




Fold 3 Epoch 100 Batch 0: Train Loss = 1.2451
Fold 3, epoch 100: Loss = 1.3693 Valid loss = 1.6350 MSE = 38.5181 AUROC = 0.9857
Fold 3, mse = 38.5181, mad = 4.4486




Fold 3 Epoch 101 Batch 0: Train Loss = 1.4413
Fold 3, mse = 39.6273, mad = 4.4618




Fold 3 Epoch 102 Batch 0: Train Loss = 1.5156
Fold 3, mse = 38.8774, mad = 4.6142




Fold 3 Epoch 103 Batch 0: Train Loss = 1.5946
------------ Save FOLD-BEST model - MSE: 37.6400 ------------
Custom bins confusion matrix:
[[72 30]
 [10 59]]
Mean absolute deviation (MAD) = 4.43665905863101
Mean squared error (MSE) = 37.640049273226374
Mean absolute percentage error (MAPE) = 572.9245434176552
Cohen kappa score = 0.5358306188925082
Fold 3, mse = 37.6400, mad = 4.4367




Fold 3 Epoch 104 Batch 0: Train Loss = 1.1313
Fold 3, mse = 39.3626, mad = 4.4800




Fold 3 Epoch 105 Batch 0: Train Loss = 1.1675
Fold 3, mse = 39.5268, mad = 4.4322
Fold 3 Epoch 106 Batch 0: Train Loss = 1.3977




Fold 3, mse = 40.0572, mad = 4.4842




Fold 3 Epoch 107 Batch 0: Train Loss = 1.4584
Fold 3, mse = 40.4222, mad = 4.4949




Fold 3 Epoch 108 Batch 0: Train Loss = 1.4337
Fold 3, mse = 39.7060, mad = 4.4532




Fold 3 Epoch 109 Batch 0: Train Loss = 1.3653
Fold 3, mse = 39.9661, mad = 4.4847




Fold 3 Epoch 110 Batch 0: Train Loss = 1.1388
Fold 3, epoch 110: Loss = 1.3046 Valid loss = 1.6386 MSE = 38.7642 AUROC = 0.9870
Fold 3, mse = 38.7642, mad = 4.4570




Fold 3 Epoch 111 Batch 0: Train Loss = 1.0954
Fold 3, mse = 39.6396, mad = 4.4458




Fold 3 Epoch 112 Batch 0: Train Loss = 1.2889
Fold 3, mse = 40.4570, mad = 4.5628




Fold 3 Epoch 113 Batch 0: Train Loss = 1.2379
Fold 3, mse = 38.0930, mad = 4.4758




Fold 3 Epoch 114 Batch 0: Train Loss = 1.3577
Fold 3, mse = 39.3627, mad = 4.5538




Fold 3 Epoch 115 Batch 0: Train Loss = 1.3090
Fold 3, mse = 39.5720, mad = 4.6134




Fold 3 Epoch 116 Batch 0: Train Loss = 1.1164
Fold 3, mse = 39.3462, mad = 4.4465




Fold 3 Epoch 117 Batch 0: Train Loss = 1.3326
Fold 3, mse = 40.9769, mad = 4.4813




Fold 3 Epoch 118 Batch 0: Train Loss = 1.0586
Fold 3, mse = 40.2254, mad = 4.8441




Fold 3 Epoch 119 Batch 0: Train Loss = 1.4664
Fold 3, mse = 37.7902, mad = 4.4847




Fold 3 Epoch 120 Batch 0: Train Loss = 1.2286
Fold 3, epoch 120: Loss = 1.2126 Valid loss = 1.7475 MSE = 42.2695 AUROC = 0.9855
Fold 3, mse = 42.2695, mad = 4.5712




Fold 3 Epoch 121 Batch 0: Train Loss = 1.5470
Fold 3, mse = 39.3102, mad = 4.4771




Fold 3 Epoch 122 Batch 0: Train Loss = 1.2445
Fold 3, mse = 39.8477, mad = 4.5089




Fold 3 Epoch 123 Batch 0: Train Loss = 1.1027
Fold 3, mse = 40.3629, mad = 4.6951




Fold 3 Epoch 124 Batch 0: Train Loss = 1.4703
Fold 3, mse = 41.0757, mad = 4.7072




Fold 3 Epoch 125 Batch 0: Train Loss = 1.1315
Fold 3, mse = 41.1534, mad = 4.5024




Fold 3 Epoch 126 Batch 0: Train Loss = 1.2914
Fold 3, mse = 38.9482, mad = 4.4811




Fold 3 Epoch 127 Batch 0: Train Loss = 1.3370
Fold 3, mse = 40.7922, mad = 4.4992




Fold 3 Epoch 128 Batch 0: Train Loss = 1.2325
Fold 3, mse = 40.8972, mad = 4.4952




Fold 3 Epoch 129 Batch 0: Train Loss = 1.3325
Fold 3, mse = 39.1815, mad = 4.5185




Fold 3 Epoch 130 Batch 0: Train Loss = 1.1026
Fold 3, epoch 130: Loss = 1.1569 Valid loss = 1.6816 MSE = 40.4245 AUROC = 0.9870
Fold 3, mse = 40.4245, mad = 4.4974




Fold 3 Epoch 131 Batch 0: Train Loss = 1.3151
Fold 3, mse = 40.4353, mad = 4.5849




Fold 3 Epoch 132 Batch 0: Train Loss = 1.0749
Fold 3, mse = 41.0345, mad = 4.6539




Fold 3 Epoch 133 Batch 0: Train Loss = 1.2366
Fold 3, mse = 40.1149, mad = 4.5994




Fold 3 Epoch 134 Batch 0: Train Loss = 1.1738
Fold 3, mse = 42.1722, mad = 4.6220




Fold 3 Epoch 135 Batch 0: Train Loss = 1.3450
Fold 3, mse = 42.1157, mad = 4.4717




Fold 3 Epoch 136 Batch 0: Train Loss = 1.0867
Fold 3, mse = 42.1039, mad = 4.5050




Fold 3 Epoch 137 Batch 0: Train Loss = 1.1723
Fold 3, mse = 40.4630, mad = 4.5142




Fold 3 Epoch 138 Batch 0: Train Loss = 1.0731
Fold 3, mse = 41.0907, mad = 4.5914




Fold 3 Epoch 139 Batch 0: Train Loss = 1.2611
Fold 3, mse = 42.7841, mad = 4.7368




Fold 3 Epoch 140 Batch 0: Train Loss = 1.1647
Fold 3, epoch 140: Loss = 1.0977 Valid loss = 1.8152 MSE = 43.5707 AUROC = 0.9894
Fold 3, mse = 43.5707, mad = 4.7596




Fold 3 Epoch 141 Batch 0: Train Loss = 1.2279
Fold 3, mse = 43.4040, mad = 4.5133




Fold 3 Epoch 142 Batch 0: Train Loss = 1.2982
Fold 3, mse = 41.5315, mad = 4.5490




Fold 3 Epoch 143 Batch 0: Train Loss = 1.3203
Fold 3, mse = 43.0972, mad = 4.5337




Fold 3 Epoch 144 Batch 0: Train Loss = 1.1886
Fold 3, mse = 41.2751, mad = 4.5008




Fold 3 Epoch 145 Batch 0: Train Loss = 0.9120
Fold 3, mse = 41.8877, mad = 4.5387




Fold 3 Epoch 146 Batch 0: Train Loss = 1.3121
Fold 3, mse = 44.6448, mad = 4.8165




Fold 3 Epoch 147 Batch 0: Train Loss = 1.1193
Fold 3, mse = 40.4707, mad = 4.5112




Fold 3 Epoch 148 Batch 0: Train Loss = 0.8633
Fold 3, mse = 42.3247, mad = 4.5516




Fold 3 Epoch 149 Batch 0: Train Loss = 0.9480
Fold 3, mse = 42.0849, mad = 4.7011
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 0
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 1
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 2
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 3
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 4
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 5
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 6
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 7
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 8
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 9
model_module_idx is 10
model_module_id



Fold 4 Epoch 0 Batch 0: Train Loss = 233247.2344
Fold 4, epoch 0: Loss = 2.5952 Valid loss = 2.6381 MSE = 50.7483 AUROC = 0.3604
------------ Save FOLD-BEST model - MSE: 50.7483 ------------
Custom bins confusion matrix:
[[97  0  0]
 [71  0  0]
 [ 1  0  0]]
Mean absolute deviation (MAD) = 5.198244965521213
Mean squared error (MSE) = 50.74825352435708
Mean absolute percentage error (MAPE) = 425.26451798774747
Cohen kappa score = 0.0
Fold 4, mse = 50.7483, mad = 5.1982


  prec0 = cf[0][0] / (cf[0][0] + cf[1][0])


Fold 4 Epoch 1 Batch 0: Train Loss = 26091.1191
------------ Save FOLD-BEST model - MSE: 49.4963 ------------
Custom bins confusion matrix:
[[97  0  0]
 [71  0  0]
 [ 1  0  0]]
Mean absolute deviation (MAD) = 5.173847568117865
Mean squared error (MSE) = 49.49629244077605
Mean absolute percentage error (MAPE) = 443.7352885705814
Cohen kappa score = 0.0
Fold 4, mse = 49.4963, mad = 5.1738




Fold 4 Epoch 2 Batch 0: Train Loss = 6883.3843
------------ Save FOLD-BEST model - MSE: 48.6956 ------------
Custom bins confusion matrix:
[[78 19  0]
 [40 31  0]
 [ 1  0  0]]
Mean absolute deviation (MAD) = 5.2065680136151435
Mean squared error (MSE) = 48.69564489868873
Mean absolute percentage error (MAPE) = 472.2111347403561
Cohen kappa score = 0.24126002796791057
Fold 4, mse = 48.6956, mad = 5.2066




Fold 4 Epoch 3 Batch 0: Train Loss = 3553.0984
------------ Save FOLD-BEST model - MSE: 47.4020 ------------
Custom bins confusion matrix:
[[65 32  0]
 [16 55  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 5.12017337912907
Mean squared error (MSE) = 47.40201972882388
Mean absolute percentage error (MAPE) = 472.830825738272
Cohen kappa score = 0.43035014101946745
Fold 4, mse = 47.4020, mad = 5.1202




Fold 4 Epoch 4 Batch 0: Train Loss = 1634.5262
------------ Save FOLD-BEST model - MSE: 45.7680 ------------
Custom bins confusion matrix:
[[59 38  0]
 [14 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 5.01354034263594
Mean squared error (MSE) = 45.76802268285485
Mean absolute percentage error (MAPE) = 481.6026044937939
Cohen kappa score = 0.3922100834633915
Fold 4, mse = 45.7680, mad = 5.0135




Fold 4 Epoch 5 Batch 0: Train Loss = 851.4927
------------ Save FOLD-BEST model - MSE: 44.2775 ------------
Custom bins confusion matrix:
[[59 38  0]
 [14 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.8436000502950805
Mean squared error (MSE) = 44.27749535397923
Mean absolute percentage error (MAPE) = 470.2764546479999
Cohen kappa score = 0.3922100834633915
Fold 4, mse = 44.2775, mad = 4.8436


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 6 Batch 0: Train Loss = 469.2957
Fold 4, mse = 44.5034, mad = 5.0266


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 7 Batch 0: Train Loss = 220.9689
------------ Save FOLD-BEST model - MSE: 43.5097 ------------
Custom bins confusion matrix:
[[59 38  0]
 [14 57  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.840849117873317
Mean squared error (MSE) = 43.5097483360387
Mean absolute percentage error (MAPE) = 503.52261978192195
Cohen kappa score = 0.3922100834633915
Fold 4, mse = 43.5097, mad = 4.8408


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 8 Batch 0: Train Loss = 119.4933
Fold 4, mse = 43.5912, mad = 4.7957


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 9 Batch 0: Train Loss = 54.7018
Fold 4, mse = 43.7042, mad = 4.8391


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 10 Batch 0: Train Loss = 31.6705
Fold 4, epoch 10: Loss = 2.2671 Valid loss = 2.3672 MSE = 43.9868 AUROC = 0.6874
Fold 4, mse = 43.9868, mad = 4.8581


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 11 Batch 0: Train Loss = 15.8538
Fold 4, mse = 44.1657, mad = 4.9124


  prec1 = cf[1][1] / (cf[1][1] + cf[0][1])


Fold 4 Epoch 12 Batch 0: Train Loss = 8.8386
Fold 4, mse = 44.0651, mad = 4.8815




Fold 4 Epoch 13 Batch 0: Train Loss = 5.6407
Fold 4, mse = 43.9930, mad = 4.8366




Fold 4 Epoch 14 Batch 0: Train Loss = 3.8445
Fold 4, mse = 44.1267, mad = 4.9379




Fold 4 Epoch 15 Batch 0: Train Loss = 3.1847
Fold 4, mse = 44.0870, mad = 4.9185




Fold 4 Epoch 16 Batch 0: Train Loss = 2.9368
Fold 4, mse = 44.7245, mad = 4.9409




Fold 4 Epoch 17 Batch 0: Train Loss = 2.8329
Fold 4, mse = 44.8422, mad = 4.9721




Fold 4 Epoch 18 Batch 0: Train Loss = 2.6796
Fold 4, mse = 44.5285, mad = 4.9208




Fold 4 Epoch 19 Batch 0: Train Loss = 2.5704
Fold 4, mse = 44.7921, mad = 4.9709




Fold 4 Epoch 20 Batch 0: Train Loss = 2.3913
Fold 4, epoch 20: Loss = 2.1403 Valid loss = 2.3250 MSE = 44.9947 AUROC = 0.8278
Fold 4, mse = 44.9947, mad = 4.9916




Fold 4 Epoch 21 Batch 0: Train Loss = 2.4393
Fold 4, mse = 45.4568, mad = 5.0573




Fold 4 Epoch 22 Batch 0: Train Loss = 2.2976
Fold 4, mse = 45.4354, mad = 5.0165




Fold 4 Epoch 23 Batch 0: Train Loss = 2.1727
Fold 4, mse = 45.7170, mad = 5.0379




Fold 4 Epoch 24 Batch 0: Train Loss = 2.0118
Fold 4, mse = 46.0110, mad = 5.0802
Fold 4 Epoch 25 Batch 0: Train Loss = 2.3112




Fold 4, mse = 44.8554, mad = 4.9661
Fold 4 Epoch 26 Batch 0: Train Loss = 2.2655




Fold 4, mse = 47.1894, mad = 5.2297




Fold 4 Epoch 27 Batch 0: Train Loss = 2.3627
Fold 4, mse = 44.5398, mad = 4.8892




Fold 4 Epoch 28 Batch 0: Train Loss = 2.1820
Fold 4, mse = 46.6537, mad = 5.1559




Fold 4 Epoch 29 Batch 0: Train Loss = 2.4248
Fold 4, mse = 45.6560, mad = 5.0376




Fold 4 Epoch 30 Batch 0: Train Loss = 2.1092
Fold 4, epoch 30: Loss = 2.0249 Valid loss = 2.2189 MSE = 46.4469 AUROC = 0.9014
Fold 4, mse = 46.4469, mad = 5.1310




Fold 4 Epoch 31 Batch 0: Train Loss = 2.0221
Fold 4, mse = 46.0397, mad = 5.1773




Fold 4 Epoch 32 Batch 0: Train Loss = 2.5363
Fold 4, mse = 47.1652, mad = 5.2497




Fold 4 Epoch 33 Batch 0: Train Loss = 1.9360
Fold 4, mse = 47.1406, mad = 5.1737




Fold 4 Epoch 34 Batch 0: Train Loss = 2.0862
Fold 4, mse = 47.6624, mad = 5.2601




Fold 4 Epoch 35 Batch 0: Train Loss = 2.1194
Fold 4, mse = 47.0975, mad = 5.2213




Fold 4 Epoch 36 Batch 0: Train Loss = 1.8456
Fold 4, mse = 48.1219, mad = 5.3313




Fold 4 Epoch 37 Batch 0: Train Loss = 2.0184
Fold 4, mse = 45.4731, mad = 5.0366




Fold 4 Epoch 38 Batch 0: Train Loss = 1.8387
Fold 4, mse = 48.4942, mad = 5.3611




Fold 4 Epoch 39 Batch 0: Train Loss = 2.0352
Fold 4, mse = 47.0303, mad = 5.2096
Fold 4 Epoch 40 Batch 0: Train Loss = 2.2921




Fold 4, epoch 40: Loss = 1.8722 Valid loss = 2.1838 MSE = 47.4227 AUROC = 0.9219
Fold 4, mse = 47.4227, mad = 5.2799




Fold 4 Epoch 41 Batch 0: Train Loss = 1.7557
Fold 4, mse = 47.2179, mad = 5.2291




Fold 4 Epoch 42 Batch 0: Train Loss = 1.9960
Fold 4, mse = 47.8263, mad = 5.3409




Fold 4 Epoch 43 Batch 0: Train Loss = 1.9560
Fold 4, mse = 48.5781, mad = 5.4360




Fold 4 Epoch 44 Batch 0: Train Loss = 2.1629
Fold 4, mse = 47.4872, mad = 5.2222




Fold 4 Epoch 45 Batch 0: Train Loss = 1.9937
Fold 4, mse = 46.0606, mad = 5.1363




Fold 4 Epoch 46 Batch 0: Train Loss = 2.1139
Fold 4, mse = 46.8403, mad = 5.2086




Fold 4 Epoch 47 Batch 0: Train Loss = 2.0454
Fold 4, mse = 48.0895, mad = 5.2806




Fold 4 Epoch 48 Batch 0: Train Loss = 2.2037
Fold 4, mse = 47.3881, mad = 5.3296




Fold 4 Epoch 49 Batch 0: Train Loss = 1.8934
Fold 4, mse = 46.5457, mad = 5.1280




Fold 4 Epoch 50 Batch 0: Train Loss = 1.9331
Fold 4, epoch 50: Loss = 1.8035 Valid loss = 2.1135 MSE = 46.6179 AUROC = 0.9445
Fold 4, mse = 46.6179, mad = 5.1634




Fold 4 Epoch 51 Batch 0: Train Loss = 2.2014
Fold 4, mse = 47.3000, mad = 5.2925




Fold 4 Epoch 52 Batch 0: Train Loss = 1.9049
Fold 4, mse = 48.4051, mad = 5.3636




Fold 4 Epoch 53 Batch 0: Train Loss = 1.9008
Fold 4, mse = 46.2491, mad = 5.1101




Fold 4 Epoch 54 Batch 0: Train Loss = 1.8158
Fold 4, mse = 47.5623, mad = 5.2678




Fold 4 Epoch 55 Batch 0: Train Loss = 1.7020
Fold 4, mse = 48.3923, mad = 5.3525




Fold 4 Epoch 56 Batch 0: Train Loss = 1.7157
Fold 4, mse = 44.9330, mad = 5.0263




Fold 4 Epoch 57 Batch 0: Train Loss = 2.0097
Fold 4, mse = 44.9798, mad = 4.9706
Fold 4 Epoch 58 Batch 0: Train Loss = 2.0518




Fold 4, mse = 44.5542, mad = 4.9591
Fold 4 Epoch 59 Batch 0: Train Loss = 1.9116




Fold 4, mse = 49.8517, mad = 5.5103
Fold 4 Epoch 60 Batch 0: Train Loss = 1.8372




Fold 4, epoch 60: Loss = 1.7751 Valid loss = 1.9879 MSE = 44.9564 AUROC = 0.9690
Fold 4, mse = 44.9564, mad = 5.0292
Fold 4 Epoch 61 Batch 0: Train Loss = 2.0155




Fold 4, mse = 44.3365, mad = 4.8822




Fold 4 Epoch 62 Batch 0: Train Loss = 1.8331
Fold 4, mse = 48.0601, mad = 5.3070




Fold 4 Epoch 63 Batch 0: Train Loss = 1.5649
Fold 4, mse = 45.4427, mad = 5.0357




Fold 4 Epoch 64 Batch 0: Train Loss = 1.9517
Fold 4, mse = 45.7666, mad = 5.0880




Fold 4 Epoch 65 Batch 0: Train Loss = 1.7068
Fold 4, mse = 47.4591, mad = 5.2477




Fold 4 Epoch 66 Batch 0: Train Loss = 1.9651
Fold 4, mse = 44.0244, mad = 4.9248
Fold 4 Epoch 67 Batch 0: Train Loss = 1.5265




Fold 4, mse = 46.0566, mad = 5.1894




Fold 4 Epoch 68 Batch 0: Train Loss = 1.9741
Fold 4, mse = 47.0308, mad = 5.2844




Fold 4 Epoch 69 Batch 0: Train Loss = 1.6170
Fold 4, mse = 43.5141, mad = 4.8386




Fold 4 Epoch 70 Batch 0: Train Loss = 2.0185
Fold 4, epoch 70: Loss = 1.6456 Valid loss = 1.9427 MSE = 44.3198 AUROC = 0.9691
Fold 4, mse = 44.3198, mad = 4.9185




Fold 4 Epoch 71 Batch 0: Train Loss = 1.9576
Fold 4, mse = 46.4558, mad = 5.2929




Fold 4 Epoch 72 Batch 0: Train Loss = 1.7458
Fold 4, mse = 44.9543, mad = 4.9853




Fold 4 Epoch 73 Batch 0: Train Loss = 1.6448
Fold 4, mse = 43.9828, mad = 5.0477
Fold 4 Epoch 74 Batch 0: Train Loss = 1.9325




Fold 4, mse = 44.3162, mad = 4.8428




Fold 4 Epoch 75 Batch 0: Train Loss = 1.3150
------------ Save FOLD-BEST model - MSE: 43.3258 ------------
Custom bins confusion matrix:
[[65 32  0]
 [28 43  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 5.0539640772129
Mean squared error (MSE) = 43.325804298726226
Mean absolute percentage error (MAPE) = 447.079437732316
Cohen kappa score = 0.2759008218023461
Fold 4, mse = 43.3258, mad = 5.0540




Fold 4 Epoch 76 Batch 0: Train Loss = 1.5514
Fold 4, mse = 44.4997, mad = 4.9323




Fold 4 Epoch 77 Batch 0: Train Loss = 1.7482
Fold 4, mse = 44.3177, mad = 5.0003




Fold 4 Epoch 78 Batch 0: Train Loss = 1.9819
------------ Save FOLD-BEST model - MSE: 42.8079 ------------
Custom bins confusion matrix:
[[68 29  0]
 [27 44  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.894439140164973
Mean squared error (MSE) = 42.8079363371372
Mean absolute percentage error (MAPE) = 430.820915236601
Cohen kappa score = 0.3209980968492282
Fold 4, mse = 42.8079, mad = 4.8944




Fold 4 Epoch 79 Batch 0: Train Loss = 1.8084
Fold 4, mse = 44.3517, mad = 5.0029




Fold 4 Epoch 80 Batch 0: Train Loss = 1.8397
Fold 4, epoch 80: Loss = 1.6312 Valid loss = 1.8810 MSE = 42.8511 AUROC = 0.9624
Fold 4, mse = 42.8511, mad = 4.9217




Fold 4 Epoch 81 Batch 0: Train Loss = 1.8817
Fold 4, mse = 43.6762, mad = 4.9965




Fold 4 Epoch 82 Batch 0: Train Loss = 1.7971
Fold 4, mse = 43.0505, mad = 4.9306




Fold 4 Epoch 83 Batch 0: Train Loss = 1.8689
Fold 4, mse = 43.9676, mad = 5.0657




Fold 4 Epoch 84 Batch 0: Train Loss = 1.7352
------------ Save FOLD-BEST model - MSE: 42.3543 ------------
Custom bins confusion matrix:
[[72 25  0]
 [30 41  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.862455465299427
Mean squared error (MSE) = 42.354256925814596
Mean absolute percentage error (MAPE) = 405.90286091110494
Cohen kappa score = 0.3245789323437055
Fold 4, mse = 42.3543, mad = 4.8625




Fold 4 Epoch 85 Batch 0: Train Loss = 1.5996
Fold 4, mse = 42.9281, mad = 4.9379




Fold 4 Epoch 86 Batch 0: Train Loss = 1.2833
Fold 4, mse = 43.6005, mad = 5.1146




Fold 4 Epoch 87 Batch 0: Train Loss = 1.3407
Fold 4, mse = 46.8359, mad = 5.4314




Fold 4 Epoch 88 Batch 0: Train Loss = 1.7428
Fold 4, mse = 42.8620, mad = 4.8081




Fold 4 Epoch 89 Batch 0: Train Loss = 1.6833
Fold 4, mse = 42.4797, mad = 4.9377
Fold 4 Epoch 90 Batch 0: Train Loss = 1.7082




Fold 4, epoch 90: Loss = 1.5219 Valid loss = 1.8883 MSE = 42.8345 AUROC = 0.9610
Fold 4, mse = 42.8345, mad = 4.9865




Fold 4 Epoch 91 Batch 0: Train Loss = 1.2484
Fold 4, mse = 44.3808, mad = 5.1696




Fold 4 Epoch 92 Batch 0: Train Loss = 1.5315
Fold 4, mse = 42.8523, mad = 5.0608
Fold 4 Epoch 93 Batch 0: Train Loss = 1.4463




Fold 4, mse = 44.5112, mad = 5.1337




Fold 4 Epoch 94 Batch 0: Train Loss = 1.6169
Fold 4, mse = 44.3728, mad = 5.2380
Fold 4 Epoch 95 Batch 0: Train Loss = 1.5212




Fold 4, mse = 42.7527, mad = 4.9370
Fold 4 Epoch 96 Batch 0: Train Loss = 1.4931




------------ Save FOLD-BEST model - MSE: 40.7254 ------------
Custom bins confusion matrix:
[[74 23  0]
 [29 42  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.689563413584505
Mean squared error (MSE) = 40.72543122749822
Mean absolute percentage error (MAPE) = 388.2375349364396
Cohen kappa score = 0.35961964681489955
Fold 4, mse = 40.7254, mad = 4.6896




Fold 4 Epoch 97 Batch 0: Train Loss = 1.3579
Fold 4, mse = 43.4884, mad = 4.9284




Fold 4 Epoch 98 Batch 0: Train Loss = 1.3824
------------ Save FOLD-BEST model - MSE: 40.1421 ------------
Custom bins confusion matrix:
[[69 28  0]
 [28 43  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.795915136524828
Mean squared error (MSE) = 40.14205784012467
Mean absolute percentage error (MAPE) = 410.2794348051943
Cohen kappa score = 0.3185965905071797
Fold 4, mse = 40.1421, mad = 4.7959




Fold 4 Epoch 99 Batch 0: Train Loss = 1.2341
Fold 4, mse = 43.3249, mad = 4.9612




Fold 4 Epoch 100 Batch 0: Train Loss = 1.4653
Fold 4, epoch 100: Loss = 1.4618 Valid loss = 1.8212 MSE = 41.3562 AUROC = 0.9603
Fold 4, mse = 41.3562, mad = 4.9051




Fold 4 Epoch 101 Batch 0: Train Loss = 1.1251
Fold 4, mse = 41.6313, mad = 4.7896




Fold 4 Epoch 102 Batch 0: Train Loss = 1.3537
Fold 4, mse = 41.1273, mad = 4.8736




Fold 4 Epoch 103 Batch 0: Train Loss = 1.5850
Fold 4, mse = 41.5419, mad = 4.8148




Fold 4 Epoch 104 Batch 0: Train Loss = 1.3650
Fold 4, mse = 41.2632, mad = 4.7732




Fold 4 Epoch 105 Batch 0: Train Loss = 1.4139
Fold 4, mse = 41.2326, mad = 4.7235




Fold 4 Epoch 106 Batch 0: Train Loss = 1.0955
Fold 4, mse = 41.2279, mad = 4.8357




Fold 4 Epoch 107 Batch 0: Train Loss = 1.2479
Fold 4, mse = 40.7560, mad = 4.7288




Fold 4 Epoch 108 Batch 0: Train Loss = 1.4407
Fold 4, mse = 41.2828, mad = 4.7629




Fold 4 Epoch 109 Batch 0: Train Loss = 1.3414
Fold 4, mse = 41.0680, mad = 4.8459




Fold 4 Epoch 110 Batch 0: Train Loss = 1.3282
Fold 4, epoch 110: Loss = 1.3978 Valid loss = 1.8865 MSE = 42.9404 AUROC = 0.9610
Fold 4, mse = 42.9404, mad = 5.0505




Fold 4 Epoch 111 Batch 0: Train Loss = 1.2557
Fold 4, mse = 40.7420, mad = 4.8382




Fold 4 Epoch 112 Batch 0: Train Loss = 1.3059
Fold 4, mse = 41.3916, mad = 4.6922




Fold 4 Epoch 113 Batch 0: Train Loss = 1.4365
------------ Save FOLD-BEST model - MSE: 40.0620 ------------
Custom bins confusion matrix:
[[78 19  0]
 [31 40  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.688992161042338
Mean squared error (MSE) = 40.06200171509662
Mean absolute percentage error (MAPE) = 370.01977116638443
Cohen kappa score = 0.37710486377104857
Fold 4, mse = 40.0620, mad = 4.6890




Fold 4 Epoch 114 Batch 0: Train Loss = 1.1876
Fold 4, mse = 44.7388, mad = 5.2129




Fold 4 Epoch 115 Batch 0: Train Loss = 1.3119
Fold 4, mse = 41.0742, mad = 4.8175




Fold 4 Epoch 116 Batch 0: Train Loss = 1.6115
------------ Save FOLD-BEST model - MSE: 40.0591 ------------
Custom bins confusion matrix:
[[81 16  0]
 [33 38  0]
 [ 0  1  0]]
Mean absolute deviation (MAD) = 4.566590520393034
Mean squared error (MSE) = 40.059054103225456
Mean absolute percentage error (MAPE) = 353.38788601937887
Cohen kappa score = 0.38375145857642934
Fold 4, mse = 40.0591, mad = 4.5666




Fold 4 Epoch 117 Batch 0: Train Loss = 1.3853
Fold 4, mse = 40.7181, mad = 4.5780




Fold 4 Epoch 118 Batch 0: Train Loss = 1.4080
Fold 4, mse = 41.7254, mad = 4.9832




Fold 4 Epoch 119 Batch 0: Train Loss = 1.3732
Fold 4, mse = 40.8297, mad = 4.8555




Fold 4 Epoch 120 Batch 0: Train Loss = 1.1160
Fold 4, epoch 120: Loss = 1.3574 Valid loss = 1.7816 MSE = 40.5178 AUROC = 0.9621
Fold 4, mse = 40.5178, mad = 4.7127
Fold 4 Epoch 121 Batch 0: Train Loss = 1.0285




In [None]:
with open('history.pkl', 'wb') as f:
    pickle.dump(history, f)