In [23]:
import numpy as np
import argparse
import os
import imp
import re
import pickle5 as pickle
import datetime
import random
import math
import logging
import copy
import matplotlib.pyplot as plt
import sklearn
import logging
from sklearn.cluster import KMeans
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.neighbors import kneighbors_graph
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
import torch
from torch import nn
import torch.nn.utils.rnn as rnn_utils
from torch.utils import data
from torch.autograd import Variable
import torch.nn.functional as F
from torch.nn import Parameter

from utils import utils
from utils.readers import InHospitalMortalityReader
from utils.preprocessing import Discretizer, Normalizer
from utils import metrics
from utils import common_utils
from torch.autograd import Function
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [24]:
# Target cohort selector; the cells below branch on 'PD', 'TJ' and 'HM'.
target_dataset = 'PD'
RANDOM_SEED = 43

# Seed every random number generator used by the notebook.
np.random.seed(RANDOM_SEED)                 # numpy
random.seed(RANDOM_SEED)                    # Python stdlib
torch.manual_seed(RANDOM_SEED)              # torch (cpu)
torch.cuda.manual_seed(RANDOM_SEED)         # torch (gpu)
torch.backends.cudnn.deterministic = True   # cudnn

# Use CUDA if available. GPU 3 is preferred; on hosts that expose fewer GPUs
# fall back to GPU 0 instead of failing later when tensors are moved to a
# device that does not exist.
PREFERRED_CUDA_INDEX = 3
if torch.cuda.is_available():
    _cuda_index = PREFERRED_CUDA_INDEX if PREFERRED_CUDA_INDEX < torch.cuda.device_count() else 0
    device = torch.device("cuda:{}".format(_cuda_index))
else:
    device = torch.device('cpu')
# print("available device: {}".format(device))
reverse = False
model_name = 'codats'

In [25]:
# Log file name: log_file_<model>_<dataset>[_reverse].log
_log_name_parts = ['log_file', model_name, target_dataset]
if reverse:
    _log_name_parts.append('reverse')
file_name = '_'.join(_log_name_parts) + '.log'
def get_logger(name, file_name):
    """Return the logger called ``name``, writing INFO-and-above records to ``file_name``.

    The log file is opened in ``'w'`` mode, so it is truncated on every call.
    Calling this again for the same logger and file (for example when the
    notebook cell is re-run) replaces the earlier file handler instead of
    stacking a second one, so records are not written twice and the old file
    handle is closed.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        file_name: Path of the log file.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    # Raise the level of the first root handler so that records are not
    # printed a second time inside a Jupyter notebook.
    if logger.root.handlers:
        logger.root.handlers[0].setLevel(logging.WARNING)

    # Drop file handlers left behind by an earlier call for the same file.
    target_path = os.path.abspath(file_name)
    for stale in list(logger.handlers):
        if isinstance(stale, logging.FileHandler) and stale.baseFilename == target_path:
            logger.removeHandler(stale)
            stale.close()

    # Console output is intentionally disabled. To enable it, attach a
    # StreamHandler here:
    # handler_stdout = logging.StreamHandler()
    # handler_stdout.setLevel(logging.INFO)
    # handler_stdout.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    # logger.addHandler(handler_stdout)

    handler_file = logging.FileHandler(filename=file_name, mode='w', encoding='utf-8')
    handler_file.setLevel(logging.DEBUG)
    handler_file.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler_file)

    return logger

# Create the notebook-wide logger; its records go to the log file named by `file_name`.
logger = get_logger(__name__,file_name)

# Smoke-test messages (the Chinese text reads "this is the debug/info/warning content we want to output").
# get_logger sets the logger level to INFO, so the debug record below is filtered out.
logger.debug('这是希望输出的debug内容')
logger.info('这是希望输出的info内容')
logger.warning('这是希望输出的warning内容')

## Get source data 

In [26]:
def get_n2n_data(x, y, x_len):
    """Expand every sequence into all of its prefixes, one sample per step.

    For sequence ``x[i]`` and each step ``j`` this emits the prefix
    ``x[i][:j+1]``, the label ``y[i][j]`` and the prefix length ``j+1``.

    Args:
        x: Sequence of sequences.
        y: Per-step labels, indexed as ``y[i][j]``.
        x_len: Must have the same length as ``x``; only used for that check.

    Returns:
        Tuple ``(prefixes, labels, prefix_lengths)`` of three parallel lists.

    Raises:
        AssertionError: if ``x``, ``y`` and ``x_len`` differ in length.
    """
    n_seq = len(x)
    assert n_seq == len(y)
    assert n_seq == len(x_len)

    prefixes, labels, prefix_lens = [], [], []
    for seq, seq_labels in zip(x, y):
        for end in range(1, len(seq) + 1):
            prefixes.append(seq[:end])
            labels.append(seq_labels[end - 1])
            prefix_lens.append(end)
    return prefixes, labels, prefix_lens

In [27]:
source_data_path = './data/Challenge/'
small_part = False
arg_timestep = 1.0
batch_size = 256
epochs = 100


def _load_source_pickle(name_in_dir):
    """Unpickle one file from ``source_data_path`` and close the file handle afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted data files.
    with open(source_data_path + name_in_dir, 'rb') as handle:
        return pickle.load(handle)


all_x_source = _load_source_pickle('new_x_front_fill.dat')
all_y_source = _load_source_pickle('new_y_front_fill.dat')
all_names_source = _load_source_pickle('new_name.dat')
static_source = _load_source_pickle('new_demo_front_fill.dat')
mask_x_source = _load_source_pickle('new_mask_x.dat')
mask_demo_source = _load_source_pickle('new_mask_demo.dat')
# Length of every source sequence, parallel to all_x_source.
all_x_len_source = [len(i) for i in all_x_source]

# Columns of the source features that are shared with each target dataset.
_SOURCE_SUBSET_IDX = {
    'PD': [31, 29, 28, 33, 25, 18, 7, 21, 16, 15, 19, 17, 24, 3, 5, 0],
    'TJ': [27, 29, 18, 16, 26, 33, 28, 31, 32, 15, 11, 25, 21, 20, 9, 17, 30, 19],
    'HM': [0, 1, 2, 3, 5, 9, 11, 12, 13, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33],
}
if target_dataset not in _SOURCE_SUBSET_IDX:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError("Unsupported target_dataset %r; expected one of %s"
                     % (target_dataset, sorted(_SOURCE_SUBSET_IDX)))
subset_idx_source = _SOURCE_SUBSET_IDX[target_dataset]

subset_cnt = len(subset_idx_source)
_shared_columns = set(subset_idx_source)
# Every remaining column, in ascending order.
other_idx = [i for i in range(len(all_x_source[0][0])) if i not in _shared_columns]

# Move the shared features to the front and the non-shared features to the end.
# NOTE: this rewrites all_x_source / mask_x_source in place, so running the
# cell a second time without reloading the data permutes the columns again.
_column_order = subset_idx_source + other_idx
for i in range(len(all_x_source)):
    all_x_source[i] = np.array(all_x_source[i], dtype=float)[:, _column_order].tolist()
    mask_x_source[i] = np.array(mask_x_source[i])[:, _column_order].tolist()


# Split sizes: roughly 80% train / 10% dev, with the remainder used for test.
train_num_source = int(len(all_x_source) * 0.8) + 1
logger.info(train_num_source)
dev_num_source = int(len(all_x_source) * 0.1) + 1
logger.info(dev_num_source)
# The test split takes whatever is left (the same rule getSplitData uses), so
# the three sizes always add up to the dataset size.
test_num_source = len(all_x_source) - train_num_source - dev_num_source
logger.info(test_num_source)
assert test_num_source >= 0, 'source dataset is too small for the train/dev/test split'
assert(train_num_source+dev_num_source+test_num_source == len(all_x_source))


def _take_source_rows(start, stop):
    """Collect the parallel source lists for rows ``start`` (inclusive) to ``stop`` (exclusive)."""
    xs, ys, names, statics, lengths, masks = [], [], [], [], [], []
    for idx in range(start, stop):
        xs.append(all_x_source[idx])
        ys.append(int(all_y_source[idx][-1]))  # label = last entry of the label sequence
        names.append(all_names_source[idx])
        statics.append(static_source[idx])
        lengths.append(all_x_len_source[idx])
        masks.append(mask_x_source[idx])
    return xs, ys, names, statics, lengths, masks


_dev_end = train_num_source + dev_num_source

(train_x_source, train_y_source, train_names_source,
 train_static_source, train_x_len_source, train_mask_x_source) = _take_source_rows(0, train_num_source)

(dev_x_source, dev_y_source, dev_names_source,
 dev_static_source, dev_x_len_source, dev_mask_x_source) = _take_source_rows(train_num_source, _dev_end)

(test_x, test_y, test_names,
 test_static, test_x_len, test_mask_x) = _take_source_rows(_dev_end, _dev_end + test_num_source)


assert(len(train_x_source) == train_num_source)
assert(len(dev_x_source) == dev_num_source)
assert(len(test_x) == test_num_source)

long_x_source = all_x_source
long_y_source = [y[-1] for y in all_y_source]


In [28]:
def get_loss(y_pred, y_true):
    """Return the mean binary cross-entropy between ``y_pred`` (probabilities) and ``y_true``."""
    return torch.nn.functional.binary_cross_entropy(y_pred, y_true)
def get_re_loss(y_pred, y_true):
    """Return the mean squared error between ``y_pred`` and ``y_true``."""
    return torch.nn.functional.mse_loss(y_pred, y_true)
def get_kl_loss(x_pred, x_target):
    """Return the KL-divergence loss averaged over every element.

    Uses ``reduction='mean'``, which is the documented replacement for the
    deprecated ``reduce=True, size_average=True`` pair and gives the same value.

    Args:
        x_pred: Input tensor; ``torch.nn.KLDivLoss`` expects log-probabilities.
        x_target: Target tensor of the same shape (probabilities).

    Returns:
        Scalar tensor holding the element-wise mean of the pointwise KL terms.
    """
    loss = torch.nn.KLDivLoss(reduction='mean')
    return loss(x_pred, x_target)
def get_wass_dist(x_pred, x_target):
    """Distance between two batches built from their per-column mean and variance.

    Along ``dim=0`` the mean and ``torch.var`` of each input are computed; the
    result is ``sqrt(sum((m1 - m2)^2) + sum((sqrt(v1) - sqrt(v2))^2))``.

    Args:
        x_pred: Tensor whose first dimension indexes samples.
        x_target: Tensor whose first dimension indexes samples.

    Returns:
        Scalar tensor with the distance.
    """
    mean_gap = x_pred.mean(dim=0) - x_target.mean(dim=0)
    std_gap = x_pred.var(dim=0) ** (1/2) - x_target.var(dim=0) ** (1/2)
    squared_total = (mean_gap ** 2).sum() + (std_gap ** 2).sum()
    return squared_total ** (1/2)

In [29]:
def pad_sents(sents, pad_token):
    """Right-pad every sequence in ``sents`` with ``pad_token`` up to the longest length.

    Args:
        sents: Non-empty collection of sequences.
        pad_token: Element appended to the shorter sequences.

    Returns:
        ``np.ndarray`` built from the padded sequences.
    """
    target_len = max(len(seq) for seq in sents)
    padded_rows = [
        np.array(list(seq) + [pad_token] * (target_len - len(seq)))
        for seq in sents
    ]
    return np.array(padded_rows)

In [30]:
def batch_iter(x, y, lens, batch_size, shuffle=False):
    """Yield ``(batch_x, batch_y, batch_lens)`` mini-batches.

    Inside every batch the examples are ordered by ``len(x[i])``, longest
    first. The last batch may be smaller than ``batch_size``.

    @param x: indexable collection of sequences
    @param y: labels, parallel to ``x``
    @param lens: sequence lengths, parallel to ``x``
    @param batch_size (int): batch size
    @param shuffle (boolean): whether to randomly shuffle the examples (via ``np.random``) before batching
    """
    total = len(x)
    full_batches, remainder = divmod(total, batch_size)
    batch_num = full_batches + (1 if remainder else 0)  # round up

    order = list(range(total))
    if shuffle:
        np.random.shuffle(order)

    for b in range(batch_num):
        # Slicing past the end is safe, so the final short batch needs no special case.
        picked = order[b * batch_size:(b + 1) * batch_size]
        triples = sorted(
            ((x[k], y[k], lens[k]) for k in picked),
            key=lambda t: len(t[0]),
            reverse=True,
        )
        yield [t[0] for t in triples], [t[1] for t in triples], [t[2] for t in triples]


In [31]:
def length_to_mask(length, max_len=None, dtype=None):
    """Turn a 1-D tensor of lengths (B) into a B x max_len mask.

    Position ``t`` of row ``b`` is set when ``t < length[b]``.
    If max_len is None (or 0), the maximum of ``length`` is used.
    If dtype is given, the boolean mask is converted to that dtype.
    """
    assert len(length.shape) == 1, 'Length shape should be 1 dimensional.'
    if not max_len:
        max_len = length.max().item()
    steps = torch.arange(max_len, device=length.device, dtype=length.dtype)
    # (1, max_len) against (B, 1) broadcasts to (B, max_len).
    mask = steps.unsqueeze(0) < length.unsqueeze(1)
    if dtype is None:
        return mask
    return torch.as_tensor(mask, dtype=dtype, device=length.device)

In [32]:
class ReverseLayerF(Function):
    """Gradient-reversal function.

    The forward pass returns ``x`` unchanged; the backward pass negates the
    incoming gradient and scales it by ``alpha``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Keep the scaling factor around for the backward pass.
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        reversed_grad = grad_output.neg() * ctx.alpha
        # One gradient per forward input: the reversed one for x, none for alpha.
        return reversed_grad, None
def clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent deep copies of ``module``."""
    replicas = nn.ModuleList()
    for _ in range(N):
        replicas.append(copy.deepcopy(module))
    return replicas

class codats(nn.Module):
    """1-D convolutional feature extractor with a label head and an adversarial domain head.

    Each input feature is treated as its own univariate series and embedded by
    the same convolution stack (``self.layers``). The per-feature embeddings
    feed two heads:

    * a label head: ``Linear`` (hidden -> 1) per feature, then ``output``
      (features -> ``output_dim``) and a sigmoid;
    * a domain head: the sum of the per-feature embeddings goes through the
      gradient-reversal function ``ReverseLayerF`` into ``domain_classifier``,
      which returns log-probabilities over 2 domains.

    All squeezes name their dimension explicitly so that a batch of size 1 or a
    series with a single time step keeps its shape.
    NOTE: in training mode the ``BatchNorm1d`` inside ``domain_classifier``
    still needs more than one sample per batch.

    Args:
        common_dim: Number of input features (size of the last input dimension).
        hidden_dim: Channel width of the per-feature embedding.
        d_model, MHD_num_head: Only checked for divisibility in ``forward``.
        d_ff: Stored but not used by any layer in this class.
        output_dim: Size of the label-head output.
        keep_prob: Keep probability; dropout uses ``p = 1 - keep_prob``.
    """

    def __init__(self, common_dim, hidden_dim, d_model,  MHD_num_head, d_ff, output_dim, keep_prob=0.5):
        super(codats, self).__init__()

        # hyperparameters
        self.input_dim = common_dim
        self.hidden_dim = hidden_dim  # d_model
        self.d_model = d_model
        self.MHD_num_head = MHD_num_head
        self.d_ff = d_ff
        self.output_dim = output_dim
        self.keep_prob = keep_prob

        # layers (construction order and attribute names are kept so that
        # parameter initialisation and saved state_dicts stay compatible)
        self.layers = nn.Sequential(
                nn.Conv1d(1, hidden_dim, kernel_size=8, padding=4, bias=False),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),

                nn.Conv1d(hidden_dim, 2*hidden_dim, kernel_size=5, padding=2, bias=False),
                nn.BatchNorm1d(2*hidden_dim),
                nn.ReLU(),

                nn.Conv1d(2*hidden_dim, hidden_dim, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),

                nn.AdaptiveAvgPool1d(1),
            )

        # demo_proj_main, demo_proj, FC_embed and to_MMD are not used by
        # forward(); they are kept because they are part of the state_dict.
        self.demo_proj_main = nn.Linear(12, self.hidden_dim)
        self.demo_proj = nn.Linear(12, self.hidden_dim)
        self.Linear = nn.Linear(self.hidden_dim, 1)
        self.output = nn.Linear(self.input_dim, self.output_dim)

        # Domain classifier used by the adversarial method.
        self.domain_classifier = nn.Sequential()
        self.domain_classifier.add_module('d_fc1', nn.Linear(self.hidden_dim, self.hidden_dim))
        self.domain_classifier.add_module('d_bn1', nn.BatchNorm1d(self.hidden_dim))
        self.domain_classifier.add_module('d_relu1', nn.ReLU(True))
        self.domain_classifier.add_module('d_fc2', nn.Linear(hidden_dim, 2))
        self.domain_classifier.add_module('d_softmax', nn.LogSoftmax(dim=1))

        self.dropout = nn.Dropout(p = 1 - self.keep_prob)
        self.FC_embed = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.tanh=nn.Tanh()
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
        self.relu=nn.ReLU()
        self.to_MMD = nn.Linear(self.hidden_dim, 1)

    def forward(self, input, lens, alpha, is_teacher):
        """Run the network on a batch.

        Args:
            input: Tensor of shape (batch, time, feature) with
                ``feature == common_dim``.
            lens: Sequence lengths; accepted for interface compatibility but
                not used in the computation.
            alpha: Scaling factor handed to the gradient-reversal function.
            is_teacher: True for source-domain batches (label and domain
                outputs), False for target-domain batches (domain output only,
                used to confuse the domain classifier).

        Returns:
            ``(output, domain_output, contexts)`` when ``is_teacher`` is true,
            with shapes (batch, output_dim), (batch, 2) and (batch, feature);
            otherwise only ``domain_output``.
        """
        feature_dim = input.size(2)
        assert(feature_dim == self.input_dim)# input Tensor : 256 * 48 * 76
        assert(self.d_model % self.MHD_num_head == 0)

        # Embed every feature separately with the shared convolution stack.
        per_feature = []
        for i in range(feature_dim):
            series = input[:, :, i].unsqueeze(1)                    # (batch, 1, time)
            per_feature.append(self.layers(series).squeeze(-1))     # (batch, hidden)
        codats_embeded_input = torch.stack(per_feature, dim=1)     # (batch, feature, hidden)

        # Domain head: sum the embeddings over features (same left-to-right
        # accumulation order as before), then reverse the gradient.
        common_input = per_feature[0]
        for embedded in per_feature[1:]:
            common_input = common_input + embedded                  # (batch, hidden)
        reverse_input = ReverseLayerF.apply(common_input, alpha)
        domain_output = self.domain_classifier(reverse_input)

        if not is_teacher:
            # Target-domain batch: only the domain prediction is needed.
            return domain_output

        # Label head (source-domain batch).
        posi_input = self.dropout(codats_embeded_input)
        contexts = self.Linear(posi_input).squeeze(-1)              # (batch, feature)
        output = self.output(self.dropout(contexts))                # (batch, output_dim)
        output = self.sigmoid(output)
        return output, domain_output, contexts

In [33]:
#split the data
def getSplitData(x, lens, y):
    """Split parallel lists into consecutive train / dev / test parts.

    The train part holds the first ``int(0.8 * n) + 1`` items, the dev part the
    next ``int(0.1 * n) + 1`` and the test part whatever remains. Labels are
    taken as ``int(y[i][-1])``.

    Args:
        x: Samples.
        lens: Sample lengths, parallel to ``x``.
        y: Label sequences, parallel to ``x``.

    Returns:
        ``(train_x, train_y, train_len, dev_x, dev_y, dev_len,
        test_x, test_y, test_len)``.
    """
    total = len(x)
    train_num = int(total * 0.8) + 1
    dev_num = int(total * 0.1) + 1
    dev_end = train_num + dev_num

    def take(start, stop):
        # Index one row at a time (no slicing) so an out-of-range boundary
        # raises IndexError.
        part_x, part_y, part_len = [], [], []
        for idx in range(start, stop):
            part_x.append(x[idx])
            part_y.append(int(y[idx][-1]))
            part_len.append(lens[idx])
        return part_x, part_y, part_len

    return take(0, train_num) + take(train_num, dev_end) + take(dev_end, total)

In [34]:
logger.info("load target data")

# Per-dataset settings:
# (data directory, label file, columns shared with the source data, total feature count).
_TARGET_DATASETS = {
    'PD': ('./data/PD/', 'y_z.pkl',
           [0, 2, 3, 4, 5, 7, 8, 9, 12, 16, 17, 19, 20, 56, 57, 58], 69),
    'TJ': ('./data/Tongji/', 'y.pkl',
           [2, 3, 4, 9, 13, 14, 26, 27, 30, 32, 34, 38, 39, 41, 52, 53, 66, 74], 75),
    'HM': ('./data/CDSL/', 'y.pkl',
           [5, 6, 4, 2, 3, 48, 79, 76, 87, 25, 30, 31, 18, 43, 58, 66, 40, 57, 23, 92, 50, 54, 91, 60, 39, 81], 99),
}
if target_dataset not in _TARGET_DATASETS:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError("Unsupported target_dataset %r; expected one of %s"
                     % (target_dataset, sorted(_TARGET_DATASETS)))
data_path, _label_file, subset_idx_target, _target_feature_cnt = _TARGET_DATASETS[target_dataset]


def _load_target_pickle(name_in_dir):
    """Unpickle one file from ``data_path`` and close the file handle afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted data files.
    with open(data_path + name_in_dir, 'rb') as handle:
        return pickle.load(handle)


all_x_target = _load_target_pickle('x.pkl')
all_time_target = _load_target_pickle(_label_file)
all_x_len_target = [len(i) for i in all_x_target]

if target_dataset in ('TJ', 'HM'):
    # The label file is loaded a second time so that the two lists can be
    # rewritten independently: all_time_target keeps the last entry of every
    # per-step label, all_y_target keeps the first one.
    all_y_target = _load_target_pickle(_label_file)
    for i in range(len(all_time_target)):
        for j in range(len(all_time_target[i])):
            all_time_target[i][j] = all_time_target[i][j][-1]
            all_y_target[i][j] = all_y_target[i][j][0]

# Move the shared features to the front and the remaining ones to the end.
_shared_target_columns = set(subset_idx_target)
other_idx_target = [i for i in range(_target_feature_cnt) if i not in _shared_target_columns]
_target_column_order = subset_idx_target + other_idx_target
for i in range(len(all_x_target)):
    all_x_target[i] = np.array(all_x_target[i], dtype=float)[:, _target_column_order].tolist()

if target_dataset == 'PD':
    # PD has a single label file; its content is used as the label list as well.
    all_y_target = all_time_target
else:
    # TJ / HM: order the samples by sequence length, longest first.
    examples = []
    for idx in range(len(all_x_target)):
        examples.append((all_x_target[idx], all_y_target[idx], all_time_target[idx], all_x_len_target[idx]))
    examples = sorted(examples, key=lambda e: len(e[0]), reverse=True)
    all_x_target = [e[0] for e in examples]
    all_y_target = [e[1] for e in examples]
    all_time_target = [e[2] for e in examples]
    all_x_len_target = [e[3] for e in examples]

num_source = len(all_x_source)
num_target = len(all_x_target)
# print(target_dataset,len(all_x_target), len(all_x_target[0]),len(all_x_target[0][0]))
if num_source == 0 or num_target == 0:
    # Repeating an empty dataset can never reach the other one's size.
    raise ValueError('source and target datasets must both be non-empty '
                     '(got %d source, %d target samples)' % (num_source, num_target))

# Pair the two domains one-to-one: the smaller dataset is repeated and then
# truncated to the size of the larger one. Equal sizes are handled by the
# first branch (one repetition, no truncation).
_paired_size = max(num_source, num_target)
repeat_times = -(-_paired_size // min(num_source, num_target))  # ceiling division

if num_source <= num_target:
    all_x_target_confuse = all_x_target
    all_y_target_confuse = all_y_target
    all_x_len_target_confuse = all_x_len_target
    all_x_source_confuse = (list(all_x_source) * repeat_times)[:num_target]
    all_x_len_source_confuse = (list(all_x_len_source) * repeat_times)[:num_target]
    all_y_source_confuse = (list(all_y_source) * repeat_times)[:num_target]
else:
    all_x_source_confuse = all_x_source
    all_x_len_source_confuse = all_x_len_source
    all_y_source_confuse = all_y_source
    all_x_target_confuse = (list(all_x_target) * repeat_times)[:num_source]
    all_x_len_target_confuse = (list(all_x_len_target) * repeat_times)[:num_source]
    all_y_target_confuse = (list(all_y_target) * repeat_times)[:num_source]

# print(f"len(all_x_source_confuse) is {len(all_x_source_confuse)}, len(all_x_target_confuse) is {len(all_x_target_confuse)}")

# TODO: split into train / dev / test
# all_x_source_confuse = pad_sents(all_x_source_confuse, pad_token_source)
# all_x_target_confuse = pad_sents(all_x_target_confuse, pad_token_target)
(
    train_x_source_confuse, train_y_source_confuse, train_len_source_confuse,
    dev_x_source_confuse, dev_y_source_confuse, dev_len_source_confuse,
    test_x_source_confuse, test_y_source_confuse, test_len_source_confuse,
) = getSplitData(all_x_source_confuse, all_x_len_source_confuse, all_y_source_confuse)

(
    train_x_target_confuse, train_y_target_confuse, train_len_target_confuse,
    dev_x_target_confuse, dev_y_target_confuse, dev_len_target_confuse,
    test_x_target_confuse, test_y_target_confuse, test_len_target_confuse,
) = getSplitData(all_x_target_confuse, all_x_len_target_confuse, all_y_target_confuse)

# long_x_source = all_x_source
# long_y_source = [y[-1] for y in all_y_source]



In [35]:
# All-zero padding rows: one per time step appended when sequences are padded.
pad_token_source = np.zeros(34)
# Width of the padding row for each target dataset.
_TARGET_PAD_WIDTH = {'PD': 69, 'TJ': 75, 'HM': 99}
if target_dataset in _TARGET_PAD_WIDTH:
    pad_token_target = np.zeros(_TARGET_PAD_WIDTH[target_dataset])

In [36]:
# Training settings (these rebind the values assigned in the source-data cell).
epochs, batch_size = 50, 256

# Model sizes; the network only consumes the features shared by both domains.
common_dim = subset_cnt
hidden_dim = d_model = d_ff = 64
MHD_num_head = 4
output_dim = 1

model_student = codats(
    common_dim=common_dim,
    hidden_dim=hidden_dim,
    d_model=d_model,
    MHD_num_head=MHD_num_head,
    d_ff=d_ff,
    output_dim=output_dim,
).to(device)
optimizer_student = torch.optim.Adam(params=model_student.parameters(), lr=1e-3)

In [37]:
class MultitaskLoss(nn.Module):
    """Weighted sum of a BCE prediction loss and a KL-divergence embedding loss.

    ``alpha`` is a learnable weight vector of length ``task_num``; only its
    first two entries are used, so ``task_num`` must be at least 2.

    Args:
        task_num: Length of the weight vector ``alpha``.
    """

    def __init__(self, task_num=2):
        super(MultitaskLoss, self).__init__()
        self.task_num = task_num
        self.alpha = nn.Parameter(torch.ones((task_num)), requires_grad=True)
        self.bce = nn.BCELoss()
        # reduction='mean' replaces the deprecated reduce=True, size_average=True pair.
        self.kl = nn.KLDivLoss(reduction='mean')

    def forward(self, opt_student, batch_y, emb_student, emb_teacher, tar_source=None, tar_tar=None):
        """Return ``alpha[0] * BCE(opt_student, batch_y) + alpha[1] * KL(emb_student, emb_teacher)``.

        Args:
            opt_student: Predicted probabilities.
            batch_y: Targets with the same shape as ``opt_student``.
            emb_student: Input to ``nn.KLDivLoss`` (which expects log-probabilities).
            emb_teacher: Target distribution for the KL term.
            tar_source, tar_tar: Unused; optional so that callers that only
                have the four loss inputs can call the module.
        """
        BCE_Loss = self.bce(opt_student, batch_y)
        emb_Loss = self.kl(emb_student, emb_teacher)
        return BCE_Loss * self.alpha[0] + emb_Loss * self.alpha[1]

def get_multitask_loss(opt_student, batch_y, emb_student, emb_teacher):
    """Evaluate a freshly constructed ``MultitaskLoss`` on one batch.

    ``MultitaskLoss.forward`` also takes ``tar_source`` and ``tar_tar``; they
    play no part in the loss, so ``None`` is passed for both (omitting them
    raised ``TypeError``).

    NOTE(review): a new ``MultitaskLoss`` is created on every call, so its
    ``alpha`` weights always start at their initial value and are never
    optimised — confirm this is intended.
    """
    mtl = MultitaskLoss(task_num=3)
    return mtl(opt_student, batch_y, emb_student, emb_teacher, None, None)

In [38]:
# Short tag used in the checkpoint file name for each target dataset.
_DATASET_TAG = {'PD': 'pd', 'TJ': 'covid', 'HM': 'spain'}
if target_dataset in _DATASET_TAG:
    data_str = _DATASET_TAG[target_dataset]


# if teacher_flag:
#     file_name = './model/pretrained-challenge-front-fill-2'+ data_str
# else: 
#     file_name = './model/pretrained-challenge-front-fill-2'+ data_str + '-noteacher'

# Checkpoint path. This rebinds `file_name`, which until now held the log-file name.
file_name = './model/pretrained-codats{}'.format(data_str)

In [39]:
# # Training dann model
# # If you don't want to train the Student Model:
# # - The pretrained student model is in the directory './model/' and can be loaded directly.
# # - Simply skip this cell and load the model to validate on the Dev Dataset.

# logger.info('Training Student')
# teacher_flag = False
# epochs = 30
# total_train_loss = []
# total_valid_loss = []
# global_best = 0
# auroc = []
# auprc = []
# minpse = []
# history = []
# # begin_time = time.time()
# best_auroc = 0
# best_auprc = 0
# best_minpse = 0
# best_total_loss = 0x3f3f3f3f
# loss_domain = torch.nn.NLLLoss()
# loss_predict = torch.nn.MSELoss()
# loss_embed = nn.KLDivLoss(reduce=True, size_average=True)




# print(f'len(train_source_iter) is {len(train_x_source_confuse)}, len(train_target_iter) is {len(train_x_target_confuse)}, steps is {len(train_x_source_confuse) // batch_size + 1}')



# for each_epoch in range(epochs):
#     train_source_iter = batch_iter(train_x_source_confuse, train_y_source_confuse, train_len_source_confuse, batch_size=batch_size)
#     dev_source_iter = batch_iter(dev_x_source_confuse, dev_y_source_confuse, dev_len_source_confuse, batch_size=batch_size)
#     test_source_iter = batch_iter(test_x_source_confuse, test_y_source_confuse, test_len_source_confuse, batch_size=batch_size)
#     train_target_iter = batch_iter(train_x_target_confuse, train_y_target_confuse, train_len_target_confuse, batch_size=batch_size)
#     dev_target_iter = batch_iter(dev_x_target_confuse, dev_y_target_confuse, dev_len_target_confuse, batch_size=batch_size)
#     test_target_iter = batch_iter(test_x_target_confuse, test_y_target_confuse, test_len_target_confuse, batch_size=batch_size)
#     epoch_loss = []
#     counter_batch = 0
#     model_student.train()  
#     steps = len(train_x_source_confuse) // batch_size + 1 if len(train_x_source_confuse) % batch_size != 0 else len(train_x_source_confuse) // batch_size
#     for step in range(steps):
#         # -----source_domain--------
#         batch_x, batch_y, batch_lens= next(train_source_iter)
#         p = float(step + each_epoch * steps) / epochs / steps
#         alpha = 2. / (1. + np.exp(-10 * p)) - 1
#         optimizer_student.zero_grad()
#         batch_x = torch.tensor(pad_sents(batch_x, pad_token_source), dtype=torch.float32).to(device)
#         batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
#         batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
#         # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
#         # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
#         domain_label = torch.zeros(min(batch_size, batch_x.shape[0])).long().to(device)
#         opt_student, opt_domain, emb_student = model_student(batch_x[:,:,:subset_cnt], batch_lens, alpha, True)
#         emb_student = F.log_softmax(emb_student, dim=1)
#         err_predict = loss_predict(opt_student, batch_y)
#         err_domain1 = loss_domain(opt_domain, domain_label)
#             # loss = get_multitask_loss(opt_student, batch_y.unsqueeze(-1), emb_student, emb_teacher)

#         # -----target_domain--------
#         batch_x, batch_y, batch_lens = next(train_target_iter)
#         p = float(step + each_epoch * len(train_x_source)) / epochs / len(train_x_len_source)
#         alpha = 2. / (1. + np.exp(-10 * p)) - 1
#         optimizer_student.zero_grad()
#         batch_x = torch.tensor(pad_sents(batch_x, pad_token_target), dtype=torch.float32).to(device)
#         batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
#         batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
#         # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
#         # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
#         domain_label = torch.ones(min(batch_size, batch_x.shape[0])).long().to(device)
#         opt_domain = model_student(batch_x[:,:,:subset_cnt], batch_lens, alpha, False)
#         err_domain2 = loss_domain(opt_domain, domain_label)

#         # -----common--------
#         loss = err_predict + err_domain1 + err_domain2
#         epoch_loss.append(loss.cpu().detach().numpy())
#         loss.backward()
#         torch.nn.utils.clip_grad_norm_(model_student.parameters(), 20)
#         optimizer_student.step()

#         if step % 20 == 0:
#             print('Epoch %d Batch %d: Train Loss = %.4f'%(each_epoch, step, loss.cpu().detach().numpy()))
#             logger.info('Epoch %d Batch %d: Train Loss = %.4f'%(each_epoch, step, loss.cpu().detach().numpy()))

#     epoch_loss = np.mean(epoch_loss)
#     total_train_loss.append(epoch_loss)


#     # dev_source_dataset = MyDataset(dev_x_source_confuse, dev_len_source_confuse, dev_y_source_confuse)
#     # dev_target_dataset = MyDataset(dev_x_target_confuse, dev_len_target_confuse, dev_y_target_confuse)
#     # dev_source_dataloader = DataLoader(dev_source_dataset, batch_size= batch_size)
#     # dev_target_dataloader = DataLoader(dev_target_dataset, batch_size=batch_size)
#     #Validation

#     y_true = []
#     y_pred = []
#     with torch.no_grad():
#         steps = len(dev_x_source_confuse) // batch_size + 1 if len(dev_x_source_confuse) % batch_size != 0 else len(dev_x_source_confuse) // batch_size
#         for step in range(steps):
#             # -----source_domain--------
#             batch_x, batch_y, batch_lens= next(dev_source_iter)
#             p = float(step + each_epoch * steps) / epochs / steps
#             alpha = 2. / (1. + np.exp(-10 * p)) - 1
#             optimizer_student.zero_grad()
#             batch_x = torch.tensor(pad_sents(batch_x, pad_token_source), dtype=torch.float32).to(device)
#             batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
#             batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
#             # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
#             # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
#             domain_label = torch.zeros(min(batch_size, batch_x.shape[0])).long().to(device)
#             opt_student, opt_domain, emb_student = model_student(batch_x[:,:,:subset_cnt], batch_lens, alpha, True)
#             # emb_teacher = torch.tensor(dev_teacher_emb[step], dtype=torch.float32).to(device)
#             emb_student = F.log_softmax(emb_student, dim=1)
#             # err_emb = loss_embed(emb_student, emb_teacher) #todo 是否考虑它
#             err_predict = loss_predict(opt_student, batch_y)
#             err_domain1 = loss_domain(opt_domain, domain_label)
#                 # loss = get_multitask_loss(opt_student, batch_y.unsqueeze(-1), emb_student, emb_teacher)

#             # -----target_domain--------
#             batch_x, batch_y, batch_lens = next(dev_target_iter)
#             optimizer_student.zero_grad()
#             batch_x = torch.tensor(pad_sents(batch_x, pad_token_target), dtype=torch.float32).to(device)
#             batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
#             batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
#             # batch_mask_x = torch.tensor(pad_sents(batch_mask_x, pad_token), dtype=torch.float32).to(device)
#             # opt_student, decov_loss_student, emb_student, tar_result = model_student(batch_x[:,:,:subset_cnt], batch_x[:,:,subset_cnt:], batch_lens, [tar_all_x, tar_all_x_len], True)
#             domain_label = torch.ones(min(batch_size, batch_x.shape[0])).long().to(device)
#             opt_domain = model_student(batch_x[:,:,:subset_cnt], batch_lens, alpha, False)
#             err_domain2 = loss_domain(opt_domain, domain_label)

#             # -----common--------
#             loss = err_domain1 + err_domain2
#             if loss < best_total_loss:
#                 best_total_loss = loss
#                 state = {
#                     'net': model_student.state_dict(),
#                     'optimizer': optimizer_student.state_dict(),
#                     'epoch': each_epoch
#                 }
#                 torch.save(state, file_name)
#                 print('------------ Save best model - TOTAL_LOSS: %.4f ------------'%best_total_loss)
#                 logger.info('------------ Save best model - TOTAL_LOSS: %.4f ------------'%best_total_loss)


In [40]:
# Restore the saved model and optimizer state from `file_name`, mapping the
# tensors onto the device chosen in the configuration cell.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(file_name, map_location=device)
save_epoch = checkpoint['epoch']
print("last saved model is in epoch {}".format(save_epoch))
logger.info("last saved model is in epoch {}".format(save_epoch))
model_student.load_state_dict(checkpoint['net'])
optimizer_student.load_state_dict(checkpoint['optimizer'])
# Switch to inference mode; as the last expression this also displays the model summary.
model_student.eval()

last saved model is in epoch 27


codats(
  (layers): Sequential(
    (0): Conv1d(1, 64, kernel_size=(8,), stride=(1,), padding=(4,), bias=False)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv1d(64, 128, kernel_size=(5,), stride=(1,), padding=(2,), bias=False)
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv1d(128, 64, kernel_size=(3,), stride=(1,), padding=(1,), bias=False)
    (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): AdaptiveAvgPool1d(output_size=1)
  )
  (demo_proj_main): Linear(in_features=12, out_features=64, bias=True)
  (demo_proj): Linear(in_features=12, out_features=64, bias=True)
  (Linear): Linear(in_features=64, out_features=1, bias=True)
  (output): Linear(in_features=16, out_features=1, bias=True)
  (domain_classifier): Sequential(
    (d_fc1): Linear(in_features=64, out_features=64, bias=True)
  

In [41]:
#anchor
# Evaluate the restored student model on the `test_*_target_confuse` split.
# Gradients are disabled; the per-batch loss and the flattened predictions /
# labels are accumulated over all batches.
batch_loss = []
y_true = []
y_pred = []
with torch.no_grad():
    model_student.eval()
    test_target_iter = batch_iter(test_x_target_confuse, test_y_target_confuse, test_len_target_confuse, batch_size=batch_size, shuffle=True)
    # Number of batches: one extra step when the last batch is only partially filled.
    full_batches, leftover = divmod(len(test_x_target_confuse), batch_size)
    steps = full_batches + 1 if leftover != 0 else full_batches
    for step in range(steps):
        # -----target_domain--------
        batch_x, batch_y, batch_lens = next(test_target_iter)
        optimizer_student.zero_grad()
        batch_x = torch.tensor(pad_sents(batch_x, pad_token_target), dtype=torch.float32).to(device)
        batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
        batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
        # NOTE(review): `masks` is computed but not used further in this cell.
        masks = length_to_mask(batch_lens).unsqueeze(-1).float()
        # Only the first `subset_cnt` feature columns are fed to the student model.
        opt, _, _ = model_student(batch_x[:, :, :subset_cnt], batch_lens, 1, True)

        BCE_Loss = get_loss(opt, batch_y.unsqueeze(-1))
        model_loss = BCE_Loss
        loss = model_loss

        # Convert the loss once and reuse it for bookkeeping and reporting.
        loss_value = loss.cpu().detach().numpy()
        batch_loss.append(loss_value)
        if step % 20 == 0:
            progress_msg = 'Batch %d: Test Loss = %.4f' % (step, loss_value)
            print(progress_msg)
            logger.info(progress_msg)
        y_pred.extend(opt.cpu().detach().numpy().flatten())
        y_true.extend(batch_y.cpu().numpy().flatten())

mean_loss_msg = 'Test Loss = %.4f' % (np.mean(np.array(batch_loss)))
print("\n==>Predicting on test")
print(mean_loss_msg)
logger.info("\n==>Predicting on test")
logger.info(mean_loss_msg)
# Two-column layout per sample: [1 - prediction, prediction].
y_pred = np.array(y_pred)
y_pred = np.stack([1 - y_pred, y_pred], axis=1)
# test_res = metrics.print_metrics_binary(y_true, y_pred)

Batch 0: Test Loss = -2.8663

==>Predicting on test
Test Loss = -2.9028


In [42]:
# Align the feature spaces of the source dataset (./data/Challenge/) and the
# selected target dataset:
#   1. reorder every sample's columns to [common features | remaining features],
#   2. keep the remaining ("diff") columns separately,
#   3. pad all samples to the longest sequence by repeating the last time step.
# NOTE(review): the inputs are pickle files; unpickling can execute arbitrary
# code, so only load them from a trusted location.

# Per-target settings. `source_common_idx` / `target_common_idx` are the column
# indices of the shared features in each dataset (presumably paired position by
# position -- verify against the feature dictionaries).
_ALIGNMENT_CONFIG = {
    'PD': {
        'source_common_idx': [31, 29, 28, 33, 25, 18, 7, 21, 16, 15, 19, 17, 24, 3, 5, 0],
        'target_common_idx': [0, 2, 3, 4, 5, 7, 8, 9, 12, 16, 17, 19, 20, 56, 57, 58],
        'target_data_path': './data/PD/',
        'target_total_len': 69,
    },
    'TJ': {
        'source_common_idx': [27, 29, 18, 16, 26, 33, 28, 31, 32, 15, 11, 25, 21, 20, 9, 17, 30, 19],
        'target_common_idx': [2, 3, 4, 9, 13, 14, 26, 27, 30, 32, 34, 38, 39, 41, 52, 53, 66, 74],
        'target_data_path': './data/Tongji/',
        'target_total_len': 75,
    },
    'HM': {
        'source_common_idx': [0, 1, 2, 3, 5, 9, 11, 12, 13, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33],
        'target_common_idx': [5, 6, 4, 2, 3, 48, 79, 76, 87, 25, 30, 31, 18, 43, 58, 66, 40, 57, 23, 92, 50, 54, 91, 60, 39, 81],
        'target_data_path': './data/CDSL/',
        'target_total_len': 99,
    },
}


def _remaining_indices(total_len, used_idx):
    """Return range(total_len) without `used_idx`, in ascending order.

    Raises ValueError (from list.remove) if an index is duplicated or out of
    range, which doubles as a sanity check on the hand-written index lists.
    """
    rest = list(range(total_len))
    for idx in used_idx:
        rest.remove(idx)
    return rest


def _split_common_first(samples, common_idx, other_idx):
    """Reorder each sample's columns to [common | other] in place.

    Returns the list of per-sample "other" column blocks (nested lists).
    """
    other_blocks = []
    for k in range(len(samples)):
        cur = np.array(samples[k], dtype=float)
        cur_other = cur[:, other_idx]
        other_blocks.append(cur_other.tolist())
        samples[k] = np.concatenate((cur[:, common_idx], cur_other), axis=1).tolist()
    return other_blocks


def _pad_with_last_step(samples, n_features):
    """Stack variable-length samples into a (n_samples, longest, n_features) float tensor.

    Positions past a sample's own length repeat that sample's last time step.
    Each sample is converted to a tensor once instead of once per time step.
    """
    longest = max(len(sample) for sample in samples)
    padded = torch.zeros((len(samples), longest, n_features))
    for k, sample in enumerate(samples):
        cur_len = len(sample)
        steps = torch.Tensor(sample)
        padded[k, :cur_len, :] = steps
        padded[k, cur_len:, :] = steps[cur_len - 1, :]
    return padded


if target_dataset not in _ALIGNMENT_CONFIG:
    raise ValueError("Unsupported target_dataset: {!r}".format(target_dataset))
_alignment = _ALIGNMENT_CONFIG[target_dataset]

source_common_idx = _alignment['source_common_idx']
target_common_idx = _alignment['target_common_idx']
source_data_path = './data/Challenge/'
target_data_path = _alignment['target_data_path']

with open(source_data_path + 'new_x_front_fill.dat', 'rb') as _source_file:
    source_x = pickle.load(_source_file)
with open(target_data_path + 'x.pkl', 'rb') as _target_file:
    target_x = pickle.load(_target_file)

assert(len(source_common_idx) == len(target_common_idx))
common_len = len(source_common_idx)

source_total_len = 34
target_total_len = _alignment['target_total_len']
source_other_idx = _remaining_indices(source_total_len, source_common_idx)
target_other_idx = _remaining_indices(target_total_len, target_common_idx)

# After this, source_x / target_x hold the reordered samples and the *_diff
# lists hold only the dataset-specific columns.
source_x_diff = _split_common_first(source_x, source_common_idx, source_other_idx)
target_x_diff = _split_common_first(target_x, target_common_idx, target_other_idx)

source_x_diff_longest = max(list(len(_) for _ in source_x_diff))
# `source_max` duplicated `source_x_diff_longest` in the original cell; the name
# is kept in case a later cell still reads it.
source_max = source_x_diff_longest
source_x_longest = max(list(len(_) for _ in source_x))
source_batch = len(source_x_diff)
source_diff_features = source_total_len - common_len
source_x_diff_ex = _pad_with_last_step(source_x_diff, source_diff_features)
source_x_ex = _pad_with_last_step(source_x, source_total_len)

target_x_diff_longest = max(list(len(_) for _ in target_x_diff))
target_batch = len(target_x_diff)
target_features = target_total_len - common_len
target_x_diff_ex = _pad_with_last_step(target_x_diff, target_features)


In [43]:
logger.info("Transfer Target Dataset & Model")

# Load the target dataset used for fine-tuning and move the features shared
# with the source dataset to the front of every sample.
# NOTE(review): the inputs are pickle files; only load them from a trusted
# location.


def _load_pickle(path):
    """Unpickle the file at `path`, closing the handle afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# target_dataset -> (data directory, shared-feature column indices, total feature count)
_TARGET_DATASETS = {
    'PD': ('./data/PD/',
           [0, 2, 3, 4, 5, 7, 8, 9, 12, 16, 17, 19, 20, 56, 57, 58],
           69),
    'TJ': ('./data/Tongji/',
           [2, 3, 4, 9, 13, 14, 26, 27, 30, 32, 34, 38, 39, 41, 52, 53, 66, 74],
           75),
    'HM': ('./data/CDSL/',
           [5, 6, 4, 2, 3, 48, 79, 76, 87, 25, 30, 31, 18, 43, 58, 66, 40, 57, 23, 92, 50, 54, 91, 60, 39, 81],
           99),
}

if target_dataset not in _TARGET_DATASETS:
    raise ValueError("Unsupported target_dataset: {!r}".format(target_dataset))
data_path, tar_subset_idx, _tar_total_len = _TARGET_DATASETS[target_dataset]

all_x = _load_pickle(data_path + 'x.pkl')
all_x_len = [len(i) for i in all_x]

if target_dataset == 'PD':
    # PD ships its regression target in a dedicated file; `all_y` is not
    # defined for this dataset.
    all_time = _load_pickle(data_path + 'y_z.pkl')
else:
    # TJ / HM keep several values per time step in y.pkl: the first entry goes
    # to `all_y`, the last one to `all_time`. The file is loaded twice on
    # purpose so the two in-place rewrites below act on independent objects.
    all_y = _load_pickle(data_path + 'y.pkl')
    all_time = _load_pickle(data_path + 'y.pkl')
    for i in range(len(all_time)):
        for j in range(len(all_time[i])):
            all_time[i][j] = all_time[i][j][-1]
            all_y[i][j] = all_y[i][j][0]

# Columns not shared with the source dataset, in ascending order. list.remove
# raises ValueError on a duplicated / out-of-range index, which guards the
# hand-written index list.
tar_other_idx = list(range(_tar_total_len))
for i in tar_subset_idx:
    tar_other_idx.remove(i)

# Reorder every sample to [shared columns | remaining columns].
for i in range(len(all_x)):
    cur = np.array(all_x[i], dtype=float)
    cur_subset = cur[:, tar_subset_idx]
    cur_other = cur[:, tar_other_idx]
    all_x[i] = np.concatenate((cur_subset, cur_other), axis=1).tolist()

print(all_x[0])
print(len(all_x[0][0]))
print(len(all_x))
logger.info(all_x[0])
logger.info(len(all_x[0][0]))
logger.info(len(all_x))

[[-0.8427648213988651, 0.3744020210323477, 0.6796123704286434, -1.398975413973587, -0.4831419202847951, -0.2120300841305121, 1.5887596625600091, 0.7945789587268225, -0.8612693268611251, -0.4819729243606949, -0.6745224313841819, 0.7137208435891645, -1.447089954740047, -0.7710128163592748, -1.4231815568069368, -0.5851405270139463, -0.5641898854144399, 0.5775106850669863, 0.3939858698913405, -0.2032969502372001, -0.2890718868318484, 0.1700684310067274, -0.2031244129114749, -0.9752387057279804, -0.995631448716658, -0.7346136214669141, 0.2047416529938912, -0.7879162404292406, -0.4658827214597087, -0.0343615044915247, -1.3314821107815475, 0.3379315521074886, -0.3880554131475662, 0.8285543981909917, 2.770245567717861, 0.0776143028335215, -0.1259757336723928, 0.0863841325254607, -0.1474826847594624, -0.3999358135056357, -0.0116277505661367, -0.0886088512738706, -0.1491049589303728, 0.0552791522161626, -0.0357876785866217, -0.211245000207003, -0.1237195765566573, -0.1259757336723928, 0.04217015

In [44]:
# Aliases read by the k-fold training cell: per-patient feature sequences and
# their regression targets.
# The outcome-label aliases stay disabled because `all_y` is only defined in
# the TJ / HM branches of the loading cell:
# long_y = all_y
# long_y_kfold = [each[-1] for each in all_y]
long_x, long_time = all_x, all_time

In [45]:

def get_n2n_data(x, y, x_len):
    """Expand every sequence into one sample per time step (all of its prefixes).

    For sequence ``x[i]`` with ``T`` steps this emits ``T`` samples: the prefix
    ``x[i][:t]`` paired with the label ``y[i][t - 1]`` and the length ``t``,
    for ``t = 1 .. T``.

    @param x (list): sequences, one per subject
    @param y (list): per-time-step labels, aligned with ``x``
    @param x_len (list): one entry per sequence; only its length is checked
    @returns (new_x, new_y, new_x_len): the flattened prefixes, labels and prefix lengths
    """
    n_sequences = len(x)
    assert n_sequences == len(y)
    assert n_sequences == len(x_len)

    new_x, new_y, new_x_len = [], [], []
    for sequence, labels in zip(x, y):
        for prefix_len in range(1, len(sequence) + 1):
            new_x.append(sequence[:prefix_len])
            new_y.append(labels[prefix_len - 1])
            new_x_len.append(prefix_len)
    return new_x, new_y, new_x_len

In [46]:
class target_model(nn.Module):
    """1-D CNN encoder shared across features, followed by two MLP heads.

    Every input feature is treated as a univariate time series and pushed
    through the same convolutional stack (`self.layers`); the per-feature
    embeddings are summed and fed to a main head (`MLP`) and a
    sigmoid-activated head (`MLP_outcome`).

    Several sub-modules created in `__init__` (GRUs, demo_proj*, Linear, output,
    FC_embed, tanh, softmax, relu) are not used by `forward`. They are kept, in
    the original order, so that parameter initialisation and `state_dict` keys
    stay compatible with checkpoints saved from this class.
    """

    def __init__(self, input_dim, hidden_dim, d_model,  MHD_num_head, d_ff, output_dim, keep_prob=0.5):
        """
        @param input_dim (int): number of features per time step
        @param hidden_dim (int): channel width of the CNN encoder / embedding size
        @param d_model (int): must be divisible by `MHD_num_head` (checked in forward)
        @param MHD_num_head (int): see `d_model`; not used otherwise in this class
        @param d_ff (int): stored only
        @param output_dim (int): width of both heads; softmax is applied to the
            main head when it is not 1
        @param keep_prob (float): dropout keeps activations with this probability
        """
        super(target_model, self).__init__()

        # hyperparameters
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim  # d_model
        self.d_model = d_model
        self.MHD_num_head = MHD_num_head
        self.d_ff = d_ff
        self.output_dim = output_dim
        self.keep_prob = keep_prob

        # layers
        # `clones` is a helper defined elsewhere in the notebook.
        self.GRUs = clones(nn.GRU(1, self.hidden_dim, batch_first = True), self.input_dim)

        self.demo_proj_main = nn.Linear(12, self.hidden_dim)
        self.demo_proj = nn.Linear(12, self.hidden_dim)
        self.Linear = nn.Linear(self.hidden_dim, 1)
        self.output = nn.Linear(self.input_dim, self.output_dim)

        self.dropout = nn.Dropout(p = 1 - self.keep_prob)
        self.FC_embed = nn.Linear(self.hidden_dim, self.hidden_dim)
        self.tanh=nn.Tanh()
        # Shared per-feature encoder: (batch, 1, time) -> (batch, hidden_dim, 1).
        self.layers = nn.Sequential(
            nn.Conv1d(1, hidden_dim, kernel_size=8, padding=4, bias=False),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),

            nn.Conv1d(hidden_dim, 2*hidden_dim, kernel_size=5, padding=2, bias=False),
            nn.BatchNorm1d(2*hidden_dim),
            nn.ReLU(),

            nn.Conv1d(2*hidden_dim, hidden_dim, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),

            nn.AdaptiveAvgPool1d(1),
        )

        self.MLP = nn.Sequential(
            nn.Linear(self.hidden_dim, 8),
            nn.ReLU(),
            nn.Linear(8, self.output_dim)
        )
        self.MLP_outcome = nn.Sequential(
            nn.Linear(self.hidden_dim, 8),
            nn.ReLU(),
            nn.Linear(8, self.output_dim)
        )
        self.softmax = nn.Softmax()
        self.sigmoid = nn.Sigmoid()
        self.relu=nn.ReLU()

    def forward(self, input, lens):
        """Encode a padded batch and return both head outputs.

        @param input (Tensor): shape (batch, time, input_dim)
        @param lens: accepted for interface compatibility; not used
        @returns (output, outcome): both of shape (batch, output_dim). `output`
            is the raw main-head value (softmax-normalised when output_dim != 1);
            `outcome` is the sigmoid of the second head.
        """
        feature_dim = input.size(2)
        assert(feature_dim == self.input_dim)
        assert(self.d_model % self.MHD_num_head == 0)

        # Sum the per-feature embeddings in feature order. Only explicit
        # dimensions are added/removed: a bare `.squeeze()` would also drop the
        # batch axis for a batch of one sample (and the time axis for length-1
        # sequences), corrupting the shapes fed to the encoder.
        codats_input = None
        for i in range(feature_dim):
            series = input[:, :, i].unsqueeze(1)               # (batch, 1, time)
            embeded_input = self.layers(series).squeeze(-1)    # (batch, hidden_dim)
            if codats_input is None:
                codats_input = embeded_input
            else:
                codats_input = codats_input + embeded_input

        posi_input = self.dropout(codats_input)  # (batch, hidden_dim)
        output = self.MLP(posi_input)
        # nn.Sigmoid module instead of the deprecated F.sigmoid; same values.
        outcome = self.sigmoid(self.MLP_outcome(posi_input))
        if self.output_dim != 1:
            output = F.softmax(output, dim=1)
        return output, outcome

In [47]:
def transfer_gru_dict(pretrain_dict, model_dict):
    """Pick the pretrained entries that belong to the ``layers`` module.

    A key qualifies when the text before its first ``.`` is exactly
    ``layers``; the integer between the first and second ``.`` is parsed and
    printed for every transferred key.

    @param pretrain_dict (dict): state dict of the pretrained model
    @param model_dict (dict): state dict of the receiving model; not consulted
    @returns (dict): the selected ``key -> value`` pairs, in their original order
    """
    selected = {}
    for key in pretrain_dict:
        first_dot = key.find('.')
        if key[:first_dot] != "layers":
            continue
        second_dot = key.find('.', first_dot + 1)
        model_module_idx = int(key[first_dot + 1: second_dot])
        print(f'model_module_idx is {model_module_idx}')
        selected[key] = pretrain_dict[key]
    return selected

In [48]:
# Hyper-parameters of the target network.
# Feature count per target dataset; an unlisted dataset leaves `input_dim`
# untouched.
_INPUT_DIM_BY_DATASET = {'PD': 69, 'TJ': 75, 'HM': 99}
if target_dataset in _INPUT_DIM_BY_DATASET:
    input_dim = _INPUT_DIM_BY_DATASET[target_dataset]

cell = 'GRU'
hidden_dim = d_model = d_ff = 64
MHD_num_head = 4
output_dim = 1



In [49]:
def ckd_batch_iter(x, y, lens, batch_size, shuffle=False):
    """ Yield mini-batches whose samples are sorted by sequence length (longest first).
    @param x (list): input sequences
    @param y (list): labels aligned with x
    @param lens (list): per-sample lengths aligned with x
    @param batch_size (int): batch size
    @param shuffle (boolean): whether to randomly shuffle the sample order
        (with numpy's global RNG) before batching
    @returns yields (batch_x, batch_y, batch_lens) lists
    """
    sample_order = list(range(len(x)))
    if shuffle:
        np.random.shuffle(sample_order)

    # Rounded up: the final batch may hold fewer than batch_size samples.
    n_batches = math.ceil(len(x) / batch_size)
    for batch_no in range(n_batches):
        chosen = sample_order[batch_no * batch_size: (batch_no + 1) * batch_size]
        triples = [(x[k], y[k], lens[k]) for k in chosen]
        # Stable sort, so samples of equal length keep their relative order.
        triples.sort(key=lambda triple: len(triple[0]), reverse=True)

        yield ([triple[0] for triple in triples],
               [triple[1] for triple in triples],
               [triple[2] for triple in triples])

In [50]:
class TargetMultitaskLoss(nn.Module):
    """Weighted sum of a regression (MSE) and a classification (BCE) loss.

    `alpha` holds one learnable weight per task, initialised to 1. The weights
    are only trained if this module instance is kept alive and its parameters
    are handed to an optimizer.
    """

    def __init__(self, task_num=2):
        """
        @param task_num (int): number of task weights stored in `alpha`;
            `forward` uses the first two
        """
        super(TargetMultitaskLoss, self).__init__()
        self.task_num = task_num
        self.alpha = nn.Parameter(torch.ones((task_num)), requires_grad=True)
        self.mse = nn.MSELoss()
        self.bce = nn.BCELoss()

    def forward(self, opt_student, los, outcome, outcome_y):
        """
        @param opt_student (Tensor): regression prediction
        @param los (Tensor): regression target, same shape as `opt_student`
        @param outcome (Tensor): predicted probability in [0, 1]
        @param outcome_y (Tensor): binary target, same shape as `outcome`
        @returns (Tensor): scalar `alpha[0] * MSE + alpha[1] * BCE`
        """
        MSE_Loss = self.mse(opt_student, los)
        BCE_Loss = self.bce(outcome, outcome_y)
        return MSE_Loss * self.alpha[0] + BCE_Loss * self.alpha[1]

def get_target_multitask_loss(opt_student, los, outcome, outcome_y, criterion=None):
    """Compute the combined MSE + BCE loss.

    @param opt_student, los, outcome, outcome_y: forwarded to
        `TargetMultitaskLoss.forward`
    @param criterion (TargetMultitaskLoss or None): a persistent loss module to
        reuse. Pass one (and give its parameters to the optimizer) when the
        task weights should actually be learned. With the default `None` a
        throw-away module is built on every call, so its `alpha` is always the
        initial all-ones vector and the result is the plain sum of both losses.
    @returns (Tensor): scalar loss
    """
    if criterion is None:
        criterion = TargetMultitaskLoss(task_num=2)
    return criterion(opt_student, los, outcome, outcome_y)

def reverse_los(y, los_info):
    """Map a standardised value back to its original scale.

    @param y: standardised value(s)
    @param los_info (dict): must provide "los_std" and "los_mean"
    @returns: ``y * los_std + los_mean``
    """
    scale = los_info["los_std"]
    offset = los_info["los_mean"]
    return y * scale + offset

In [51]:
# Load the statistics (`los_mean`, `los_std`) that `reverse_los` uses to map
# standardised predictions back to the original scale. The `with` block closes
# the file handle, which the bare `pickle.load(open(...))` form never did.
# NOTE(review): pickle files must come from a trusted location.
with open(data_path + 'los_info.pkl', 'rb') as los_info_file:
    los_info = pickle.load(los_info_file)
print(los_info)
logger.info(los_info)

{'los_mean': 1055.0307777880782, 'los_std': 799.0879849276147}


In [52]:
# K-fold fine-tuning of `target_model` on the target dataset.
# For every fold: build a fresh model, optionally copy the pretrained
# `layers.*` weights into it, train for `epochs` epochs and evaluate on the
# held-out fold after each epoch, checkpointing whenever the MSE improves.

# Number of CV folds and training epochs per target dataset.
if target_dataset == 'PD':
    n_splits = 5
    epochs = 30
elif target_dataset == 'TJ':
    n_splits = 10
    epochs = 150
elif target_dataset == 'HM':
    n_splits = 3
    epochs = 20

# NOTE(review): `teacher_flag` is only referenced by the commented-out code below.
teacher_flag = True
transfer_flag = True
# Seeded shuffling, so both passes over `kfold.split(long_x)` below see the
# same folds.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)

# Short dataset tag used in checkpoint paths.
if target_dataset == 'PD':    
    data_str = 'pd'
elif target_dataset == 'TJ':    
    data_str = 'covid'
elif target_dataset == 'HM':
    data_str = 'spain'

# if teacher_flag:
#     file_name = './model/pretrained-challenge-front-fill-2'+ data_str
# else: 
#     file_name = './model/pretrained-challenge-front-fill-2'+ data_str + '-noteacher'
    
# Path of the pretrained checkpoint that is loaded when `transfer_flag` is set.
file_name = './model/pretrained-dann'+ data_str;


batch_size = 256

fold_count = 0
total_train_loss = []
total_valid_loss = []

# Best validation MSE seen over all folds so far (large sentinel to start with).
global_best = 10000
# Best-epoch metrics, one entry per fold. `mape` and `kappa` are never filled
# in this cell.
mse = []
mad = []
mape = []
kappa = []
# One metrics dict per (fold, epoch).
history = []

pad_token = np.zeros(input_dim)
# begin_time = time.time()

# Dry run over the folds: warn when the expanded training set would leave a
# final batch with exactly one sample.
# NOTE(review): presumably this guards against the dimension-less `.squeeze()`
# calls in `target_model.forward` collapsing the batch axis -- confirm. The
# check uses the literal 256 rather than `batch_size`.
for train, test in kfold.split(long_x):
        
    train_x = [long_x[i] for i in train]
    train_y = [long_time[i] for i in train]
    train_x_len = [all_x_len[i] for i in train]
    #train_static = [long_static[i] for i in train]
    
    train_x, train_y, train_x_len = get_n2n_data(train_x, train_y, train_x_len)
    if len(train_x) % 256 == 1:
        print(len(train_x))
        print('wrong squeeze!')

# for train, test in kfold.split(long_x):
for train, test in kfold.split(long_x):
    # `reverse` swaps the roles of the two index sets: train on the small
    # held-out part and evaluate on the rest.
    if reverse:
        temp = train
        train = test
        test = temp
    
    model = target_model(input_dim = input_dim,output_dim=output_dim, d_model=d_model, MHD_num_head=MHD_num_head, d_ff=d_ff, hidden_dim=hidden_dim).to(device)
    
    if transfer_flag:
        # Overwrite the freshly initialised `layers.*` entries with the
        # pretrained ones; every other parameter keeps its random init.
        # NOTE(review): the device expression duplicates the config-cell `device`.
        checkpoint = torch.load(file_name, \
                        map_location=torch.device("cuda:3" if torch.cuda.is_available() == True else 'cpu'))
        pretrain_dict = checkpoint['net']
        model_dict = model.state_dict()
        pretrain_dict = transfer_gru_dict(pretrain_dict, model_dict)
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    fold_count += 1
#     print(train)

    
    # Expand each patient sequence into one sample per time step (all prefixes).
    train_x = [long_x[i] for i in train]
    train_y = [long_time[i] for i in train]
    train_x_len = [all_x_len[i] for i in train]
    #train_static = [long_static[i] for i in train]
    
    train_x, train_y, train_x_len = get_n2n_data(train_x, train_y, train_x_len)
    
    test_x = [long_x[i] for i in test]
    test_y = [long_time[i] for i in test]
    test_x_len = [all_x_len[i] for i in test]
    #test_static = [long_static[i] for i in test]
    
    test_x, test_y, test_x_len = get_n2n_data(test_x, test_y, test_x_len)
    
    # NOTE(review): os.mkdir fails if the parent './model/' directory is missing.
    if not os.path.exists('./model/'+data_str):
        os.mkdir('./model/'+data_str)
        
    # The checkpoint path already ends with the fold number.
    if transfer_flag:
        target_file_name = './model/'+data_str+'/distcare-trans-'+str(n_splits)+'-fold-LOS-regression' + str(fold_count)#4114
    else:
        target_file_name = './model/'+data_str+'/distcare-no-trans-'+str(n_splits)+'-fold-LOS-regression' + str(fold_count)#4114
    
    fold_train_loss = []
    fold_valid_loss = []
    # Best validation metrics of this fold. `best_mape` and `best_kappa` are
    # initialised but never updated below.
    best_mse = 10000
    best_mad = 0
    best_mape = 0
    best_kappa = 0
    
    for each_epoch in range(epochs):
       
        
        epoch_loss = []
        counter_batch = 0
        model.train()  
        
        # ---- training pass ----
        for step, (batch_x, batch_y, batch_lens) in enumerate(ckd_batch_iter(train_x, train_y, train_x_len, batch_size, shuffle=True)):  
            optimizer.zero_grad()
            batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
            batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
            batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()

            # NOTE(review): `masks` is computed but not used in this cell.
            masks = length_to_mask(batch_lens).unsqueeze(-1).float()

            # `emb` receives the second return value of target_model.forward,
            # i.e. the sigmoid output of its second head; it is not used here.
            opt, emb = model(batch_x, batch_lens)

            MSE_Loss = get_re_loss(opt, batch_y.unsqueeze(-1))

#             model_loss = pred_loss + 1e7*decov_loss
            model_loss = MSE_Loss

            loss = model_loss

            epoch_loss.append(MSE_Loss.cpu().detach().numpy())
            loss.backward()
            # Clip the global gradient norm to 20 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 20)
            optimizer.step()
            
            if step % 50 == 0:
                print('Fold %d Epoch %d Batch %d: Train Loss = %.4f'%(fold_count,each_epoch, step, loss.cpu().detach().numpy()))
                logger.info('Fold %d Epoch %d Batch %d: Train Loss = %.4f'%(fold_count,each_epoch, step, loss.cpu().detach().numpy()))
            
        epoch_loss = np.mean(epoch_loss)
        fold_train_loss.append(epoch_loss)

        #Validation
        # ---- evaluation pass on the held-out fold (also used for model selection) ----
        # NOTE(review): of the accumulators below only `y_pred_flatten` and
        # `y_true_flatten` are filled.
        y_true = []
        y_pred = []
        y_pred_flatten = []
        y_true_flatten = []
        outcome_pred_flatten = []
        outcome_true_flatten = []
        with torch.no_grad():
            model.eval()
            valid_loss = []
            valid_true = []
            valid_pred = []
            for batch_x, batch_y, batch_lens in ckd_batch_iter(test_x, test_y, test_x_len, batch_size):
                batch_x = torch.tensor(pad_sents(batch_x, pad_token), dtype=torch.float32).to(device)
                batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)
                batch_lens = torch.tensor(batch_lens, dtype=torch.float32).to(device).int()
                masks = length_to_mask(batch_lens).unsqueeze(-1).float()
               
                opt, emb = model(batch_x, batch_lens)
                
                MSE_Loss = get_re_loss(opt, batch_y.unsqueeze(-1))
                
                valid_loss.append(MSE_Loss.cpu().detach().numpy())

                # Undo the standardisation with `los_info`, then divide by 30
                # (presumably converting days to months -- TODO confirm).
                y_pred_flatten += [reverse_los(x, los_info) / 30 for x in list(opt.cpu().detach().numpy().flatten())]
                y_true_flatten += [reverse_los(x, los_info) / 30 for x in list(batch_y.cpu().numpy().flatten())]
            

            valid_loss = np.mean(valid_loss)
            fold_valid_loss.append(valid_loss)
            ret = metrics.print_metrics_regression(y_true_flatten, y_pred_flatten, verbose=0)
            history.append(ret)
            #print()

            if each_epoch % 10 == 0:
                print('Fold %d, epoch %d: Loss = %.4f Valid loss = %.4f MSE = %.4f' % (
                    fold_count, each_epoch, fold_train_loss[-1], fold_valid_loss[-1], ret['mse']), flush=True)
                logger.info('Fold %d, epoch %d: Loss = %.4f Valid loss = %.4f MSE = %.4f' % (
                    fold_count, each_epoch, fold_train_loss[-1], fold_valid_loss[-1], ret['mse']))
                # metrics.print_metrics_regression(y_true_flatten, y_pred_flatten)
                
            cur_mse = ret['mse']
            if cur_mse < best_mse:
                print('------------ Save FOLD-BEST model - MSE: %.4f ------------' % cur_mse, flush=True)
                logger.info('------------ Save FOLD-BEST model - MSE: %.4f ------------' % cur_mse)
                metrics.print_metrics_regression(y_true_flatten, y_pred_flatten)
                best_mse = cur_mse
                best_mad = ret['mad']
                state = {
                    'net': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'epoch': each_epoch
                }
                # Fold-best checkpoint: the fold number is appended a second
                # time after '_' (target_file_name already ends with it).
                torch.save(state, target_file_name + '_' + str(fold_count))

                if cur_mse < global_best:
                    # Best across all folds so far. It is written to the
                    # current fold's `target_file_name`, so the path differs
                    # from fold to fold.
                    global_best = cur_mse
                    state = {
                        'net': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'epoch': each_epoch
                    }
                    torch.save(state, target_file_name)
                    print('------------ Save best model - MSE: %.4f ------------' % cur_mse, flush=True)
                    logger.info('------------ Save best model - MSE: %.4f ------------' % cur_mse)

        # Reported after every epoch: metrics of the epoch that just finished
        # (not the fold's best).
        print('Fold %d, mse = %.4f, mad = %.4f' % (fold_count, ret['mse'], ret['mad']), flush=True)
        logger.info('Fold %d, mse = %.4f, mad = %.4f' % (fold_count, ret['mse'], ret['mad']))

    # Record the fold's best-epoch metrics and its loss curves.
    mse.append(best_mse)
    mad.append(best_mad)
    total_train_loss.append(fold_train_loss)
    total_valid_loss.append(fold_valid_loss)


# Cross-validation summary: mean(std) of the per-fold best metrics.
print('mse %.4f(%.4f)' % (np.mean(mse), np.std(mse)))
print('mad %.4f(%.4f)' % (np.mean(mad), np.std(mad)))
logger.info('mse %.4f(%.4f)' % (np.mean(mse), np.std(mse)))
logger.info('mad %.4f(%.4f)' % (np.mean(mad), np.std(mad)))



Fold 1 Epoch 0 Batch 0: Train Loss = 72.1071
Fold 1, epoch 0: Loss = 7.8526 Valid loss = 1.1793 MSE = 814.2098
------------ Save FOLD-BEST model - MSE: 814.2098 ------------
Custom bins confusion matrix:
[[  0  60 206   0]
 [  0 202 757   0]
 [  0 131 377   0]
 [  0  99 243   0]]
Mean absolute deviation (MAD) = 23.62084598421489
Mean squared error (MSE) = 814.2098191072294
Mean absolute percentage error (MAPE) = 302.500848144334
Cohen kappa score = -0.03263354955477715
------------ Save best model - MSE: 814.2098 ------------
Fold 1, mse = 814.2098, mad = 23.6208




Fold 1 Epoch 1 Batch 0: Train Loss = 1.3365
------------ Save FOLD-BEST model - MSE: 750.5942 ------------
Custom bins confusion matrix:
[[  0   0 266   0]
 [  0   0 959   0]
 [  0   0 508   0]
 [  0   0 342   0]]
Mean absolute deviation (MAD) = 23.632402604685897
Mean squared error (MSE) = 750.594190851123
Mean absolute percentage error (MAPE) = 333.42080882346295
Cohen kappa score = 0.0
------------ Save best model - MSE: 750.5942 ------------
Fold 1, mse = 750.5942, mad = 23.6324




Fold 1 Epoch 2 Batch 0: Train Loss = 1.0165
------------ Save FOLD-BEST model - MSE: 749.3143 ------------
Custom bins confusion matrix:
[[  0   0 266   0]
 [  0   0 959   0]
 [  0   0 508   0]
 [  0   0 342   0]]
Mean absolute deviation (MAD) = 23.677345646159257
Mean squared error (MSE) = 749.3143360443516
Mean absolute percentage error (MAPE) = 337.80900797999436
Cohen kappa score = 0.0
------------ Save best model - MSE: 749.3143 ------------
Fold 1, mse = 749.3143, mad = 23.6773




Fold 1 Epoch 3 Batch 0: Train Loss = 0.8943
------------ Save FOLD-BEST model - MSE: 677.1827 ------------
Custom bins confusion matrix:
[[  0  65 201   0]
 [  0 208 751   0]
 [  0  27 481   0]
 [  0  20 322   0]]
Mean absolute deviation (MAD) = 21.932646845077787
Mean squared error (MSE) = 677.1827282504215
Mean absolute percentage error (MAPE) = 292.40350059492874
Cohen kappa score = 0.09473391294904943
------------ Save best model - MSE: 677.1827 ------------
Fold 1, mse = 677.1827, mad = 21.9326




Fold 1 Epoch 4 Batch 0: Train Loss = 0.9905
------------ Save FOLD-BEST model - MSE: 633.4753 ------------
Custom bins confusion matrix:
[[  0 163 103   0]
 [  0 496 463   0]
 [  0 212 296   0]
 [  0  64 278   0]]
Mean absolute deviation (MAD) = 20.594966195026462
Mean squared error (MSE) = 633.475322366702
Mean absolute percentage error (MAPE) = 248.41779477428375
Cohen kappa score = 0.12861838770870437
------------ Save best model - MSE: 633.4753 ------------
Fold 1, mse = 633.4753, mad = 20.5950




Fold 1 Epoch 5 Batch 0: Train Loss = 1.1202
Fold 1, mse = 659.8279, mad = 20.2928




Fold 1 Epoch 6 Batch 0: Train Loss = 0.8713
Fold 1, mse = 650.0807, mad = 20.2310




Fold 1 Epoch 7 Batch 0: Train Loss = 1.0459
Fold 1, mse = 635.1553, mad = 20.5666




Fold 1 Epoch 8 Batch 0: Train Loss = 1.0840
Fold 1, mse = 662.8716, mad = 20.3004




Fold 1 Epoch 9 Batch 0: Train Loss = 1.0117
Fold 1, mse = 680.9585, mad = 20.5112




Fold 1 Epoch 10 Batch 0: Train Loss = 0.9691
Fold 1, epoch 10: Loss = 0.9452 Valid loss = 0.9515 MSE = 710.1398
Fold 1, mse = 710.1398, mad = 20.8788




Fold 1 Epoch 11 Batch 0: Train Loss = 0.9234
Fold 1, mse = 765.2659, mad = 21.4794




Fold 1 Epoch 12 Batch 0: Train Loss = 1.0027
Fold 1, mse = 721.8142, mad = 21.0009




Fold 1 Epoch 13 Batch 0: Train Loss = 0.9516
------------ Save FOLD-BEST model - MSE: 625.8400 ------------
Custom bins confusion matrix:
[[  0 190  76   0]
 [  0 619 340   0]
 [  0 268 240   0]
 [  0 109 233   0]]
Mean absolute deviation (MAD) = 20.19409781066168
Mean squared error (MSE) = 625.8399505312944
Mean absolute percentage error (MAPE) = 231.3948948087006
Cohen kappa score = 0.1344706696737601
------------ Save best model - MSE: 625.8400 ------------
Fold 1, mse = 625.8400, mad = 20.1941




Fold 1 Epoch 14 Batch 0: Train Loss = 0.8635
Fold 1, mse = 712.3837, mad = 20.8776




Fold 1 Epoch 15 Batch 0: Train Loss = 0.8531
Fold 1, mse = 737.8831, mad = 21.1351




Fold 1 Epoch 16 Batch 0: Train Loss = 0.8397
Fold 1, mse = 882.2655, mad = 23.1343




Fold 1 Epoch 17 Batch 0: Train Loss = 0.9895
Fold 1, mse = 696.2643, mad = 20.6858




Fold 1 Epoch 18 Batch 0: Train Loss = 1.0658
Fold 1, mse = 766.2148, mad = 21.4898




Fold 1 Epoch 19 Batch 0: Train Loss = 0.8657
Fold 1, mse = 631.4930, mad = 20.8332




Fold 1 Epoch 20 Batch 0: Train Loss = 0.9311
Fold 1, epoch 20: Loss = 0.9341 Valid loss = 1.0218 MSE = 751.7872
Fold 1, mse = 751.7872, mad = 21.3301




Fold 1 Epoch 21 Batch 0: Train Loss = 0.9216
Fold 1, mse = 854.9420, mad = 22.8062




Fold 1 Epoch 22 Batch 0: Train Loss = 0.8922
------------ Save FOLD-BEST model - MSE: 613.5950 ------------
Custom bins confusion matrix:
[[  0 199  67   0]
 [  0 640 319   0]
 [  0 262 246   0]
 [  0 112 230   0]]
Mean absolute deviation (MAD) = 19.94442335857548
Mean squared error (MSE) = 613.594980516164
Mean absolute percentage error (MAPE) = 228.008702044516
Cohen kappa score = 0.15230245475721194
------------ Save best model - MSE: 613.5950 ------------
Fold 1, mse = 613.5950, mad = 19.9444




Fold 1 Epoch 23 Batch 0: Train Loss = 1.1818
Fold 1, mse = 832.0229, mad = 22.4699




Fold 1 Epoch 24 Batch 0: Train Loss = 0.9975
Fold 1, mse = 933.6638, mad = 23.7635




Fold 1 Epoch 25 Batch 0: Train Loss = 0.9944
Fold 1, mse = 779.7489, mad = 21.6395




Fold 1 Epoch 26 Batch 0: Train Loss = 1.0959
Fold 1, mse = 748.5878, mad = 21.1958




Fold 1 Epoch 27 Batch 0: Train Loss = 0.9423
Fold 1, mse = 807.3810, mad = 21.9651




Fold 1 Epoch 28 Batch 0: Train Loss = 0.9169
Fold 1, mse = 684.4929, mad = 20.5588




Fold 1 Epoch 29 Batch 0: Train Loss = 0.9781
Fold 1, mse = 757.8957, mad = 21.4504




Fold 2 Epoch 0 Batch 0: Train Loss = 17.3569
Fold 2, epoch 0: Loss = 5.9792 Valid loss = 0.7772 MSE = 569.8493
------------ Save FOLD-BEST model - MSE: 569.8493 ------------
Custom bins confusion matrix:
[[  0 155 116   0]
 [  0 548 425   0]
 [  0 241 330   0]
 [  0  45 225   0]]
Mean absolute deviation (MAD) = 19.34140817836046
Mean squared error (MSE) = 569.8492750891465
Mean absolute percentage error (MAPE) = 259.50288556814775
Cohen kappa score = 0.14169594102958694
------------ Save best model - MSE: 569.8493 ------------
Fold 2, mse = 569.8493, mad = 19.3414




Fold 2 Epoch 1 Batch 0: Train Loss = 1.0209
------------ Save FOLD-BEST model - MSE: 545.6855 ------------
Custom bins confusion matrix:
[[  0 178  93   0]
 [  0 616 357   0]
 [  0 296 275   0]
 [  0  76 194   0]]
Mean absolute deviation (MAD) = 18.47127305523778
Mean squared error (MSE) = 545.6855415317389
Mean absolute percentage error (MAPE) = 232.9154776790005
Cohen kappa score = 0.12607435758421337
------------ Save best model - MSE: 545.6855 ------------
Fold 2, mse = 545.6855, mad = 18.4713




Fold 2 Epoch 2 Batch 0: Train Loss = 0.9966
Fold 2, mse = 549.1325, mad = 18.3344




Fold 2 Epoch 3 Batch 0: Train Loss = 0.9447
------------ Save FOLD-BEST model - MSE: 542.6418 ------------
Custom bins confusion matrix:
[[ 13 188  70   0]
 [  9 663 301   0]
 [  0 343 228   0]
 [  0 105 165   0]]
Mean absolute deviation (MAD) = 18.242439507629108
Mean squared error (MSE) = 542.6418033946734
Mean absolute percentage error (MAPE) = 213.25146782073463
Cohen kappa score = 0.12287004530749157
------------ Save best model - MSE: 542.6418 ------------
Fold 2, mse = 542.6418, mad = 18.2424




Fold 2 Epoch 4 Batch 0: Train Loss = 0.9414
------------ Save FOLD-BEST model - MSE: 541.9739 ------------
Custom bins confusion matrix:
[[  6 164 101   0]
 [  3 593 377   0]
 [  0 274 297   0]
 [  0  61 209   0]]
Mean absolute deviation (MAD) = 18.435064592206054
Mean squared error (MSE) = 541.9739353425562
Mean absolute percentage error (MAPE) = 234.95716899998666
Cohen kappa score = 0.14431565392603496
------------ Save best model - MSE: 541.9739 ------------
Fold 2, mse = 541.9739, mad = 18.4351




Fold 2 Epoch 5 Batch 0: Train Loss = 1.0121
Fold 2, mse = 551.1479, mad = 18.4690




Fold 2 Epoch 6 Batch 0: Train Loss = 0.8236
Fold 2, mse = 543.4236, mad = 18.3083




Fold 2 Epoch 7 Batch 0: Train Loss = 1.0509
------------ Save FOLD-BEST model - MSE: 538.3216 ------------
Custom bins confusion matrix:
[[ 17 173  81   0]
 [ 13 625 335   0]
 [  0 316 255   0]
 [  0  89 181   0]]
Mean absolute deviation (MAD) = 18.226069193796004
Mean squared error (MSE) = 538.3215638147557
Mean absolute percentage error (MAPE) = 218.74482677455367
Cohen kappa score = 0.1344146768258906
------------ Save best model - MSE: 538.3216 ------------
Fold 2, mse = 538.3216, mad = 18.2261




Fold 2 Epoch 8 Batch 0: Train Loss = 0.9894
Fold 2, mse = 557.2650, mad = 18.5608




Fold 2 Epoch 9 Batch 0: Train Loss = 0.9863
Fold 2, mse = 660.2055, mad = 20.4778




Fold 2 Epoch 10 Batch 0: Train Loss = 0.9578
Fold 2, epoch 10: Loss = 0.9835 Valid loss = 0.7344 MSE = 553.7648
Fold 2, mse = 553.7648, mad = 18.5832




Fold 2 Epoch 11 Batch 0: Train Loss = 0.9607
Fold 2, mse = 608.6864, mad = 19.6581




Fold 2 Epoch 12 Batch 0: Train Loss = 0.9533
Fold 2, mse = 709.3873, mad = 21.2633




Fold 2 Epoch 13 Batch 0: Train Loss = 1.0500
Fold 2, mse = 582.1337, mad = 19.0246




Fold 2 Epoch 14 Batch 0: Train Loss = 0.9214
Fold 2, mse = 615.6527, mad = 19.7716




Fold 2 Epoch 15 Batch 0: Train Loss = 1.0217
Fold 2, mse = 607.8264, mad = 19.5673




Fold 2 Epoch 16 Batch 0: Train Loss = 0.8610
Fold 2, mse = 560.6755, mad = 18.7379




Fold 2 Epoch 17 Batch 0: Train Loss = 0.9867
Fold 2, mse = 615.5615, mad = 19.8348




Fold 2 Epoch 18 Batch 0: Train Loss = 0.9148
Fold 2, mse = 770.6163, mad = 22.1817




Fold 2 Epoch 19 Batch 0: Train Loss = 1.0319
Fold 2, mse = 781.2954, mad = 22.3610




Fold 2 Epoch 20 Batch 0: Train Loss = 0.9752
Fold 2, epoch 20: Loss = 0.9582 Valid loss = 1.7205 MSE = 775.3091
Fold 2, mse = 775.3091, mad = 22.2714




Fold 2 Epoch 21 Batch 0: Train Loss = 1.1316
Fold 2, mse = 605.1253, mad = 19.7284




Fold 2 Epoch 22 Batch 0: Train Loss = 0.8920
Fold 2, mse = 594.7267, mad = 19.5065




Fold 2 Epoch 23 Batch 0: Train Loss = 0.9956
Fold 2, mse = 797.3380, mad = 22.5208




Fold 2 Epoch 24 Batch 0: Train Loss = 1.0019
Fold 2, mse = 820.7901, mad = 22.6070




Fold 2 Epoch 25 Batch 0: Train Loss = 1.0222
Fold 2, mse = 745.8438, mad = 21.6058




Fold 2 Epoch 26 Batch 0: Train Loss = 0.9849
Fold 2, mse = 707.3613, mad = 21.2887




Fold 2 Epoch 27 Batch 0: Train Loss = 0.9128
Fold 2, mse = 806.3321, mad = 22.5521




Fold 2 Epoch 28 Batch 0: Train Loss = 0.9415
Fold 2, mse = 999.0415, mad = 24.7160




Fold 2 Epoch 29 Batch 0: Train Loss = 1.0053
Fold 2, mse = 703.0652, mad = 21.2001




Fold 3 Epoch 0 Batch 0: Train Loss = 19.6781
Fold 3, epoch 0: Loss = 3.8732 Valid loss = 0.9116 MSE = 651.3079
------------ Save FOLD-BEST model - MSE: 651.3079 ------------
Custom bins confusion matrix:
[[  0 148 133   0]
 [  0 469 650   0]
 [  0 236 457   0]
 [  0  50 347   0]]
Mean absolute deviation (MAD) = 20.84477064604104
Mean squared error (MSE) = 651.3079292808779
Mean absolute percentage error (MAPE) = 207.1259888751579
Cohen kappa score = 0.11120305498856842
Fold 3, mse = 651.3079, mad = 20.8448




Fold 3 Epoch 1 Batch 0: Train Loss = 1.1399
------------ Save FOLD-BEST model - MSE: 624.2762 ------------
Custom bins confusion matrix:
[[  0 261  20   0]
 [  0 886 233   0]
 [  0 450 243   0]
 [  0 215 182   0]]
Mean absolute deviation (MAD) = 19.92716747754808
Mean squared error (MSE) = 624.2762468365441
Mean absolute percentage error (MAPE) = 176.03332305863378
Cohen kappa score = 0.13841951219512194
Fold 3, mse = 624.2762, mad = 19.9272




Fold 3 Epoch 2 Batch 0: Train Loss = 0.9251
------------ Save FOLD-BEST model - MSE: 618.9336 ------------
Custom bins confusion matrix:
[[  0 255  26   0]
 [  0 839 280   0]
 [  0 411 282   0]
 [  0 193 204   0]]
Mean absolute deviation (MAD) = 19.92940166293779
Mean squared error (MSE) = 618.9336492387221
Mean absolute percentage error (MAPE) = 179.44295635695377
Cohen kappa score = 0.1492564287927295
Fold 3, mse = 618.9336, mad = 19.9294




Fold 3 Epoch 3 Batch 0: Train Loss = 0.9643
Fold 3, mse = 631.4752, mad = 19.9394




Fold 3 Epoch 4 Batch 0: Train Loss = 1.0566
Fold 3, mse = 690.8201, mad = 20.7467




Fold 3 Epoch 5 Batch 0: Train Loss = 0.9990
Fold 3, mse = 656.2347, mad = 20.3240




Fold 3 Epoch 6 Batch 0: Train Loss = 0.9353
Fold 3, mse = 640.4016, mad = 20.2078




Fold 3 Epoch 7 Batch 0: Train Loss = 1.0733
Fold 3, mse = 627.8571, mad = 20.1888




Fold 3 Epoch 8 Batch 0: Train Loss = 0.9703
Fold 3, mse = 629.4040, mad = 20.1573




Fold 3 Epoch 9 Batch 0: Train Loss = 1.1132
Fold 3, mse = 730.8017, mad = 21.4309




Fold 3 Epoch 10 Batch 0: Train Loss = 1.0242
Fold 3, epoch 10: Loss = 0.9710 Valid loss = 0.8968 MSE = 638.1349
Fold 3, mse = 638.1349, mad = 20.2042




Fold 3 Epoch 11 Batch 0: Train Loss = 1.0593
Fold 3, mse = 675.7176, mad = 20.7740




Fold 3 Epoch 12 Batch 0: Train Loss = 1.0377
Fold 3, mse = 645.7640, mad = 20.3258




Fold 3 Epoch 13 Batch 0: Train Loss = 1.1186
Fold 3, mse = 679.9009, mad = 20.6887




Fold 3 Epoch 14 Batch 0: Train Loss = 1.0065
Fold 3, mse = 667.9975, mad = 20.5931




Fold 3 Epoch 15 Batch 0: Train Loss = 0.9175
Fold 3, mse = 673.4206, mad = 20.5926




Fold 3 Epoch 16 Batch 0: Train Loss = 0.9251
Fold 3, mse = 641.3312, mad = 20.2139




Fold 3 Epoch 17 Batch 0: Train Loss = 0.9262
Fold 3, mse = 675.5153, mad = 20.7260




Fold 3 Epoch 18 Batch 0: Train Loss = 1.1502
Fold 3, mse = 807.6460, mad = 22.2971




Fold 3 Epoch 19 Batch 0: Train Loss = 0.9321
Fold 3, mse = 717.1849, mad = 21.0736




Fold 3 Epoch 20 Batch 0: Train Loss = 0.9346
Fold 3, epoch 20: Loss = 0.9548 Valid loss = 1.3530 MSE = 820.0933
Fold 3, mse = 820.0933, mad = 22.2767




Fold 3 Epoch 21 Batch 0: Train Loss = 0.8687
Fold 3, mse = 739.5975, mad = 21.2385




Fold 3 Epoch 22 Batch 0: Train Loss = 0.8696
Fold 3, mse = 843.2729, mad = 22.5037




Fold 3 Epoch 23 Batch 0: Train Loss = 1.0094
Fold 3, mse = 858.5342, mad = 22.7345




Fold 3 Epoch 24 Batch 0: Train Loss = 0.8898
Fold 3, mse = 770.6214, mad = 21.7128




Fold 3 Epoch 25 Batch 0: Train Loss = 1.1575
Fold 3, mse = 785.6282, mad = 21.9452




Fold 3 Epoch 26 Batch 0: Train Loss = 0.9572
Fold 3, mse = 817.1407, mad = 22.4509




Fold 3 Epoch 27 Batch 0: Train Loss = 0.9755
Fold 3, mse = 825.2895, mad = 22.3501




Fold 3 Epoch 28 Batch 0: Train Loss = 1.0590
Fold 3, mse = 779.2282, mad = 21.9237




Fold 3 Epoch 29 Batch 0: Train Loss = 0.9004
Fold 3, mse = 947.0709, mad = 23.8943




Fold 4 Epoch 0 Batch 0: Train Loss = 18.1816
Fold 4, epoch 0: Loss = 2.4800 Valid loss = 1.1380 MSE = 816.2749
------------ Save FOLD-BEST model - MSE: 816.2749 ------------
Custom bins confusion matrix:
[[  0 132 118   0]
 [  0 476 401   0]
 [  0 330 225   0]
 [  0 141 279   0]]
Mean absolute deviation (MAD) = 23.113430187048532
Mean squared error (MSE) = 816.274939521705
Mean absolute percentage error (MAPE) = 227.18844621011664
Cohen kappa score = 0.03430838300649319
Fold 4, mse = 816.2749, mad = 23.1134




Fold 4 Epoch 1 Batch 0: Train Loss = 0.8474
Fold 4, mse = 841.3248, mad = 23.3272




Fold 4 Epoch 2 Batch 0: Train Loss = 0.9318
Fold 4, mse = 851.7866, mad = 23.2881




Fold 4 Epoch 3 Batch 0: Train Loss = 0.8632
Fold 4, mse = 831.7455, mad = 23.2595




Fold 4 Epoch 4 Batch 0: Train Loss = 0.8539
Fold 4, mse = 846.6688, mad = 23.2745




Fold 4 Epoch 5 Batch 0: Train Loss = 0.9331
Fold 4, mse = 849.8091, mad = 23.3133




Fold 4 Epoch 6 Batch 0: Train Loss = 0.8918
Fold 4, mse = 864.5068, mad = 23.2790




Fold 4 Epoch 7 Batch 0: Train Loss = 0.7644
Fold 4, mse = 837.6782, mad = 23.3000




Fold 4 Epoch 8 Batch 0: Train Loss = 0.9790
Fold 4, mse = 876.5536, mad = 23.2834




Fold 4 Epoch 9 Batch 0: Train Loss = 0.8811
Fold 4, mse = 870.3937, mad = 23.2902




Fold 4 Epoch 10 Batch 0: Train Loss = 0.9348
Fold 4, epoch 10: Loss = 0.9125 Valid loss = 1.1937 MSE = 875.0414
Fold 4, mse = 875.0414, mad = 23.2987




Fold 4 Epoch 11 Batch 0: Train Loss = 1.0012
Fold 4, mse = 892.8378, mad = 23.3228




Fold 4 Epoch 12 Batch 0: Train Loss = 0.9545
Fold 4, mse = 901.5189, mad = 23.3594




Fold 4 Epoch 13 Batch 0: Train Loss = 0.9683
Fold 4, mse = 879.5098, mad = 23.3446




Fold 4 Epoch 14 Batch 0: Train Loss = 0.8630
Fold 4, mse = 869.7195, mad = 23.3537




Fold 4 Epoch 15 Batch 0: Train Loss = 1.0236
Fold 4, mse = 916.1129, mad = 23.4351




Fold 4 Epoch 16 Batch 0: Train Loss = 1.0029
Fold 4, mse = 899.6968, mad = 23.4019




Fold 4 Epoch 17 Batch 0: Train Loss = 0.8462
Fold 4, mse = 878.7817, mad = 23.4739




Fold 4 Epoch 18 Batch 0: Train Loss = 0.9480
Fold 4, mse = 897.3107, mad = 23.4621




Fold 4 Epoch 19 Batch 0: Train Loss = 0.7857
Fold 4, mse = 907.6589, mad = 23.4667




Fold 4 Epoch 20 Batch 0: Train Loss = 0.8125
Fold 4, epoch 20: Loss = 0.8852 Valid loss = 1.2483 MSE = 925.4428
Fold 4, mse = 925.4428, mad = 23.5659




Fold 4 Epoch 21 Batch 0: Train Loss = 0.9953
Fold 4, mse = 955.4614, mad = 23.6197




Fold 4 Epoch 22 Batch 0: Train Loss = 0.8781
Fold 4, mse = 953.4800, mad = 23.7293




Fold 4 Epoch 23 Batch 0: Train Loss = 1.0178
Fold 4, mse = 985.6396, mad = 23.7532




Fold 4 Epoch 24 Batch 0: Train Loss = 0.8773
Fold 4, mse = 903.9036, mad = 23.6375




Fold 4 Epoch 25 Batch 0: Train Loss = 0.8402
Fold 4, mse = 934.6306, mad = 23.6397




Fold 4 Epoch 26 Batch 0: Train Loss = 0.8374
Fold 4, mse = 946.0298, mad = 23.6979




Fold 4 Epoch 27 Batch 0: Train Loss = 0.8029
Fold 4, mse = 955.3393, mad = 23.7572




Fold 4 Epoch 28 Batch 0: Train Loss = 0.8884
Fold 4, mse = 982.6415, mad = 23.7852




Fold 4 Epoch 29 Batch 0: Train Loss = 0.7636
Fold 4, mse = 975.6768, mad = 23.7367




Fold 5 Epoch 0 Batch 0: Train Loss = 9.4128
Fold 5, epoch 0: Loss = 1.8103 Valid loss = 1.0857 MSE = 770.1818
------------ Save FOLD-BEST model - MSE: 770.1818 ------------
Custom bins confusion matrix:
[[  0   0 293   0]
 [  0   0 910   0]
 [  0   0 521   0]
 [  0   0 311   0]]
Mean absolute deviation (MAD) = 23.60025101142629
Mean squared error (MSE) = 770.1818231650154
Mean absolute percentage error (MAPE) = 329.28523532929063
Cohen kappa score = 0.0
Fold 5, mse = 770.1818, mad = 23.6003




Fold 5 Epoch 1 Batch 0: Train Loss = 0.9771
------------ Save FOLD-BEST model - MSE: 759.9734 ------------
Custom bins confusion matrix:
[[  0   0 293   0]
 [  0   0 910   0]
 [  0   0 521   0]
 [  0   0 311   0]]
Mean absolute deviation (MAD) = 23.3655392802782
Mean squared error (MSE) = 759.9733699417149
Mean absolute percentage error (MAPE) = 322.4155760410827
Cohen kappa score = 0.0
Fold 5, mse = 759.9734, mad = 23.3655




Fold 5 Epoch 2 Batch 0: Train Loss = 0.9551
------------ Save FOLD-BEST model - MSE: 751.1380 ------------
Custom bins confusion matrix:
[[  0   3 290   0]
 [  0   0 910   0]
 [  0   0 521   0]
 [  0   0 311   0]]
Mean absolute deviation (MAD) = 23.157001799869526
Mean squared error (MSE) = 751.138007001242
Mean absolute percentage error (MAPE) = 315.6428681843369
Cohen kappa score = 0.0013579490616767442
Fold 5, mse = 751.1380, mad = 23.1570




Fold 5 Epoch 3 Batch 0: Train Loss = 0.9572
------------ Save FOLD-BEST model - MSE: 668.0501 ------------
Custom bins confusion matrix:
[[ 11 103 179   0]
 [ 18 243 649   0]
 [  0 106 415   0]
 [  0   5 306   0]]
Mean absolute deviation (MAD) = 21.21315295122685
Mean squared error (MSE) = 668.0501440961275
Mean absolute percentage error (MAPE) = 263.94654448625926
Cohen kappa score = 0.1087070777855641
Fold 5, mse = 668.0501, mad = 21.2132




Fold 5 Epoch 4 Batch 0: Train Loss = 0.9695
Fold 5, mse = 705.6052, mad = 20.8135




Fold 5 Epoch 5 Batch 0: Train Loss = 0.9488
Fold 5, mse = 684.6701, mad = 20.4734




Fold 5 Epoch 6 Batch 0: Train Loss = 0.8956
Fold 5, mse = 766.3085, mad = 21.0966




Fold 5 Epoch 7 Batch 0: Train Loss = 1.0961
Fold 5, mse = 885.9864, mad = 22.8099




Fold 5 Epoch 8 Batch 0: Train Loss = 0.9247
Fold 5, mse = 853.0717, mad = 22.3373




Fold 5 Epoch 9 Batch 0: Train Loss = 0.8312
------------ Save FOLD-BEST model - MSE: 655.1040 ------------
Custom bins confusion matrix:
[[  0 191 102   0]
 [  1 550 359   0]
 [  0 261 260   0]
 [  0 103 208   0]]
Mean absolute deviation (MAD) = 20.625540224980906
Mean squared error (MSE) = 655.1039651227211
Mean absolute percentage error (MAPE) = 236.97014876538236
Cohen kappa score = 0.10963208549343295
Fold 5, mse = 655.1040, mad = 20.6255




Fold 5 Epoch 10 Batch 0: Train Loss = 0.9083
Fold 5, epoch 10: Loss = 0.9483 Valid loss = 1.1313 MSE = 774.6718
Fold 5, mse = 774.6718, mad = 21.1729




Fold 5 Epoch 11 Batch 0: Train Loss = 1.0426
Fold 5, mse = 806.7620, mad = 21.6726




Fold 5 Epoch 12 Batch 0: Train Loss = 0.9108
Fold 5, mse = 725.1805, mad = 20.6897




Fold 5 Epoch 13 Batch 0: Train Loss = 1.0442
Fold 5, mse = 837.5602, mad = 21.9958




Fold 5 Epoch 14 Batch 0: Train Loss = 0.9121
Fold 5, mse = 780.8209, mad = 21.2200




Fold 5 Epoch 15 Batch 0: Train Loss = 0.9053
Fold 5, mse = 889.8362, mad = 22.8526




Fold 5 Epoch 16 Batch 0: Train Loss = 0.9262
Fold 5, mse = 1027.7036, mad = 24.7703




Fold 5 Epoch 17 Batch 0: Train Loss = 1.0287
Fold 5, mse = 822.3946, mad = 21.6961




Fold 5 Epoch 18 Batch 0: Train Loss = 1.0090
Fold 5, mse = 882.0251, mad = 22.4194




Fold 5 Epoch 19 Batch 0: Train Loss = 1.0251
Fold 5, mse = 1034.6094, mad = 24.7585




Fold 5 Epoch 20 Batch 0: Train Loss = 0.8348
Fold 5, epoch 20: Loss = 0.9143 Valid loss = 1.0042 MSE = 703.9792
Fold 5, mse = 703.9792, mad = 20.1546




Fold 5 Epoch 21 Batch 0: Train Loss = 1.0813
Fold 5, mse = 874.7063, mad = 22.5167




Fold 5 Epoch 22 Batch 0: Train Loss = 0.9057
Fold 5, mse = 752.8681, mad = 20.9972




Fold 5 Epoch 23 Batch 0: Train Loss = 0.8785
Fold 5, mse = 902.3720, mad = 23.0314




Fold 5 Epoch 24 Batch 0: Train Loss = 0.7134
Fold 5, mse = 724.9596, mad = 20.5189




Fold 5 Epoch 25 Batch 0: Train Loss = 0.9268
Fold 5, mse = 878.9907, mad = 22.6766




Fold 5 Epoch 26 Batch 0: Train Loss = 1.0435
Fold 5, mse = 831.5833, mad = 21.9807




Fold 5 Epoch 27 Batch 0: Train Loss = 0.9216
Fold 5, mse = 1065.6843, mad = 25.2957




Fold 5 Epoch 28 Batch 0: Train Loss = 0.8886
Fold 5, mse = 904.7855, mad = 22.9090




Fold 5 Epoch 29 Batch 0: Train Loss = 0.7771
Fold 5, mse = 695.2265, mad = 20.3360
mse 648.4458(92.1128)
mad 20.3678(1.5851)
