# Task 3: LOS - Unimodality (Pretrained Encoder)

## Setting Environment

In [None]:
%load_ext autoreload
%autoreload 2

from IPython.core.magic import register_cell_magic

@register_cell_magic
def skip(line, cell):
    """Cell magic ``%%skip``: discard the cell instead of executing it.

    Both the magic's argument line and the cell source are ignored and
    nothing is returned.
    """

In [None]:
import sys
import os

# Placeholder for the project root. It is concatenated directly with
# "datasets/..." when the k-fold file path is built, so the real value has to
# end with a path separator.
root_path = "..."

from config import device, data_folder, log_folder
import pickle

# Dataset caches and training logs of this notebook go into a task-specific
# sub-folder of the configured base folders.
task_dir = "LOS"
data_folder = f"{data_folder}{task_dir}/"
log_folder = f"{log_folder}{task_dir}/"

from itertools import combinations,product
from models.unimodal import create_unimodal_model
from models.multimodal import create_multimodal_model
from training_evaluation import run_kfolds
import torch

In [None]:
from datasets.LOS.static import StaticLoader

# Build the static loader once and cache it as a pickle; later runs reuse the
# cached object instead of constructing it again.
# NOTE: unpickling can execute arbitrary code, so the cache file must be a
# trusted, locally produced artifact.
static_dataset_path = data_folder+"/static.pkl"
if os.path.exists(static_dataset_path):
    with open(static_dataset_path, "rb") as f:
        static = pickle.load(f)
else:
    static = StaticLoader()
    with open(static_dataset_path, "wb") as f:
        # Same pickle protocol as the other dataset caches in this notebook.
        pickle.dump(static, f, protocol=pickle.HIGHEST_PROTOCOL)

# Ids and targets taken from the static loader; they are handed to the other
# modality loaders when those are constructed.
ids = static.get_ids()
targets_df = static.get_targets()
targets = targets_df.targets.values
# Number of decoder outputs used by the model configurations.
targets_num = 1

In [None]:
import src.utils.data_selection as stool

# The visible cells only load the k-fold split, they never create it. Every
# training cell needs `kfolds`, so fail here with a clear message when the
# file is missing instead of with a NameError in a later cell.
# NOTE: unpickling can execute arbitrary code; the file must be trusted.
kfolds_fpath = root_path+"datasets/%s/kfolds.pkl"%(task_dir)
if not os.path.exists(kfolds_fpath):
    raise FileNotFoundError(
        "k-fold split file not found: %s (check root_path)" % kfolds_fpath
    )
with open(kfolds_fpath, "rb") as f:
    kfolds = pickle.load(f)

## Static

In [None]:
# Fetch the static dataset and describe it (cache path and sample count); the
# description is stored under "DATASET" in the training parameters.
static_dataset = static.get_dataset()
static_dataset_param = dict(path=static_dataset_path, num=len(static_dataset))

In [None]:
from blocks.mlp import  MLP, MLPDecoderReg

# Number of static feature columns; this is the encoder input width.
FEATS_NUM = len(static.feats_cols)

EMBED_DIM = 512
ENCODER_DROPOUT = 0.1
DECODER_DROPOUT = 0.5


# Encoder: MLP over the static features
MLP_param = {
    "in_dim": FEATS_NUM,
    "hidden_dim": [EMBED_DIM, EMBED_DIM],
    "drop_prob": ENCODER_DROPOUT
}

# Decoder
MLP_Decoder_param = {
    "in_dim": EMBED_DIM,
    "hidden_dim": [EMBED_DIM//2],
    # Use the shared target count like every other decoder in this notebook
    # instead of a hard-coded 1.
    "out_dim": targets_num,
    "drop_prob": DECODER_DROPOUT
}

# Training parameters; model classes are referenced by name.
train_param = {
    "DATASET": static_dataset_param,
    "MODEL_NAME": "static_unimodal",
    "ENCODER_PARAM": [MLP_param],
    "ENCODER_MODEL": [MLP.__name__],
    "DECODER_PARAM": MLP_Decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.01,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam"
}

model = create_unimodal_model(train_param["ENCODER_MODEL"], 
                            train_param["ENCODER_PARAM"], 
                            train_param["DECODER_MODEL"], 
                            train_param["DECODER_PARAM"], device)

# classification=False: the task is run in the non-classification mode of
# run_kfolds.
log = run_kfolds(train_param, model, static_dataset, kfolds, log_folder=log_folder, classification=False)

## Labs

In [None]:
from datasets.LOS.labs import LabsLoader

# Reuse the pickled labs loader when a cache file exists, otherwise build the
# loader and write the cache.
labs_dataset_path = data_folder+"/labs.pkl"
if not os.path.exists(labs_dataset_path):
    labs = LabsLoader(ids, targets)
    with open(labs_dataset_path, "wb") as f:
        pickle.dump(labs, f, protocol=pickle.HIGHEST_PROTOCOL)
else:
    with open(labs_dataset_path, "rb") as f:
        labs = pickle.load(f)

# Only the valid entries are used for training.
labs_dataset = labs.get_dataset(only_valid=True)
labs_ids = labs.get_ids(only_valid=True)

labs_dataset_param = dict(
    path=labs_dataset_path,
    only_valid=True,
    num=len(labs_dataset),
)

# Restrict the global k-folds to the ids that have valid labs.
labs_kfolds = stool.get_sub_kfolds(ids, labs_ids, kfolds)

In [None]:
# Labs unimodal model: TimeWinEmbedding -> LSTM -> MLP encoder with an
# MLPDecoderReg head, trained with run_kfolds on the labs sub-folds.
from blocks.embedding import TimeWinEmbedding
from blocks.rnn import LSTM
from blocks.mlp import  MLP, MLPDecoderReg
from datasets.collate_fun import CreateCustomDataset, time_win_tokens_batch

# Vocabulary sizes for values/sources: the loader's "None" label plus one
# (presumably the None label is the largest token id -- TODO confirm).
labs_val_vsize = int(labs.values_None_label+1)
labs_src_vsize = int(labs.sources_None_label+1)
print(labs_val_vsize, labs_src_vsize)

EMBED_DIM = 512
ENCODER_DROPOUT = 0.05
DECODER_DROPOUT = 0.1

TWEmbed_param = {
    "value_vocab_size":labs_val_vsize, 
    "source_vocab_size":labs_src_vsize, 
    "win_size":labs.win_num, 
    "embed_dim":EMBED_DIM, 
    "device":device, 
    "temporal_weighted":False,
    "shared_embedding":True
}

#BiLSTM
# hidden_size is half of EMBED_DIM; presumably the two directions are
# concatenated back to EMBED_DIM, matching the MLP in_dim below -- TODO confirm.
LSTM_param = {
    "input_size": EMBED_DIM,
    "hidden_size": EMBED_DIM//2,
    "num_layers": 2,
    "bidirectional":True
}

#MLP
MLP_param = {
    "in_dim": EMBED_DIM,
    "hidden_dim": [EMBED_DIM, EMBED_DIM],
    "drop_prob": ENCODER_DROPOUT
}

#Setting Decoder
#MLPDecoder
MLP_decoder_param = {
    "in_dim": EMBED_DIM,
    "out_dim": targets_num,
    "hidden_dim": [EMBED_DIM//2],
    "drop_prob": DECODER_DROPOUT
}

# Two identically configured collate entries (presumably one per token
# stream, values and sources -- TODO confirm).
collate_fn_params = [
    {"name":time_win_tokens_batch.__name__, "param":{"accum":True,"onset":False}},
    {"name":time_win_tokens_batch.__name__, "param":{"accum":True,"onset":False}},
]

#Setting Training Parameters
train_param = {
    "DATASET": labs_dataset_param,
    "MODEL_NAME": "labs_unimodal",
    "ENCODER_PARAM": [TWEmbed_param, LSTM_param, MLP_param],
    "ENCODER_MODEL": [TimeWinEmbedding.__name__, LSTM.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.0025,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam",
    "COLLATE_FN_PARAMS": collate_fn_params
}

model = create_unimodal_model(train_param["ENCODER_MODEL"], 
                            train_param["ENCODER_PARAM"], 
                            train_param["DECODER_MODEL"], 
                            train_param["DECODER_PARAM"], device)

# NOTE: "classfication" is the keyword spelling this call uses; keep it as is
# unless the callee's signature is changed as well.
collate_batch = CreateCustomDataset(len(collate_fn_params), train_param["COLLATE_FN_PARAMS"], classfication=False)
log = run_kfolds(train_param, model, labs_dataset, labs_kfolds, collate_fun = collate_batch, log_folder=log_folder, classification=False)

## Vitals

### Numerical Vitals

#### Segments

In [None]:
from datasets.LOS.vitalsigns import VitalsNumLoader

# Segment configuration: change vitals_segs_win_size to train the model for
# another window size.
vitals_segs_dataset_path = data_folder+"/vitals_segs.pkl"
vitals_segs_win_size = 12
vitals_segs_num = 24

# Load the cached loader, or build and cache it on the first run.
if not os.path.exists(vitals_segs_dataset_path):
    vitals_segs = VitalsNumLoader(ids, targets)
    with open(vitals_segs_dataset_path, "wb") as f:
        pickle.dump(vitals_segs, f, protocol=pickle.HIGHEST_PROTOCOL)
else:
    with open(vitals_segs_dataset_path, "rb") as f:
        vitals_segs = pickle.load(f)

vitals_seg_dataset_param = dict(
    path=vitals_segs_dataset_path,
    win_size=vitals_segs_win_size,
    segs_num=vitals_segs_num,
    only_valid=True,
    flatten=True,
)

# Apply the segment configuration before asking the loader for data.
vitals_segs.set_seg_data(vitals_segs_win_size, vitals_segs_num)
vitals_segs_dataset = vitals_segs.get_dataset(
    only_valid=vitals_seg_dataset_param["only_valid"],
    flatten=vitals_seg_dataset_param["flatten"],
)

vitals_num_ids = vitals_segs.get_ids(only_valid=vitals_seg_dataset_param["only_valid"])
vitals_seg_dataset_param["num"] = len(vitals_segs_dataset)

# Restrict the k-folds to the ids with valid vitals, then translate each
# split into indices of the flattened segment dataset.
vitals_num_kfolds = stool.get_sub_kfolds(ids, vitals_num_ids, kfolds)
vitals_segs_kfolds = [
    [
        vitals_segs.get_flatten_idx(_train_idx),
        vitals_segs.get_flatten_idx(_valid_idx),
        vitals_segs.get_flatten_idx(_test_idx),
    ]
    for _train_idx, _valid_idx, _test_idx in vitals_num_kfolds
]

In [None]:
from blocks.mlp import MLP, MLPDecoderReg
from blocks.resnet import ResNet1d
from blocks.attention import FeatureSelfAttn

# Hyper-parameters are defined per segment window size. Any other window size
# is rejected explicitly: otherwise ATTEN_OUT_DIM / EMBED_DIM / LR would be
# undefined or silently keep values left over from a previously run cell.
if vitals_segs_win_size == 12:
    ATTEN_OUT_DIM = 8
    EMBED_DIM = 128
    LR = 0.0025
elif vitals_segs_win_size == 24:
    ATTEN_OUT_DIM = 16
    EMBED_DIM = 256
    LR = 0.001
else:
    raise ValueError(
        "no hyper-parameters defined for vitals_segs_win_size=%r (expected 12 or 24)"
        % (vitals_segs_win_size,)
    )

# Two ResNet blocks: the filter count doubles per block while the sequence
# length goes from the window size to half of it.
FILTER_SIZE = [ATTEN_OUT_DIM*2, ATTEN_OUT_DIM*4]
SEQ_LEN = [vitals_segs_win_size, vitals_segs_win_size//2]

SelfAttn_param = {
    "embed_dim":8,
    "num_heads":4,
    "drop_prob":0.05,
    "out_dim":ATTEN_OUT_DIM
}

ResNet_param = {
    "input_dim": (SelfAttn_param["out_dim"], vitals_segs_win_size),
    "blocks_dim": list(zip(FILTER_SIZE, SEQ_LEN)),
    "kernel_size": 3,
    "dropout_rate": 0.3
}

# The MLP input width is filters * sequence length of the last ResNet block.
MLP_param = {
    "in_dim": ResNet_param["blocks_dim"][-1][0] * ResNet_param["blocks_dim"][-1][1],
    "hidden_dim": [EMBED_DIM],
    "drop_prob": 0.1
}

MLP_decoder_param = {
    "in_dim": MLP_param["hidden_dim"][-1],
    "out_dim": targets_num,
    "hidden_dim": [MLP_param["hidden_dim"][-1]//2],
    "drop_prob": 0.25
}

#Setting Training Parameters
train_param = {
    "DATASET": vitals_seg_dataset_param,
    "MODEL_NAME": "vitals_segs_unimodal",
    "ENCODER_PARAM": [SelfAttn_param, ResNet_param, MLP_param],
    "ENCODER_MODEL": [FeatureSelfAttn.__name__, ResNet1d.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": LR,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam"
}

model = create_unimodal_model(train_param["ENCODER_MODEL"], 
                            train_param["ENCODER_PARAM"], 
                            train_param["DECODER_MODEL"], 
                            train_param["DECODER_PARAM"], device)

log = run_kfolds(train_param, model, vitals_segs_dataset, vitals_segs_kfolds, log_folder=log_folder, classification=False)

#### Multi-Segments

In [None]:
from datasets.LOS.vitalsigns import VitalsNumLoader, multiscale_vitalsigns

# Load the cached numerical-vitals loader, or build and cache it.
# NOTE: unpickling can execute arbitrary code; the cache must be trusted.
vitals_segs_dataset_path = data_folder+"/vitals_segs.pkl"
vitals_segs_num = 24
if os.path.exists(vitals_segs_dataset_path):
    with open(vitals_segs_dataset_path, "rb") as f:
        vitals_segs = pickle.load(f)
else:
    vitals_segs = VitalsNumLoader(ids, targets)
    with open(vitals_segs_dataset_path, "wb") as f:
        pickle.dump(vitals_segs, f, protocol=pickle.HIGHEST_PROTOCOL)

# Build one non-flattened dataset per window size and combine the two.
# NOTE: the loop rebinds the globals vitals_segs_win_size and
# vitals_segs_dataset to the values of the last window size.
win_size_list = [12, 24]
multiscale = []
for vitals_segs_win_size in win_size_list:
    vitals_segs.set_seg_data(vitals_segs_win_size, vitals_segs_num)
    vitals_segs_dataset = vitals_segs.get_dataset(only_valid=True, flatten=False)
    multiscale.append(vitals_segs_dataset)

vitals_multiscale_dataset = multiscale_vitalsigns(multiscale[0], multiscale[1])

vitals_multiscale_dataset_param = {
    "path": vitals_segs_dataset_path,
    "win_size": win_size_list,
    "segs_num": vitals_segs_num,
    "only_valid": True,
    "flatten": False,
    "num": len(vitals_multiscale_dataset)
}

# Resolve the valid ids in this cell so it also works when the single-scale
# "Segments" cell has not been executed in the current session (it was the
# only place that defined vitals_num_ids).
vitals_num_ids = vitals_segs.get_ids(only_valid=True)
vitals_num_kfolds = stool.get_sub_kfolds(ids, vitals_num_ids, kfolds)

In [None]:
# Multi-scale vitals model: pretrained per-window-size segment encoders
# (MultiScale) -> ResNet1d -> MLP encoder with an MLPDecoderReg head.
from blocks.mlp import MLP, MLPDecoderReg
from blocks.resnet import ResNet1d
from models.unimodal import MultiScale

# When True, one model is built per fold from that fold's pretrained encoders.
k_models = True 
EMBED_DIM = 256

# NOTE(review): the ".../" path parts look like placeholders for the run
# folders of the trained segment models -- fill in before running.
MultiScale_param = {
"model_path_list": [log_folder+"/vitals_segs_unimodal/.../",#win12
                    log_folder+"/vitals_segs_unimodal/.../"], #win24
"model_list":[],
"size_list": win_size_list
}

#loading fold0 models
# NOTE: weights_only=False unpickles arbitrary objects; only load trusted
# checkpoints.
for model_path in MultiScale_param["model_path_list"]:
    seg_model = torch.load(model_path+"model_0.pth", weights_only=False)
    MultiScale_param["model_list"].append(seg_model.encoder)

# Sum of the encoders' output widths, read from the first layer of the last
# encoder stage's `mlp`; it sets the ResNet input channel count.
seg_embeds_size = sum([model[-1].mlp[0].out_features for model in MultiScale_param["model_list"]])
FILTER_SIZE = [seg_embeds_size, seg_embeds_size//2]
SEQ_LEN = [vitals_segs_num, vitals_segs_num//2]

ResNet_param = {
    "input_dim": (seg_embeds_size, vitals_segs_num),
    "blocks_dim": list(zip(FILTER_SIZE, SEQ_LEN)),
    "kernel_size": 3,
    "dropout_rate": 0.3
}

MLP_param = {
    "in_dim": ResNet_param["blocks_dim"][-1][0] * ResNet_param["blocks_dim"][-1][1],
    "hidden_dim": [EMBED_DIM],
    "drop_prob": 0.1
}

MLP_decoder_param = {
    "in_dim": MLP_param["hidden_dim"][-1],
    "out_dim": targets_num,
    "hidden_dim": [MLP_param["hidden_dim"][-1]//2],
    "drop_prob": 0.25
}

#Setting Training Parameters
train_param = {
    "DATASET": vitals_multiscale_dataset_param,
    "MODEL_NAME": "vitals_num_unimodal",
    "ENCODER_PARAM": [MultiScale_param, ResNet_param, MLP_param],
    "ENCODER_MODEL": [MultiScale.__name__, ResNet1d.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.0005,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam"
}

#loading k models for each fold
# MultiScale_param is the same dict object referenced by
# train_param["ENCODER_PARAM"][0], so refilling "model_list" before each
# create_unimodal_model call hands that fold's encoders to the new model.
if k_models:
    model = []
    for i in range(len(kfolds)):
        MultiScale_param["model_list"] = []
        for model_path in train_param["ENCODER_PARAM"][0]["model_path_list"]:
            seg_model = torch.load(model_path+"model_%d.pth"%(i), weights_only=False)
            MultiScale_param["model_list"].append(seg_model.encoder)

        model_i = create_unimodal_model(train_param["ENCODER_MODEL"], 
                                    train_param["ENCODER_PARAM"], 
                                    train_param["DECODER_MODEL"], 
                                    train_param["DECODER_PARAM"], device)
        model.append(model_i)
else:
    # Single model built from the fold-0 encoders loaded above.
    model = create_unimodal_model(train_param["ENCODER_MODEL"], 
                                train_param["ENCODER_PARAM"], 
                                train_param["DECODER_MODEL"], 
                                train_param["DECODER_PARAM"], device)

# Drop the loaded modules from the parameter dict before it is passed on
# (presumably so train_param holds only plain values when run_kfolds logs
# it -- TODO confirm).
del MultiScale_param["model_list"]


log = run_kfolds(train_param, model, vitals_multiscale_dataset, vitals_num_kfolds, log_folder=log_folder, classification=False)

### Categorical Vitals

In [None]:
from datasets.LOS.vitalsigns import VitalsCatLoader

# Reuse the pickled categorical-vitals loader when a cache file exists,
# otherwise build the loader and write the cache.
vitals_cat_dataset_path = data_folder+"/vitals_cat.pkl"
if not os.path.exists(vitals_cat_dataset_path):
    vitals_cat = VitalsCatLoader(ids, targets)
    with open(vitals_cat_dataset_path, "wb") as f:
        pickle.dump(vitals_cat, f, protocol=pickle.HIGHEST_PROTOCOL)
else:
    with open(vitals_cat_dataset_path, "rb") as f:
        vitals_cat = pickle.load(f)

# Only the valid entries are used for training.
vitals_cat_dataset = vitals_cat.get_dataset(only_valid=True)
vitals_cat_ids = vitals_cat.get_ids(only_valid=True)

vitals_cat_dataset_param = dict(
    path=vitals_cat_dataset_path,
    only_valid=True,
    num=len(vitals_cat_dataset),
)

# Restrict the global k-folds to the ids with valid categorical vitals.
vitals_cat_kfolds = stool.get_sub_kfolds(ids, vitals_cat_ids, kfolds)

In [None]:
# Categorical-vitals unimodal model: TimeWinEmbedding -> LSTM -> MLP encoder
# with an MLPDecoderReg head, trained with run_kfolds on the categorical
# vitals sub-folds.
from blocks.embedding import TimeWinEmbedding
from blocks.rnn import LSTM
from blocks.mlp import  MLP, MLPDecoderReg
from datasets.collate_fun import CreateCustomDataset, time_win_tokens_batch

# Vocabulary sizes: the loader's "None" label plus one (presumably the None
# label is the largest token id -- TODO confirm).
vitals_val_vsize = int(vitals_cat.vals_None_label+1)
vitals_src_vsize = int(vitals_cat.srcs_None_label+1)
print(vitals_val_vsize, vitals_src_vsize)
EMBED_DIM = 256

TWEmbed_param = {
    "value_vocab_size":vitals_val_vsize, 
    "source_vocab_size":vitals_src_vsize, 
    "win_size":vitals_cat.win_num, 
    "embed_dim":EMBED_DIM, 
    "device":device, 
    "temporal_weighted":False, 
    "shared_embedding":True
}

#BiLSTM
LSTM_param = {
    "input_size": EMBED_DIM,
    "hidden_size": EMBED_DIM//2,
    "num_layers": 2,
    "bidirectional":True
}

MLP_param = {
    "in_dim": EMBED_DIM,
    "hidden_dim": [EMBED_DIM, EMBED_DIM],
    "drop_prob": 0.1
}

MLP_decoder_param = {
    "in_dim": EMBED_DIM,
    "out_dim": targets_num,
    "hidden_dim": [EMBED_DIM//2],
    "drop_prob": 0.1
}   

# Two identically configured collate entries (presumably one per token
# stream, values and sources -- TODO confirm).
collate_fn_params = [
    {"name":time_win_tokens_batch.__name__, "param":{"accum":True,"onset":False}},
    {"name":time_win_tokens_batch.__name__, "param":{"accum":True,"onset":False}},
]

#Setting Training Parameters
train_param = {
    "DATASET": vitals_cat_dataset_param,
    "MODEL_NAME": "vitals_cat_unimodal",
    "ENCODER_PARAM": [TWEmbed_param, LSTM_param, MLP_param],
    "ENCODER_MODEL": [TimeWinEmbedding.__name__, LSTM.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.001,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam",
    "COLLATE_FN_PARAMS": collate_fn_params
}

model = create_unimodal_model(train_param["ENCODER_MODEL"], 
                            train_param["ENCODER_PARAM"], 
                            train_param["DECODER_MODEL"], 
                            train_param["DECODER_PARAM"], device)

# NOTE: "classfication" is the keyword spelling this call uses; keep it as is
# unless the callee's signature is changed as well.
collate_batch = CreateCustomDataset(len(collate_fn_params), train_param["COLLATE_FN_PARAMS"], classfication=False)
log = run_kfolds(train_param, model, vitals_cat_dataset, vitals_cat_kfolds, collate_fun=collate_batch, log_folder=log_folder, classification=False)

## ECG

In [None]:
from datasets.LOS.ecg import ECGLoader

# Reuse the pickled ECG loader when a cache file exists, otherwise build the
# loader and write the cache.
ecg_dataset_path = data_folder+"/ecg.pkl"
if not os.path.exists(ecg_dataset_path):
    ecg = ECGLoader(ids, targets)
    with open(ecg_dataset_path, "wb") as f:
        pickle.dump(ecg, f, protocol=pickle.HIGHEST_PROTOCOL)
else:
    with open(ecg_dataset_path, "rb") as f:
        ecg = pickle.load(f)

# ecg_kfolds: the global k-folds restricted to the ids with valid ECG data.
ecg_ids = ecg.get_ids(only_valid=True)
ecg_kfolds = stool.get_sub_kfolds(ids, ecg_ids, kfolds)

# ecg_sig_kfolds: the same splits with every index list mapped through
# get_subi2ecgi, for use with the per-ECG datasets.
ecg_sig_kfolds = [
    [
        ecg.get_subi2ecgi(_train_idx, only_valid=True),
        ecg.get_subi2ecgi(_valid_idx, only_valid=True),
        ecg.get_subi2ecgi(_test_idx, only_valid=True),
    ]
    for _train_idx, _valid_idx, _test_idx in ecg_kfolds
]

#### Note (Statement)

In [None]:
# Token dataset of the ECG statements plus its description (cache path and
# sample count) for the training parameters.
ecg_note_dataset = ecg.get_ecg_dataset(type="tokens")
ecg_note_dataset_param = dict(path=ecg_dataset_path, num=len(ecg_note_dataset))

In [None]:
# ECG statement model: Embedding -> MLP encoder with an MLPDecoderReg head,
# trained with run_kfolds on the per-ECG folds.
from blocks.embedding import Embedding
from blocks.mlp import MLP, MLPDecoderReg
from datasets.collate_fun import CreateCustomDataset, tokens_batch
# Vocabulary size: the loader's "None" label plus one (presumably the None
# label is the largest token id -- TODO confirm).
ecg_vocab_size = int(ecg.ecg_statement_None_label+1)
EMBED_DIM = 256

Embedding_param = {
    "vocab_size": ecg_vocab_size,
    "embed_size": EMBED_DIM,
}

MLP_param = {
    "in_dim": EMBED_DIM,
    "hidden_dim": [EMBED_DIM, EMBED_DIM],
    "drop_prob": 0.25
}

MLP_decoder_param = {
    "in_dim": EMBED_DIM,
    "out_dim": targets_num,
    "hidden_dim": [EMBED_DIM//2],
    "drop_prob": 0.25
}

# One collate entry for the single token input.
collate_fn_params = [{"name": tokens_batch.__name__, "param": {"accum": False, "onset": True}}]

#Setting Training Parameters
train_param = {
    "DATASET": ecg_note_dataset_param,
    "MODEL_NAME": "ecg_note_unimodal",
    "ENCODER_PARAM": [Embedding_param, MLP_param],
    "ENCODER_MODEL": [Embedding.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.001,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam",
    "COLLATE_FN_PARAMS": collate_fn_params
}

model = create_unimodal_model(train_param["ENCODER_MODEL"],
                            train_param["ENCODER_PARAM"],
                            train_param["DECODER_MODEL"],
                            train_param["DECODER_PARAM"], device)

# NOTE: "classfication" is the keyword spelling this call uses; keep it as is
# unless the callee's signature is changed as well.
collate_batch = CreateCustomDataset(len(collate_fn_params), train_param["COLLATE_FN_PARAMS"], classfication=False)   
log = run_kfolds(train_param, model, ecg_note_dataset, ecg_sig_kfolds, log_folder=log_folder, collate_fun=collate_batch, classification=False)

#### Signal

In [None]:
# Signal dataset of the ECG recordings plus its description (cache path and
# sample count) for the training parameters.
ecg_sig_dataset = ecg.get_ecg_dataset(type="sig")
ecg_sig_dataset_param = dict(path=ecg_dataset_path, num=len(ecg_sig_dataset))

In [None]:
# ECG signal model: ResNet1d -> MLP encoder with an MLPDecoderReg head.
from blocks.resnet import ResNet1d
from blocks.mlp import MLP, MLPDecoderReg
EMBED_DIM = 640
ECG_CHANNEL = 12
SIG_LEN = 640
# Filter count and sequence length of each ResNet block.
FILTER_SIZE = [64, 128, 196, 256, 320]
SEQ_LEN = [640, 320, 160, 40, 20]

ResNet_param = {
    "input_dim": (ECG_CHANNEL, SIG_LEN),
    "blocks_dim": list(zip(FILTER_SIZE, SEQ_LEN)),
    "kernel_size": 5,
    "dropout_rate": 0.3
}

# The MLP input width is filters * sequence length of the last ResNet block.
MLP_param = {
    "in_dim": FILTER_SIZE[-1] * SEQ_LEN[-1],
    "hidden_dim": [EMBED_DIM],
    "drop_prob": 0.1
}

MLP_decoder_param = {
    "in_dim": MLP_param["hidden_dim"][-1],
    "out_dim": targets_num,
    "hidden_dim": [EMBED_DIM//2],
    "drop_prob": 0.1
}

train_param = {
    "DATASET": ecg_sig_dataset_param,
    "MODEL_NAME": "ecg_sig_unimodal",
    "ENCODER_PARAM": [ResNet_param, MLP_param],
    "ENCODER_MODEL": [ResNet1d.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.0005,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam"
}

model = create_unimodal_model(train_param["ENCODER_MODEL"],
                                train_param["ENCODER_PARAM"],
                                train_param["DECODER_MODEL"],
                                train_param["DECODER_PARAM"], device)

# Train on every fold, like the other unimodal cells. The later fusion cells
# load one pretrained checkpoint per fold (model_<fold>.pth), which a run
# restricted to the first fold would not produce.
# NOTE(review): revert to ecg_sig_kfolds[:1] only for a quick smoke test.
log = run_kfolds(train_param, model, ecg_sig_dataset, ecg_sig_kfolds, log_folder=log_folder, classification=False)

#### Feats

In [None]:
# Feature dataset of the ECG recordings plus its description (cache path and
# sample count) for the training parameters.
ecg_feats_dataset = ecg.get_ecg_dataset(type="feats")
ecg_feats_dataset_param = dict(path=ecg_dataset_path, num=len(ecg_feats_dataset))

In [None]:
# ECG feature model: MLP encoder with an MLPDecoderReg head.
from blocks.mlp import MLP, MLPDecoderReg

# Number of feature columns per ECG; this is the encoder input width.
FEATS_NUM = ecg.ecg_feats.shape[1]
EMBED_DIM = 256

MLP_param = {
    "in_dim": FEATS_NUM,
    "hidden_dim": [EMBED_DIM],
    "drop_prob": 0.05
}

MLP_decoder_param = {
    "in_dim": MLP_param["hidden_dim"][-1],
    "out_dim": targets_num,
    "hidden_dim": [EMBED_DIM//2],
    "drop_prob": 0.05
}

train_param = {
    "DATASET": ecg_feats_dataset_param,
    "MODEL_NAME": "ecg_feats_unimodal",
    "ENCODER_PARAM": [MLP_param],
    "ENCODER_MODEL": [MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 128,
    "LR": 0.001,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam"
}

model = create_unimodal_model(train_param["ENCODER_MODEL"],
                                train_param["ENCODER_PARAM"],
                                train_param["DECODER_MODEL"],
                                train_param["DECODER_PARAM"], device)


# The feature dataset comes from get_ecg_dataset, like the statement and
# signal datasets, so it has to be split with the per-ECG folds
# (ecg_sig_kfolds). The subject-level ecg_kfolds would select the wrong rows.
log = run_kfolds(train_param, model, ecg_feats_dataset, ecg_sig_kfolds, log_folder=log_folder,classification=False)


#### ECG Fusion

In [None]:
import numpy as np
from datasets.LOS.ecg import ECGFusionDataset

# only_last=True keeps a single ECG per subject: the last entry of that
# subject's "ecg_id" list. Otherwise the loader's full fusion dataset is used.
only_last = True
if only_last:
    all_ecg_ids = ecg.get_ecg_ids()
    all_ecg_ids = list(all_ecg_ids)

    # Map every ECG id to its position once, so each subject lookup is O(1)
    # instead of a linear list.index scan. setdefault keeps the first
    # occurrence, which is what list.index would return.
    ecg_id_to_pos = {}
    for pos, ecg_id in enumerate(all_ecg_ids):
        ecg_id_to_pos.setdefault(ecg_id, pos)

    # One row index per subject, in the order of ecg_ids.
    last_ecg_index = [
        ecg_id_to_pos[ecg.subject_dict[subject_id]["ecg_id"][-1]]
        for subject_id in ecg_ids
    ]

    ecg_fusion_dataset = ECGFusionDataset(ecg.ecg_signal[last_ecg_index], ecg.ecg_feats[last_ecg_index], 
                                          ecg.ecg_statement_tokens[last_ecg_index], ecg.ecg_targets[last_ecg_index], 
                                          ecg.ecg_ids[last_ecg_index])

else:
    ecg_fusion_dataset = ecg.get_ecg_dataset(type="fusion")

ecg_fusion_dataset_param = {
    "path": ecg_dataset_path,
    "num": len(ecg_fusion_dataset),
    "last": only_last
}

In [None]:
# ECG fusion model: combines the pretrained signal, feature and statement
# encoders through BiModalAttn and an MLPDecoderReg head.
from models.multimodal import BiModalAttn
from blocks.mlp import MLPDecoderReg, MLP
from datasets.collate_fun import CreateCustomDataset, tokens_batch, basic_collate_fn
import math

k_models = True
EMBED_DIM = 256

# Indices of the encoders (entries of ENCODERS_PARAM below) taking part.
encoders_i = [0,1,2]

# NOTE(review): the ".../" path parts look like placeholders for the run
# folders of the trained unimodal models -- fill in before running.
ecg_sig_model_param = {
    "model_path": log_folder+"/ecg_sig_unimodal/.../",
    "out_dim": 640
}

ecg_feats_model_param = {
    "model_path": log_folder+"/ecg_feats_unimodal/.../",
    "out_dim": 256
}

ecg_note_model_param = {
    "model_path": log_folder+"/ecg_note_unimodal/.../",
    "out_dim": 256
}

BiModelAttn_param = {
    "embed_size": EMBED_DIM,
    "num_blocks": 1,
    "num_heads": 64,
    "drop_prob": 0.1,
    "fusion_type": "add"
}

shared_layer_param = {
    "in_dim": EMBED_DIM,
    "hidden_dim": [EMBED_DIM],
    "drop_prob": 0.05,
    "BatchNorm": False
}

# Decoder input width: one EMBED_DIM block per encoder plus one per
# unordered encoder pair.
DECODER_IN_DIM = int(EMBED_DIM*len(encoders_i) + EMBED_DIM*math.comb(len(encoders_i), 2))
MLP_decoder_param = {
    "in_dim": DECODER_IN_DIM,
    "out_dim": targets_num,
    "hidden_dim": [DECODER_IN_DIM//2],
    "drop_prob": 0.1
}

# One collate entry per input, in the order signal, features, statement tokens.
collate_fn_params = [
    {"name": basic_collate_fn.__name__},
    {"name": basic_collate_fn.__name__},
    {"name": tokens_batch.__name__, "param": {"accum": False, "onset": True}}
]

train_param = {
    "DATASET": ecg_fusion_dataset_param,
    "MODEL_NAME": "ecg_fusion_unimodal",
    "LAST": only_last,
    "ENCODERS_I": encoders_i,
    "ENCODERS_PARAM": [ecg_sig_model_param, ecg_feats_model_param, ecg_note_model_param],
    "INTER_MODEL": BiModalAttn.__name__,
    "INTER_MODEL_PARAM": BiModelAttn_param,
    "SHARED_LAYER_PARAM": shared_layer_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "DECODER_PARAM": MLP_decoder_param,
    "EMBED_DIM": EMBED_DIM,
    "BATCH_SIZE": 128,
    "LR": 5E-05,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam",
    "COLLATE_FN_PARAMS": collate_fn_params
}

model = create_multimodal_model(train_param, device, k_models=k_models)

# Pick the folds that match the dataset layout: with only_last the fusion
# dataset holds one item per subject (in ecg_ids order) and needs the
# subject-level folds; the full fusion dataset is per ECG and needs the
# per-ECG folds.
ecg_fusion_kfolds = ecg_kfolds if only_last else ecg_sig_kfolds

# NOTE: "classfication" is the keyword spelling this call uses; keep it as is
# unless the callee's signature is changed as well.
collate_batch = CreateCustomDataset(len(collate_fn_params), train_param["COLLATE_FN_PARAMS"], classfication=False)
log = run_kfolds(train_param, model, ecg_fusion_dataset, ecg_fusion_kfolds, log_folder=log_folder, collate_fun = collate_batch, classification=False)

#### Hierarchical Time-Aware Fusion

In [None]:
# Loader-level ECG dataset (valid entries only) plus its description for the
# training parameters.
ecg_dataset = ecg.get_dataset(only_valid=True)
ecg_dataset_param = dict(
    path=ecg_dataset_path,
    only_valid=True,
    num=len(ecg_dataset),
)

In [None]:
# Hierarchical ECG model: the pretrained ECG fusion model (final decoder
# layer replaced by Identity) is wrapped by TemporalPooling, followed by
# ResNet1d -> MLP and an MLPDecoderReg head.
from torch import nn
from datasets.collate_fun import CreateCustomDataset, ecg_bags_batch
from blocks.mlp import MLP, MLPDecoderReg
from models.unimodal import TemporalPooling
from blocks.resnet import ResNet1d

WIN_NUM = int(len(ecg.ecg_win_i))
# Number of pretrained fusion checkpoints (model_0 .. model_{k-1}) to load;
# None builds a single model from the fold-0 checkpoint.
# NOTE(review): presumably this has to match the number of folds in
# ecg_kfolds -- confirm against run_kfolds.
k_models = 5
EMBED_DIM = 512

# NOTE(review): the ".../" path part looks like a placeholder for the run
# folder of the trained fusion model -- fill in before running.
TemporalPooling_param = {
    "model_path": log_folder+"/ecg_fusion_unimodal/.../",
    "win_size": WIN_NUM,
    "device": device
}

# Load the fold-0 fusion model to read the width of its embedding (the input
# width of the final decoder layer), then replace that layer with Identity so
# the model outputs embeddings.
# NOTE: weights_only=False unpickles arbitrary objects; only load trusted
# checkpoints.
embeds_model = torch.load(TemporalPooling_param["model_path"]+"model_0.pth", weights_only=False)
embeds_dim = embeds_model.shared_decoder.mlp[-1].in_features
embeds_model.shared_decoder.mlp[-1] = nn.Identity()
TemporalPooling_param["embeds_model"] = embeds_model

ResNet_param = {
    "input_dim": (embeds_dim, WIN_NUM),
    "blocks_dim": [(embeds_dim//2, 5)],
    "kernel_size": 3,
    "dropout_rate": 0.3
}

MLP_param = {
    "in_dim": ResNet_param["blocks_dim"][-1][0] * ResNet_param["blocks_dim"][-1][1],
    "hidden_dim": [EMBED_DIM],
    "drop_prob": 0.05
}

MLP_decoder_param = {
    "in_dim": EMBED_DIM,
    "out_dim": targets_num,
    "hidden_dim": [EMBED_DIM//2],
    "drop_prob": 0.1
}

collate_fn_params = [{"name":ecg_bags_batch.__name__}]

train_param = {
    "DATASET": ecg_dataset_param,
    "MODEL_NAME": "ecg_unimodal",
    "ENCODER_PARAM": [TemporalPooling_param, ResNet_param, MLP_param],
    "ENCODER_MODEL": [TemporalPooling.__name__, ResNet1d.__name__, MLP.__name__],
    "DECODER_PARAM": MLP_decoder_param,
    "DECODER_MODEL": MLPDecoderReg.__name__,
    "BATCH_SIZE": 32,
    "LR": 0.0005,
    "MAX_EPOCHS": 20,
    "OPTIMIZER": "Adam",
    "COLLATE_FN_PARAMS": collate_fn_params
}

# TemporalPooling_param is the same dict object referenced by
# train_param["ENCODER_PARAM"][0], so swapping "embeds_model" before each
# create_unimodal_model call hands that checkpoint to the new model.
model = []
if k_models is not None:
    for mi in range(k_models):
        embeds_model = torch.load(TemporalPooling_param["model_path"]+"model_%i.pth"%(mi), weights_only=False)
        embeds_model.shared_decoder.mlp[-1] = nn.Identity()
        TemporalPooling_param["embeds_model"] = embeds_model
        
        _model = create_unimodal_model(train_param["ENCODER_MODEL"],
                                    train_param["ENCODER_PARAM"],
                                    train_param["DECODER_MODEL"],
                                    train_param["DECODER_PARAM"], device)
        model.append(_model)
else:   
    # Single model built from the fold-0 checkpoint loaded above.
    model = create_unimodal_model(train_param["ENCODER_MODEL"],
                                    train_param["ENCODER_PARAM"],
                                    train_param["DECODER_MODEL"],
                                    train_param["DECODER_PARAM"], device)

# Drop the loaded module from the parameter dict before it is passed on
# (presumably so train_param holds only plain values when run_kfolds logs
# it -- TODO confirm).
del TemporalPooling_param["embeds_model"]

# The literal 1 equals len(collate_fn_params) here (a single collate entry).
collate_batch = CreateCustomDataset(1, train_param["COLLATE_FN_PARAMS"], classfication=False)

log = run_kfolds(train_param, model, ecg_dataset, ecg_kfolds, collate_fun=collate_batch, log_folder=log_folder, classification=False)