In [1]:
# Lightning
import lightning as L
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger

# PyTorch
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Data Processing
import pandas as pd
import numpy as np
import h5py
import csv

# Standard Library
import os

# Custom Utilities
from models.transformer import Transformer
from utils.evaluation_toolkit import get_all_metrics, run_bootstrap, check_metric_is_better, plot_prediction_correctness_by_label
from utils.constants import *

  from .autonotebook import tqdm as notebook_tqdm
Fetching 30 files: 100%|██████████| 30/30 [00:00<?, ?it/s]
Some weights of LongformerModel were not initialized from the model checkpoint at yikuan8/Clinical-Longformer and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda available: True
model device: cuda:0


In [2]:
def load_from_hdf5(filename):
    data = []
    with h5py.File(filename, 'r') as f:
        for key in f.keys():
            entry = {}
            grp = f[key]
            for k in grp.keys():
                entry[k] = grp[k][()]
            data.append(entry)
    return data

class EhrDataset(Dataset):
    def __init__(self, data_path, mode='train'):
        super().__init__()
        self.data = load_from_hdf5(f"{data_path}/{mode}.h5")

    def __len__(self):
        return len(self.data) # number of patients

    def __getitem__(self, index):
        pid = self.data[index]['PatientID']
        x_ehr = torch.tensor(self.data[index]['X']) # preprocessed data
        x_note = torch.tensor(self.data[index]['Note']) # embedding
        x_summary = torch.tensor(self.data[index]['Summary']) # embedding
        y = torch.tensor(self.data[index]['Y'])

        # print(x_ehr.shape, x_note.shape, x_summary.shape, y.shape)
        y_outcome = y[0]
        y_readmission = y[1]
        
        return pid, x_ehr.float(), x_note.float(), x_summary.float(), y_outcome.float(), y_readmission.float()


class EhrDataModule(L.LightningDataModule):
    def __init__(self, data_path, batch_size):
        super().__init__()
        self.data_path = data_path
        self.batch_size = batch_size
        self.train_dataset = EhrDataset(self.data_path, mode="train")
        self.val_dataset = EhrDataset(self.data_path, mode='val')
        self.test_dataset = EhrDataset(self.data_path, mode='test')

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=0)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False, num_workers=0)

In [3]:
class EhrEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, ehr_net='gru'):
        super().__init__()
        self.ehr_net = ehr_net
        if ehr_net == 'lstm':
            self.encoder = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)
        elif ehr_net == 'gru':
            self.encoder = nn.GRU(input_size=input_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)
        elif ehr_net == 'rnn':
            self.encoder= nn.RNN(input_size=input_dim, hidden_size=hidden_dim, num_layers=1, batch_first=True)
        elif ehr_net == 'transformer':
            self.encoder = Transformer(input_dim=input_dim, hidden_dim=hidden_dim)
        else:
            raise ValueError(f"Invalid EHR network type: {ehr_net}")

    def forward(self, x):
        if self.ehr_net in ["lstm", "gru", "rnn"]:
            output, _ = self.encoder(x)
            output = output[:, -1, :]
        elif self.ehr_net == 'transformer':
            output = self.encoder(x)
        return output

In [4]:
class FusionNetwork(nn.Module):
    def __init__(self, hidden_dim, num_heads):
        super().__init__()
        self.attention1 = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=num_heads, batch_first=True)
        self.attention2 = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=num_heads, batch_first=True)
        self.out_proj = nn.Linear(2 * hidden_dim, hidden_dim)

    def forward(self, modality_1, modality_2):
        # Apply cross attention: modality_1 as context for modality_2
        attn_output_1, attn_output_weights_1 = self.attention1(query=modality_2, key=modality_1, value=modality_1)
        # Apply cross attention: modality_2 as context for modality_1
        attn_output_2, attn_output_weights_2 = self.attention2(query=modality_1, key=modality_2, value=modality_2)
        
        # Concatenate attention outputs along the sequence length dimension
        combined_output = torch.cat((attn_output_1, attn_output_2), dim=-1).squeeze(dim=1) # [B, T, 2 * hidden_dim]
        
        # Apply attention-based adaptive pooling
        pooled_context_vector = self.out_proj(combined_output) # [B, hidden_dim]

        return pooled_context_vector

class MAGGate(nn.Module):
    def __init__(self, hidden_dim, drop=0.0):
        super().__init__()
        self.fc1 = nn.Linear(hidden_dim * 2, 1)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.beta = nn.Parameter(torch.randn((1,)))
        self.norm = nn.LayerNorm(hidden_dim)
        self.dropout = nn.Dropout(drop)

    def forward(self, inp1, inp2):
        w2 = torch.sigmoid(self.fc1(torch.cat([inp1, inp2], -1)))
        adjust = self.fc3(w2 * inp2)
        one = torch.tensor(1).type_as(adjust)
        alpha = torch.min(torch.norm(inp1) / torch.norm(adjust) * self.beta, one)
        output = inp1 + alpha * adjust
        output = self.dropout(self.norm(output))
        return output
    
    
class Outer(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.ff_net = nn.Sequential(
            nn.Linear((hidden_dim + 1) * (hidden_dim + 1), hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.GELU(),
        )

    def forward(self, inp1, inp2):
        B = inp1.size(0)
        append = torch.ones((B, 1)).type_as(inp1)
        inp1 = torch.cat([inp1, append], dim=-1)
        inp2 = torch.cat([inp2, append], dim=-1)
        fusion = torch.zeros((B, self.hidden_dim + 1, self.hidden_dim + 1)).type_as(inp1)
        for i in range(B):
            fusion[i] = torch.outer(inp1[i], inp2[i])
        return self.ff_net(fusion.flatten(1))


class Concatenation(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.net = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.GELU(),
        )

    def forward(self, inp1, inp2):
        return self.net(torch.cat([inp1, inp2], dim=-1))


class EMERGE(nn.Module):
    def __init__(self, input_ehr_dim, input_note_dim, input_summary_dim, hidden_dim, ehr_net='gru', text_fusion='concat', modality_fusion='ours', num_heads=4, use_modality='ehr_note_summary'): # [ehr_only, note_only, summary_only, ehr_note, ehr_summary, note_summary, ehr_note_summary]
        super().__init__()
        self.text_fusion = text_fusion
        self.modality_fusion = modality_fusion
        self.use_modality = use_modality

        self.ehr_encoder = EhrEncoder(input_dim=input_ehr_dim, hidden_dim=hidden_dim, ehr_net=ehr_net)
        self.note_embedding_proj = nn.Linear(input_note_dim, hidden_dim)
        self.summary_embedding_proj = nn.Linear(input_summary_dim, hidden_dim)

        # for text fusion
        self.concat_text_fusion_proj = nn.Linear(hidden_dim * 2, hidden_dim)
        self.adaptive_text_fusion_param = nn.Parameter(torch.tensor(0.5))
        self.mag_gate_text_fusion = MAGGate(hidden_dim)

        # for modality fusion
        if modality_fusion == 'ours':
            self.fusion_module = FusionNetwork(hidden_dim, num_heads)    
        elif modality_fusion == 'mag':
            self.fusion_module = MAGGate(hidden_dim)
        elif modality_fusion == 'concat':
            self.fusion_module = Concatenation(hidden_dim)
            
        elif modality_fusion == 'tf':
            self.fusion_module = Outer(hidden_dim)
        
    def forward(self, x_ehr, x_note_embedding, x_summary_embedding):
        # Process EHR data
        ehr_embedding = self.ehr_encoder(x_ehr) # Shape: [B, hidden_dim]

        # Project text embeddings to the same dimension
        note_embedding_projected = self.note_embedding_proj(x_note_embedding)
        summary_embedding_projected = self.summary_embedding_proj(x_summary_embedding)

        ###########################
        if self.text_fusion == 'note_only':
            text_embedding = note_embedding_projected
        elif self.text_fusion == 'summary_only':
            text_embedding = summary_embedding_projected
        elif self.text_fusion == 'add':
            text_embedding = note_embedding_projected + summary_embedding_projected
        elif self.text_fusion == 'concat':
            text_embedding = torch.cat([note_embedding_projected, summary_embedding_projected], dim=-1)
            text_embedding = self.concat_text_fusion_proj(text_embedding)
        elif self.text_fusion == 'adaptive':
            text_embedding = self.adaptive_text_fusion_param * note_embedding_projected + (1 - self.adaptive_text_fusion_param) * summary_embedding_projected
        elif self.text_fusion == 'mag':
            text_embedding = self.mag_gate_text_fusion(note_embedding_projected, summary_embedding_projected) # Shape: [B, hidden_dim]

        # Apply modality fusion
        if self.modality_fusion == 'ours':
            fused_embed = self.fusion_module(ehr_embedding.unsqueeze(dim=1), text_embedding.unsqueeze(dim=1))
        else:
            fused_embed = self.fusion_module(ehr_embedding, text_embedding)
        
        # according to use_modality
        if self.use_modality == 'ehr_note_summary':
            return fused_embed
        elif self.use_modality == 'ehr_only':
            return ehr_embedding
        elif self.use_modality == 'note_only':
            return note_embedding_projected
        elif self.use_modality == 'summary_only':
            return summary_embedding_projected
        elif self.use_modality == 'ehr_note':
            return fused_embed
        elif self.use_modality == 'ehr_summary':
            return fused_embed
        elif self.use_modality == 'note_summary':
            return text_embedding
        else:
            raise ValueError(f"Invalid use_modality: {self.use_modality}")

In [5]:
class MultitaskHead(nn.Module):
    def __init__(self, hidden_dim, output_dim, act_layer=nn.GELU, drop=0.25):
        super(MultitaskHead, self).__init__()
        self.hidden_dim = (hidden_dim,)
        self.output_dim = (output_dim,)
        self.act = act_layer()
        self.outcome_task_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(drop),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        )
        self.readmission_task_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.Dropout(drop),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = self.act(x)
        outcome = self.outcome_task_head(x)
        readmission = self.readmission_task_head(x)
        return torch.cat([outcome, readmission], dim=1)

class MultitaskLoss(nn.Module):
    def __init__(self, task_num=2, focus_task='multitask'):
        super(MultitaskLoss, self).__init__()
        self.task_num = task_num
        self.focus_task=focus_task
        self.alpha = nn.Parameter(torch.ones((task_num)))
        self.bce0 = nn.BCELoss()
        self.bce1 = nn.BCELoss()

    def forward(self, outcome_pred, readmission_pred, outcome_true, readmission_true):
        loss0 = self.bce0(outcome_pred, outcome_true)
        loss1 = self.bce1(readmission_pred, readmission_true)
        if self.focus_task == "outcome":
            return loss0
        if self.focus_task == "readmission":
            return loss1
        return loss0 * self.alpha[0] + loss1 * self.alpha[1]

In [6]:
class Pipeline(L.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.hidden_dim = config["hidden_dim"]
        self.learning_rate = config["learning_rate"]
        self.input_ehr_dim = config["input_ehr_dim"]
        self.input_note_dim = config["input_note_dim"]
        self.input_summary_dim = config["input_summary_dim"]
        self.focus_task = config["focus_task"]
        self.text_fusion = config["text_fusion"]
        self.modality_fusion = config["modality_fusion"]
        self.use_modality = config["use_modality"]
        self.ehr_net = config["ehr_net"]
        self.output_dim = 1

        self.model = EMERGE(input_ehr_dim=self.input_ehr_dim, input_note_dim=self.input_note_dim, input_summary_dim=self.input_summary_dim, hidden_dim=self.hidden_dim, ehr_net=self.ehr_net, text_fusion=self.text_fusion, modality_fusion=self.modality_fusion, use_modality=self.use_modality)
        self.head = MultitaskHead(self.hidden_dim, self.output_dim)
        self.loss_fn = MultitaskLoss(task_num=2, focus_task=self.focus_task)

        self.cur_best_performance = {} # val set
        self.test_performance = {} # test set

        self.validation_step_outputs = []
        self.test_step_outputs = []
        self.test_outputs = {}


    def forward(self, batch):
        pid, x_ehr, x_note, x_summary, y_outcome, y_readmission = batch
        embedding = self.model(x_ehr, x_note, x_summary).to(x_ehr.device)
        y_hat = self.head(embedding)
        return y_hat

    def _get_loss(self, batch):
        pid, x_ehr, x_note, x_summary, y_outcome, y_readmission = batch
        y_hat = self(batch)
        loss = self.loss_fn(y_hat[:,0], y_hat[:,1], y_outcome, y_readmission)
        return loss, y_hat

    def training_step(self, batch, batch_idx):
        loss, y_hat = self._get_loss(batch)
        # self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        pid, x_ehr, x_note, x_summary, y_outcome, y_readmission = batch
        loss, y_hat = self._get_loss(batch)
        # self.log("val_loss", loss)
        outs = {'y_outcome_pred': y_hat[:,0], 'y_readmission_pred': y_hat[:,1], 'y_outcome_true': y_outcome, 'y_readmission_true': y_readmission, 'val_loss': loss}
        self.validation_step_outputs.append(outs)
        return loss

    def on_validation_epoch_end(self):
        y_outcome_pred = torch.cat([x['y_outcome_pred'] for x in self.validation_step_outputs]).detach().cpu()
        y_readmission_pred = torch.cat([x['y_readmission_pred'] for x in self.validation_step_outputs]).detach().cpu()
        y_outcome_true = torch.cat([x['y_outcome_true'] for x in self.validation_step_outputs]).detach().cpu()
        y_readmission_true = torch.cat([x['y_readmission_true'] for x in self.validation_step_outputs]).detach().cpu()
        loss = torch.stack([x['val_loss'] for x in self.validation_step_outputs]).mean().detach().cpu()
        # self.log("val_loss_epoch", loss)

        metrics = get_all_metrics(y_outcome_pred, y_readmission_pred, y_outcome_true, y_readmission_true)
        for k, v in metrics.items(): self.log(k, v)

        main_metric = "outcome_auroc" if self.focus_task in ["outcome", "multitask"] else "readmission_auroc"
        main_score = metrics[main_metric]
        if check_metric_is_better(self.cur_best_performance, main_score, main_metric):
            self.cur_best_performance = metrics
            for k, v in metrics.items(): self.log("best_"+k, v)
        self.validation_step_outputs.clear()
        return main_score

    def test_step(self, batch, batch_idx):
        pid, x_ehr, x_note, x_summary, y_outcome, y_readmission = batch
        loss, y_hat = self._get_loss(batch)
        # self.log("test_loss", loss)
        outs = {'y_outcome_pred': y_hat[:,0], 'y_readmission_pred': y_hat[:,1], 'y_outcome_true': y_outcome, 'y_readmission_true': y_readmission, 'test_loss': loss}
        self.test_step_outputs.append(outs)
        return loss

    def on_test_epoch_end(self):
        y_outcome_pred = torch.cat([x['y_outcome_pred'] for x in self.test_step_outputs]).detach().cpu()
        y_readmission_pred = torch.cat([x['y_readmission_pred'] for x in self.test_step_outputs]).detach().cpu()
        y_outcome_true = torch.cat([x['y_outcome_true'] for x in self.test_step_outputs]).detach().cpu()
        y_readmission_true = torch.cat([x['y_readmission_true'] for x in self.test_step_outputs]).detach().cpu()
        loss = torch.stack([x['test_loss'] for x in self.test_step_outputs]).mean().detach().cpu()
        # self.log("test_loss_epoch", loss)

        test_performance = get_all_metrics(y_outcome_pred, y_readmission_pred, y_outcome_true, y_readmission_true)
        for k, v in test_performance.items(): self.log("test_"+k, v)

        self.test_outputs = {'y_outcome_pred': y_outcome_pred, 'y_readmission_pred': y_readmission_pred, 'y_outcome_true': y_outcome_true, 'y_readmission_true': y_readmission_true, 'test_loss': loss}
        self.test_step_outputs.clear()

        self.test_performance = test_performance
        return test_performance

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
        return optimizer

In [7]:
def run_experiment(config):
    # data
    dm = EhrDataModule(f'{config["dataset"]}', batch_size=config["batch_size"])
    logger = CSVLogger(save_dir="logs", name=f'{config["dataset"]}/{config["focus_task"]}', version=f"{config['model']}")

    main_metric = "outcome_auroc" if config["focus_task"] in ["outcome", "multitask"] else "readmission_auroc"
    # EarlyStop and checkpoint callback
    early_stopping_callback = EarlyStopping(monitor=main_metric, patience=config["patience"], mode="max",)
    checkpoint_callback = ModelCheckpoint(filename="best", monitor=main_metric, mode="max")

    L.seed_everything(42) # seed for reproducibility

    # train/val/test
    pipeline = Pipeline(config)
    trainer = L.Trainer(accelerator="gpu", devices=[0], max_epochs=config["epochs"], logger=logger, callbacks=[early_stopping_callback, checkpoint_callback])
    # trainer = L.Trainer(accelerator="gpu", devices=[0], max_epochs=config["epochs"], callbacks=[early_stopping_callback, checkpoint_callback])
    trainer.fit(pipeline, dm)

    # Load best model checkpoint
    best_model_path = checkpoint_callback.best_model_path
    print("best_model_path:", best_model_path)
    pipeline = Pipeline.load_from_checkpoint(best_model_path, config=config)
    trainer.test(pipeline, dm)

    perf = pipeline.test_performance
    outs = pipeline.test_outputs
    return perf, outs

In [8]:
# # Author
# config = {
#     'model': 'EMERGE',
#     'dataset': 'mimic4', # ['mimic3', 'mimic4']
#     'input_ehr_dim': 61,
#     'input_note_dim': 768, # ClinicalLongformer embedding
#     'input_summary_dim': 768, # ClinicalLongformer embedding
#     'learning_rate': 1e-3,
#     'hidden_dim': 128,
#     'focus_task': 'outcome', # ['outcome', 'readmission'] no multitask
#     'ehr_net': 'gru', # ['lstm', 'gru', 'rnn', 'transformer']
#     'text_fusion': 'concat', # ['note_only', 'summary_only', 'add', 'concat', 'adaptive', 'mag']
#     'modality_fusion': 'ours', # ['ours', 'mag', 'concat', 'tf']
#     'use_modality': 'ehr_note_summary', # [ehr_only, note_only, summary_only, ehr_note, ehr_summary, note_summary, ehr_note_summary]
#     'batch_size': 256,
#     'epochs': 50,
#     'patience': 5,
# }

In [9]:
config = {
    'model': 'EMERGE',
    'dataset': DATA_PATH,
    'input_ehr_dim': 61,
    'input_note_dim': 768, # ClinicalLongformer embedding
    'input_summary_dim': 768, # ClinicalLongformer embedding
    'learning_rate': 1e-3,
    'hidden_dim': 128,
    'focus_task': 'outcome', # ['outcome', 'readmission'] no multitask
    'ehr_net': 'gru', # ['lstm', 'gru', 'rnn', 'transformer']
    'text_fusion': 'concat', # ['note_only', 'summary_only', 'add', 'concat', 'adaptive', 'mag']
    'modality_fusion': 'ours', # ['ours', 'mag', 'concat', 'tf']
    'use_modality': 'ehr_only', # [ehr_only, note_only, summary_only, ehr_note, ehr_summary, note_summary, ehr_note_summary]
    'batch_size': 256,
    'epochs': 50, #
    'patience': 5, #
}

def experimenting(focus_task, use_modality):
    config['focus_task'] = focus_task
    config['use_modality'] = use_modality
    config['model'] = 'EMERGE' + f"_hid{config['hidden_dim']}_ehr{config['ehr_net']}_text{config['text_fusion']}_{config['modality_fusion']}_{config['use_modality']}"

    perf, outs = run_experiment(config)

    y_outcome_pred = outs['y_outcome_pred']
    y_readmission_pred = outs['y_readmission_pred']
    y_outcome_true = outs['y_outcome_true']
    y_readmission_true = outs['y_readmission_true']

    bootstrap_results = run_bootstrap(y_outcome_pred, y_readmission_pred, y_outcome_true, y_readmission_true)

    print(f"\n\nExperimenting with focus_task: {focus_task}, use_modality: {use_modality}")
    record = ""
    for k, v in bootstrap_results.items():
        title = str(k) + " " * (25 - len(k))
        record += f"{title} {v['mean'] * 100:.2f}±{v['std'] * 100:.2f}" + "\n"
    SAVE_DIR = f"results/{config['focus_task']}/"
    with open(SAVE_DIR + f"{config['use_modality']}.txt", "w", encoding="utf-8") as f:
        f.write(record)

    if focus_task == "outcome":
        plot_prediction_correctness_by_label(labels=y_outcome_true, preds=y_outcome_pred, 
                                             title=f"Plot for {focus_task} task using {use_modality} modality", SAVE_DIR=SAVE_DIR + f"{config['use_modality']}.png")
    else:
        plot_prediction_correctness_by_label(labels=y_readmission_true, preds=y_readmission_pred, 
                                             title=f"Plot for {focus_task} task using {use_modality} modality", SAVE_DIR=SAVE_DIR + f"{config['use_modality']}.png")

In [10]:
for focus_task in ['outcome', 'readmission']:
    for use_modality in ['ehr_only', 'note_only', 'summary_only', 'ehr_note', 'ehr_summary', 'note_summary', 'ehr_note_summary']:
        if use_modality in ['note_only', 'ehr_note']:
            config['text_fusion'] = 'note_only'
        elif use_modality in ['summary_only', 'ehr_summary']:
            config['text_fusion'] = 'summary_only'
        else:
            config['text_fusion'] = 'concat'
        experimenting(focus_task, use_modality)

Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules 

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


                                                                           

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 10: 100%|██████████| 44/44 [00:00<00:00, 56.96it/s, v_num=only]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textconcat_ours_ehr_only\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 99.88it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc        0.531432032585144
   test_outcome_auroc       0.8367844820022583
   test_outcome_minpse      0.5068181753158569
 test_readmission_auprc     0.2494795024394989
 test_readmission_auroc     0.6246361136436462
 test_readmission_minpse    0.3005698025226593
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: ehr_only


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 25: 100%|██████████| 44/44 [00:00<00:00, 50.93it/s, v_num=only]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textnote_only_ours_note_only\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 97.24it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.3563777208328247
   test_outcome_auroc       0.7813241481781006
   test_outcome_minpse      0.3634311556816101
 test_readmission_auprc     0.16044695675373077
 test_readmission_auroc     0.44101351499557495
 test_readmission_minpse    0.18090766668319702
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: note_only


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 27: 100%|██████████| 44/44 [00:00<00:00, 60.13it/s, v_num=only]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textsummary_only_ours_summary_only\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 100.23it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.38401129841804504
   test_outcome_auroc       0.7879471182823181
   test_outcome_minpse      0.4263038635253906
 test_readmission_auprc     0.35986757278442383
 test_readmission_auroc     0.7016844153404236
 test_readmission_minpse    0.37755101919174194
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: summary_only


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 12: 100%|██████████| 44/44 [00:00<00:00, 53.16it/s, v_num=note]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textnote_only_ours_ehr_note\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 95.72it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.5821055173873901
   test_outcome_auroc       0.8694729804992676
   test_outcome_minpse      0.5590909123420715
 test_readmission_auprc     0.11727380007505417
 test_readmission_auroc     0.26783978939056396
 test_readmission_minpse    0.18062500655651093
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: ehr_note


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 13: 100%|██████████| 44/44 [00:00<00:00, 50.42it/s, v_num=mary]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textsummary_only_ours_ehr_summary\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 94.76it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.5679983496665955
   test_outcome_auroc       0.8634963035583496
   test_outcome_minpse      0.5550561547279358
 test_readmission_auprc     0.1411879062652588
 test_readmission_auroc     0.3812655210494995
 test_readmission_minpse    0.18062500655651093
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: ehr_summary


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 22: 100%|██████████| 44/44 [00:00<00:00, 65.62it/s, v_num=mary]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textconcat_ours_note_summary\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 107.08it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.4392915964126587
   test_outcome_auroc       0.8271055817604065
   test_outcome_minpse      0.4477272629737854
 test_readmission_auprc     0.18231889605522156
 test_readmission_auroc     0.46589282155036926
 test_readmission_minpse    0.1807628571987152
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: note_summary


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 13: 100%|██████████| 44/44 [00:00<00:00, 57.91it/s, v_num=mary]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//outcome\EMERGE_hid128_ehrgru_textconcat_ours_ehr_note_summary\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 99.73it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.5752642750740051
   test_outcome_auroc       0.8735870122909546
   test_outcome_minpse      0.5565611124038696
 test_readmission_auprc     0.12960581481456757
 test_readmission_auroc      0.343986839056015
 test_readmission_minpse    0.18073047697544098
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: outcome, use_modality: ehr_note_summary


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 13: 100%|██████████| 44/44 [00:00<00:00, 60.33it/s, v_num=only]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textconcat_ours_ehr_only\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 44.92it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.19963590800762177
   test_outcome_auroc       0.5791345834732056
   test_outcome_minpse      0.2708803713321686
 test_readmission_auprc     0.5284968614578247
 test_readmission_auroc     0.7815971374511719
 test_readmission_minpse    0.49653980135917664
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: ehr_only


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 25: 100%|██████████| 44/44 [00:00<00:00, 59.51it/s, v_num=only]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textnote_only_ours_note_only\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 100.49it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.1226031556725502
   test_outcome_auroc       0.4515044689178467
   test_outcome_minpse      0.13762903213500977
 test_readmission_auprc     0.3730620741844177
 test_readmission_auroc     0.7303538918495178
 test_readmission_minpse    0.3945578336715698
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: note_only


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 28: 100%|██████████| 44/44 [00:00<00:00, 60.16it/s, v_num=only]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textsummary_only_ours_summary_only\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 104.79it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.3147587776184082
   test_outcome_auroc       0.7304941415786743
   test_outcome_minpse      0.34090909361839294
 test_readmission_auprc     0.39074593782424927
 test_readmission_auroc     0.7209926843643188
 test_readmission_minpse    0.4136752188205719
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: summary_only


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 11: 100%|██████████| 44/44 [00:00<00:00, 55.22it/s, v_num=note]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textnote_only_ours_ehr_note\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 93.29it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.4970742464065552
   test_outcome_auroc       0.8510927557945251
   test_outcome_minpse      0.4886363744735718
 test_readmission_auprc     0.5210307836532593
 test_readmission_auroc     0.8017672300338745
 test_readmission_minpse    0.5017300844192505
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: ehr_note


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 34: 100%|██████████| 44/44 [00:00<00:00, 55.13it/s, v_num=mary]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textsummary_only_ours_ehr_summary\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 97.67it/s] 
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.07960731536149979
   test_outcome_auroc       0.1735433042049408
   test_outcome_minpse      0.13750000298023224
 test_readmission_auprc     0.5372869372367859
 test_readmission_auroc     0.7959691882133484
 test_readmission_minpse    0.5034602284431458
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: ehr_summary


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 20: 100%|██████████| 44/44 [00:00<00:00, 57.38it/s, v_num=mary]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textconcat_ours_note_summary\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 78.79it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.3808845579624176
   test_outcome_auroc       0.7954051494598389
   test_outcome_minpse      0.3986486494541168
 test_readmission_auprc     0.4303235709667206
 test_readmission_auroc     0.7677876353263855
 test_readmission_minpse    0.43425604701042175
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: note_summary


Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type          | Params | Mode 
--------------------------------------------------
0 | model   | EMERGE        | 485 K  | train
1 | head    | MultitaskHead | 33.3 K | train
2 | loss_fn | MultitaskLoss | 2      | train
--------------------------------------------------
518 K     Trainable params
0         Non-trainable params
518 K     Total params
2.074     Total estimated model params size (MB)
32        Modules in train mode
0         Modules in eval mode


                                                                            

d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\loops\fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 17: 100%|██████████| 44/44 [00:00<00:00, 49.35it/s, v_num=mary]
best_model_path: D:/Lab/Research/HERMES-EHR/good_data/complete//readmission\EMERGE_hid128_ehrgru_textconcat_ours_ehr_note_summary\checkpoints\best.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
d:\Lab\Research\HERMES-EHR\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 13/13 [00:00<00:00, 103.81it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_outcome_auprc       0.5193474292755127
   test_outcome_auroc        0.791002631187439
   test_outcome_minpse      0.5249999761581421
 test_readmission_auprc     0.5375418663024902
 test_readmission_auroc     0.8068361282348633
 test_readmission_minpse    0.5146299600601196
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


Experimenting with focus_task: readmission, use_modality: ehr_note_summary
