# MIMIC-III M3Care Baseline

### Initialize Datasets

In [111]:
from pathlib import Path

from mimic.datasets.mimic import MimicDataset

PROCESSED_DIR = Path('./mimic/data/processed')

# File name of every data source inside a split directory, keyed by the name
# under which it is handed to MimicDataset.
_SPLIT_FILES = {
    'demographic': 'demographic.csv',
    'vitals': 'vitals.csv',
    'interventions': 'interventions.csv',
    'static_notes': 'static_notes_bert.h5',
    'ts_notes': 'ts_notes_bert.h5',
    'labels': 'labels.csv',
}


def _dataset_paths(split):
    """Return the {source name: path} mapping for one split ('train' or 'test')."""
    return {name: PROCESSED_DIR / split / fname
            for name, fname in _SPLIT_FILES.items()}


def _auxiliary_paths(split):
    """Return the auxiliary-file mapping (patient-id index file) for one split."""
    return {'pat_ids': PROCESSED_DIR / f'{split}_idxs.npy'}


train_dataset_paths = _dataset_paths('train')
test_dataset_paths = _dataset_paths('test')

# FIX: the training auxiliary paths previously pointed at 'test_idxs.npy'
# (copy-paste of the test block), so the training set was built with the test
# split's patient ids. Both splits are now derived from the split name.
# NOTE(review): assumes 'train_idxs.npy' sits next to 'test_idxs.npy' in
# PROCESSED_DIR — confirm against the preprocessing output.
train_auxillary_paths = _auxiliary_paths('train')
test_auxillary_paths = _auxiliary_paths('test')

train_ds = MimicDataset(train_dataset_paths, train_auxillary_paths)
test_ds = MimicDataset(test_dataset_paths, test_auxillary_paths)

### Initialize Unimodal Models

In [112]:
from m3care.m3care import Modal, M3Care

# Shared settings: EMB_DIM is the output/embedding size given to every
# unimodal encoder and to M3Care; OUT_DIM is passed to M3Care; DROPOUT is
# passed to each SequentialEmbedder.
EMB_DIM = 512
OUT_DIM = 2
DROPOUT = 0.3

# Demographic encoder: input size of the MLP (`in_dim`).
DEM_INPUT = 18

# Vitals encoder: per-step feature size (`feat_dim`) and `max_len`.
VIT_INPUT = 104
VIT_TIME_DIM = 150

# Interventions encoder: per-step feature size (`feat_dim`) and `max_len`.
ITV_INPUT = 14
ITV_TIME_DIM = 150

# NOTE(review): the NOTES_* constants are not referenced by any cell shown in
# this notebook — confirm whether they are still needed.
NOTES_TOK_DIM = 16
NOTES_VOCAB_SIZE = 5000
NOTES_NUM_HEADS = 8

# Static-notes encoder: input size of the MLP (`in_dim`).
# NOTE(review): NST_WORD_LIM is not referenced by any cell shown here.
NST_INPUT = 768
NST_WORD_LIM = 10000

# Time-series-notes encoder: per-step feature size (`feat_dim`) and `max_len`.
# NOTE(review): NTS_INPUT (782) differs from NST_INPUT (768) although both
# sources are '*_notes_bert.h5' files — confirm this is intentional.
# NTS_WORD_LIM is not referenced by any cell shown here.
NTS_INPUT = 782
NTS_TIME_DIM = 128
NTS_WORD_LIM = 5000

In [113]:
from mimic.models.vector import MLP
from mimic.models.sequential import SequentialEmbedder

# Options common to both MLP encoders.
_mlp_opts = dict(bias=True, relu=True, norm=True)

# Options common to every sequential encoder.
_seq_opts = dict(embed_dim=EMB_DIM, dropout=DROPOUT)

# Demographic features -> EMB_DIM embedding.
dem_model = MLP(in_dim=DEM_INPUT, hidden_dim=[128, 192, 256], out_dim=EMB_DIM, **_mlp_opts)

# Vitals sequence -> EMB_DIM embedding.
vit_model = SequentialEmbedder(feat_dim=VIT_INPUT, max_len=VIT_TIME_DIM, **_seq_opts)

# Interventions sequence -> EMB_DIM embedding.
itv_model = SequentialEmbedder(feat_dim=ITV_INPUT, max_len=ITV_TIME_DIM, **_seq_opts)

# Static-notes vector -> EMB_DIM embedding.
nst_model = MLP(in_dim=NST_INPUT, hidden_dim=[1024, 768, 512], out_dim=EMB_DIM, **_mlp_opts)

# Time-series-notes sequence -> EMB_DIM embedding; the only sequential encoder
# that is given explicit `mlp_hidden` sizes.
nts_model = SequentialEmbedder(feat_dim=NTS_INPUT,
                               max_len=NTS_TIME_DIM,
                               mlp_hidden=[1024, 768, 512],
                               **_seq_opts)

## Training Setup

In [124]:
import torch

# Loader settings: fold count and batch size handed to KFoldDatasetLoader.
KFOLDS, BATCH_SIZE = 5, 32

# Optimisation settings: Adam learning rate and number of passes of the
# training loop.
LEARNING_RATE, EPOCHS = 1e-3, 5

# Use the GPU when PyTorch reports one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

Initialize data loaders

In [125]:
from util.kfold import KFoldDatasetLoader

# Wrap each dataset in a k-fold loader using the same fold count and batch
# size; the train loader is built first, then the test loader.
train_loader, test_loader = [
    KFoldDatasetLoader(split_ds, KFOLDS, BATCH_SIZE)
    for split_ds in (train_ds, test_ds)
]

Initialize model

In [126]:
# One row per modality, in the positional order Modal takes its arguments:
# (source name, encoder, boolean flag, time dimension or None).
_modal_specs = (
    ('demographic', dem_model, False, None),
    ('vitals', vit_model, True, VIT_TIME_DIM),
    ('interventions', itv_model, True, ITV_TIME_DIM),
    ('static_notes', nst_model, True, None),
    ('ts_notes', nts_model, True, NTS_TIME_DIM),
)
modals = [Modal(*spec) for spec in _modal_specs]

# Multimodal model over all encoders above.
model = M3Care(modals, EMB_DIM, OUT_DIM, device)



Loss function

In [127]:
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss).
# NOTE(review): the model is built with OUT_DIM = 2, and BCEWithLogitsLoss
# requires targets with the same shape as the logits — confirm the labels are
# float tensors of matching shape, or whether a different loss is intended.
loss = torch.nn.BCEWithLogitsLoss()

Optimizer

In [128]:
# Adam over whatever `model.parameters()` yields, with the learning rate from
# the training setup cell.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

## Training Loop

In [109]:
# Skeleton of the training loop: the per-batch training and validation work
# is not implemented yet (both inner loops only `pass`), so `model`, `loss`
# and `optimizer` are never used here.
# NOTE(review): unless reading `train_round_end` / `val_round_end` advances
# the loader as a side effect, the inner `while` loops never terminate —
# confirm against KFoldDatasetLoader before running this cell.
# NOTE(review): this cell's execution count (109) is lower than the setup
# cells above it; re-run the notebook top to bottom on a fresh kernel.
# Loop through each epoch
for epoch in range(EPOCHS):

    # Loop through each set of kfolds
    while not train_loader.end:
        # Loop through each training batch
        while not train_loader.train_round_end:
            pass  # TODO: training step for one batch

        # Loop through each validation batch of the current fold
        while not train_loader.val_round_end:
            pass  # TODO: validation step for one batch

    # Called once per epoch after all folds; presumably rewinds the loader so
    # the next epoch starts from the first fold — TODO confirm.
    train_loader.reset()

(None, None)