# MIMIC-III M3Care Baseline

## Initialize Dataset

In [6]:
from pathlib import Path

from mimic.datasets.mimic import MimicDataset

PROCESSED_DIR = Path('./mimic/data/processed')

dataset_paths = {
    'demographic': PROCESSED_DIR / 'test/demographic.csv',
    'vitals': PROCESSED_DIR / 'test/vitals.csv',
    'interventions': PROCESSED_DIR / 'test/interventions.csv',
    'static_notes': PROCESSED_DIR / 'test/static_notes_bert.h5',
    # 'ts_notes': PROCESSED_DIR / 'test/notes_ts.h5'
}

auxillary_paths = {
    'pat_ids': PROCESSED_DIR / 'test_idxs.npy',
}

ds = MimicDataset(dataset_paths, auxillary_paths)

## Initialize Models

In [7]:
from m3care.m3care import Modal, M3Care

EMB_DIM = 512
OUT_DIM = 2
DROPOUT = 0.3

DEM_INPUT = 18

VIT_INPUT = 104
VIT_TIME_DIM = 150

ITV_INPUT = 14
ITV_TIME_DIM = 150

NOTES_TOK_DIM = 16
NOTES_VOCAB_SIZE = 5000
NOTES_NUM_HEADS = 8

NST_WORD_LIM = 10000

NTS_TIME_DIM = 128
NTS_WORD_LIM = 5000

In [10]:
ds[0:10][0]['static_notes'].shape

torch.Size([10, 768])

In [3]:
from mimic.models.vector import MLP
from mimic.models.sequential import SequentialEmbedder
from mimic.models.nlp import NLSequenceEmbedder, NLTimeSeriesSequenceEmbedder

dem_model = MLP(in_dim=DEM_INPUT,
                hidden_dim=[128,192,256],
                out_dim=EMB_DIM, 
                bias=True,
                relu=True,
                norm=True)

vit_model = SequentialEmbedder(feat_dim=VIT_INPUT,
                             embed_dim=EMB_DIM,
                             max_len=VIT_TIME_DIM,
                             dropout=DROPOUT)

itv_model = SequentialEmbedder(feat_dim=ITV_INPUT,
                             embed_dim=EMB_DIM,
                             max_len=ITV_TIME_DIM,
                             dropout=DROPOUT)

nst_model = MLP(in_dim=768,
                hidden_dim=[1024, 768, 512],
                out_dim=512,
                bias=True,
                relu=True,
                norm=True)
nst_model = NLSequenceEmbedder(vocab=ds.vocab,
                               token_dim=NOTES_TOK_DIM,
                               embed_dim=EMB_DIM,
                               num_heads=NOTES_NUM_HEADS,
                               vocab_size=NOTES_VOCAB_SIZE,
                               max_len=NST_WORD_LIM,
                               dropout=DROPOUT)

nts_model = NLTimeSeriesSequenceEmbedder(vocab=ds.vocab,
                                         token_dim=NOTES_TOK_DIM,
                                         embed_dim=EMB_DIM,
                                         num_heads=NOTES_NUM_HEADS,
                                         vocab_size=NOTES_VOCAB_SIZE,
                                         max_len=NTS_WORD_LIM,
                                         dropout=DROPOUT)

In [4]:
modals = [
    Modal('demographic', dem_model, False, None),   
    Modal('vitals', vit_model, True, VIT_TIME_DIM),
    Modal('interventions', itv_model, True, ITV_TIME_DIM),
    Modal('static_notes', nst_model, True, None),
    Modal('ts_notes', nts_model, True, NTS_TIME_DIM)
]

model = M3Care(modals, EMB_DIM, OUT_DIM, 'cpu')



In [5]:
x, masks = ds[0:10]

In [6]:
y = model(x, masks, 10)

In [7]:
y

(tensor([[-0.3053,  0.1150],
         [-0.3278,  0.2367],
         [-0.1843,  0.0831],
         [-0.0569,  0.1125],
         [-0.2555,  0.0998],
         [-0.1466,  0.1032],
         [-0.2192,  0.1324],
         [-0.1943,  0.0857],
         [-0.2965,  0.1445],
         [-0.2300,  0.2895]], grad_fn=<SqueezeBackward1>),
 tensor(20149.6914, grad_fn=<AddBackward0>))