## Mortality Training
1. Load the MIMIC-III dataset
2. Normalize the data
3. Load into PyTorch DataLoaders
4. Train
5. Evaluate

In [1]:
import os

# comet_ml is kept ahead of torch, as in the original import order.
from comet_ml import Experiment
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

from core.model import SAnD
from mimic3_benchmarks.mimic3benchmark.readers import InHospitalMortalityReader
from mimic3_benchmarks.mimic3models.preprocessing import Discretizer, Normalizer
from utils.ihm_utils import load_data
from utils.functions import get_weighted_sampler, get_weights
from utils.trainer import NeuralNetworkClassifier

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Load Data
Using the [MimicIII Benchmark's](https://github.com/YerevaNN/mimic3-benchmarks) InHospitalMortalityReader and associated functions, load the data into memory.

If you are following along, you will need to prepare the data by following the directions at the link above.


In [2]:
# One reader per split. The validation reader points at the train directory
# but uses its own listfile; only the test reader uses the test directory.
train_reader = InHospitalMortalityReader(
    dataset_dir="mimic3_benchmarks/data/in-hospital-mortality/train",
    listfile="mimic3_benchmarks/data/in-hospital-mortality/train_listfile.csv",
)
val_reader = InHospitalMortalityReader(
    dataset_dir="mimic3_benchmarks/data/in-hospital-mortality/train",
    listfile="mimic3_benchmarks/data/in-hospital-mortality/val_listfile.csv",
)
test_reader = InHospitalMortalityReader(
    dataset_dir="mimic3_benchmarks/data/in-hospital-mortality/test",
    listfile="mimic3_benchmarks/data/in-hospital-mortality/test_listfile.csv",
)

In [4]:
# Discretizer configuration shared by every split.
discretizer = Discretizer(
    timestep=1.0,
    store_masks=True,
    impute_strategy='previous',
    start_time='zero',
)

# Element 1 of transform()'s result is a comma-separated header string;
# it is derived here from the first training example.
discretizer_header = discretizer.transform(
    train_reader.read_example(0)["X"]
)[1].split(',')

# Indices of header columns whose name has no "->".
# NOTE(review): presumably "->" marks one-hot categorical columns, leaving the
# continuous channels here; confirm against the Discretizer implementation.
cont_channels = [
    idx for idx, column in enumerate(discretizer_header) if "->" not in column
]

In [5]:
# Saved normalizer parameters; the path is relative to the working directory.
normalizer_state = 'train/ihm_ts1.0.input_str-previous.start_time-zero.normalizer'

# choose here which columns to standardize
normalizer = Normalizer(fields=cont_channels)
normalizer.load_params(normalizer_state)

In [6]:
# Run every split through the same discretizer and normalizer.
train_raw = load_data(train_reader, discretizer, normalizer)
val_raw = load_data(val_reader, discretizer, normalizer)
# The test split must come from test_reader. It was previously built from
# val_reader, so the "test" evaluation silently re-scored the validation data.
test_raw = load_data(test_reader, discretizer, normalizer)

# train_raw[0] is 3-dimensional; the sizes below feed the model constructor.
# NOTE(review): presumably (samples, time steps, features); confirm in load_data.
N, seq_len, feature_count = train_raw[0].shape

In [7]:
batch_size = 256

# Pair element 0 with element 1 of each split.
# NOTE(review): presumably inputs and labels respectively; confirm in load_data.
train_ds = TensorDataset(train_raw[0], train_raw[1])
val_ds = TensorDataset(val_raw[0], val_raw[1])
test_ds = TensorDataset(test_raw[0], test_raw[1])

# Shuffle only the training data so each epoch sees a new batch order.
# Validation and test loaders keep a fixed order so evaluation is repeatable.
# The flags were previously the other way round (train unshuffled, val shuffled).
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)
test_loader = DataLoader(test_ds, batch_size=batch_size)

## Create model, Train, and Evaluate
Train the model; this will export results to Comet ML.
The model and experiment are created in the same step so that a new experiment is made each time.
Evaluation is done in the same step so that the Comet ML experiment is also ended.

In [8]:
# Model and optimizer hyperparameters.
n_heads = 8
factor = 12 # M
num_class = 2
num_layers = 1 # N
epochs = 15
betas = (0.9, 0.98)
lr = 0.0005
eps = 4e-09
weight_decay = 5e-4

# Credentials must not live in the notebook: read the Comet API key from the
# environment. Any key that was previously committed here should be treated
# as compromised and rotated.
comet_api_key = os.environ.get("COMET_API_KEY")
if not comet_api_key:
    raise RuntimeError(
        "Set the COMET_API_KEY environment variable before running this cell."
    )

# A fresh experiment is created on every run of this cell.
experiment = Experiment(
    api_key=comet_api_key,
    project_name="general",
    workspace="samdoud"
)

# NOTE(review): the recorded run of this cell failed with a cpu/cuda device
# mismatch at the start of training. Confirm that NeuralNetworkClassifier / SAnD
# place the model, every internal tensor and each batch on `device`.
try:
    clf = NeuralNetworkClassifier(
        SAnD(feature_count, seq_len, n_heads, factor, num_class, num_layers, dropout_rate=0.3),
        # Loss weights come from get_weights on the training split's element 1,
        # moved to the same device as the rest of the computation.
        nn.CrossEntropyLoss(weight=torch.tensor(get_weights(train_raw[1], level=2), dtype=torch.float32).to(device=device)),
        optim.Adam, optimizer_config={
            "lr": lr, "betas": betas, "eps": eps, "weight_decay": weight_decay},
        experiment=experiment
    )

    clf.fit(
        {
            "train": train_loader,
            "val": val_loader
        },
        validation=True,
        epochs=epochs
    )

    clf.evaluate(test_loader)
finally:
    # End the experiment even if training or evaluation raises, so a failed run
    # does not leave it live.
    experiment.end()

COMET INFO: Experiment is live on comet.com https://www.comet.com/samdoud/general/f743d14450014257a15909de6c7e2d8b

[36mTraining[0m - Epochs: 001/015:   0%|          | 0/14681 [00:00<?, ?it/s]

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument batch1 in method wrapper_CUDA_baddbmm)