In [5]:
%load_ext autoreload
%autoreload 2
%cd ~/Research/Sriram/DeepSetRNN

/home/minh/Research/Sriram/DeepSetRNN


In [6]:
from collections import defaultdict, namedtuple
import os
import string
import logging
import importlib

import pandas as pd
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from src import experiment_utils, utils

In [7]:
logging.info("starting logger")
_LOGGER = logging.getLogger('VisitTimeSeries')
_LOGGER.setLevel(logging.DEBUG)

# Reading data

In [8]:
_ROOT_DATA_PATH = 'data/MIMIC3database/'
_PROCESSED_DATA_PATH = os.path.join(_ROOT_DATA_PATH, 'processed/MIMIC3EachPerson')

In [9]:
train_series_df = pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'train_timeseries.csv'),
                              index_col=0)

train_labels_df = pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'train_labels.csv'),
                              index_col=0)

train_labels_df.drop(['Icustay', 'Ethnicity', 'Gender', 'Age', 'Height', 'Weight',
       'Length of Stay', 'Mortality'], axis=1, inplace=True)


train_series_df = train_series_df.set_index(['SUBJECT_ID', 'ADMISSION_NUM'])
train_labels_df = train_labels_df.set_index(['SUBJECT_ID', 'ADMISSION_NUM'])


  mask |= (ar1 == a)


In [10]:
test_series_df = pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'test_timeseries.csv'),
                              index_col=0)

test_labels_df = pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'test_labels.csv'),
                              index_col=0)

test_labels_df.drop(['Icustay', 'Ethnicity', 'Gender', 'Age', 'Height', 'Weight',
       'Length of Stay', 'Mortality'], axis=1, inplace=True)

test_series_df = test_series_df.set_index(['SUBJECT_ID', 'ADMISSION_NUM'])
test_labels_df = test_labels_df.set_index(['SUBJECT_ID', 'ADMISSION_NUM'])

In [11]:
n_features = train_series_df.iloc[0].shape[0]
n_class = train_labels_df.iloc[0].shape[0]

In [12]:
train_indices = list(set(train_series_df.index).intersection(set(train_labels_df.index)))
test_indices = list(set(test_series_df.index).intersection(set(test_labels_df.index)))

In [None]:
train_sorted_idx = np.argsort([ train_series_df.xs(idx, level=[0,1]).shape[0]
              for idx in train_indices])

test_sorted_idx = np.argsort([ test_series_df.xs(idx, level=[0,1]).shape[0]
              for idx in test_indices])

In [36]:
train_indices = np.array(train_indices)[train_sorted_idx]
test_indices = np.array(test_indices)[test_sorted_idx]


np.save('{}/train_indices_sorted.npy'.format(_PROCESSED_DATA_PATH), train_indices)
np.save('{}/test_indices_sorted.npy'.format(_PROCESSED_DATA_PATH), test_indices)

array([66, 65, 89, 75,  5, 48, 78, 31, 59,  1, 92, 44, 35, 73, 91, 41, 80,
       60, 30, 83, 54, 29, 16, 24, 10, 96, 86,  9, 27, 42, 53, 12, 46, 81,
       76,  7, 36, 39, 17, 49, 21, 93, 84, 38, 23, 71, 11, 20, 34, 97, 70,
       77, 25,  0,  8, 50, 51,  4, 74, 56, 18, 82, 57, 95, 26, 33, 87, 52,
       79, 88, 68, 72,  2, 28, 37,  6, 94, 14, 13, 19, 63, 47, 67, 69, 22,
       43, 58, 32, 61, 40, 15, 99,  3, 90, 45, 55, 85, 98, 62, 64])

# Training Model

In [13]:
_MODEL_LOG_ROOT_PATH = 'logs/MIMIC3/VisitTimeSeries'
_MODEL_ROOT_PATH = 'models/MIMIC3/VisitTimeSeries'

In [18]:
from src.model.lstm import LSTMClassifier

ModelArgs = namedtuple('HospitalVisitLSTMClassifier', 
                      ['hidden_dims',
                       'batch_size'
                       'n_epoch',
                       'lr',
                       'momentum',
                      'n_layers',
                      'dropout']
                      )
args = ModelArgs(
    hidden_dims=[500,200],
    batch_size=500,
    n_epoch = 5,
    lr = 0.1,
    n_layers=1,
    momentum = 0.9,
    dropout=0.5,
)

model_name = str(args)

model = LSTMClassifier(hidden_dims=args.hidden_dims,
                       input_dim=n_features,
                       n_class=n_class,
                       dropout=args.dropout)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
experiment_utils.setup_model_logger(_LOGGER, model_name, _MODEL_LOG_ROOT_PATH)

In [23]:
model = model.eval()
count = 0
total_loss = 0

for idx_batch in utils.generate_batch(test_indices[:10], 2):
    curr_series = test_series_df.xs(idx_batch, level=[0,1])
    output = test_labels_df.xs(idx_batch, level=[0,1])
    
    logit = model(utils.to_tensor(curr_series))
    total_loss += loss_fn(logit, utils.to_tensor(output))
total_loss = total_loss.data.numpy()

_LOGGER.info("Initial Validation Loss: {}".format(total_loss/len(test_indices)))

KeyboardInterrupt: 

In [20]:
model = model.train()
_LOGGER.info("Training model...")
training_losses = []
for epoch in range(args.n_epoch):
    total_loss = 0
    for idx in train_indices[:100]:
        curr_series = train_series_df.xs(idx, level=[0,1])
        output = train_labels_df.xs(idx, level=[0,1])        
        logit = model(utils.to_tensor(curr_series))
        loss = loss_fn(logit, utils.to_tensor(output))
        total_loss += loss.data.numpy()
        loss.backward()
        optimizer.step()
    mean_loss = total_loss/len(train_indices[:100])
    _LOGGER.info("Epoch: {}, Loss: {}".format(epoch, mean_loss))
    training_losses.append(mean_loss)

INFO:VisitTimeSeries:Training model...
INFO:VisitTimeSeries:Epoch: 0, Loss: 636.6494873799384
INFO:VisitTimeSeries:Epoch: 1, Loss: 3.310214679152705
INFO:VisitTimeSeries:Epoch: 2, Loss: 3.7372892934829
INFO:VisitTimeSeries:Epoch: 3, Loss: 3.919697518646717
INFO:VisitTimeSeries:Epoch: 4, Loss: 4.501575288921595


In [22]:
len(train_indices)/100 * 3.2 / 60

19.053333333333335

In [17]:
torch.save(model, "{}/{}.pt".format(_MODEL_ROOT_PATH, model_name))

35725

In [None]:
model = model.eval()
count = 0
total_loss = 0

for idx in test_indices:
    curr_series = test_series_df.xs(idx, level=[0,1])
    output = test_labels_df.xs(idx, level=[0,1])
    
    logit = model(utils.to_tensor(curr_series))
    total_loss += loss_fn(logit, utils.to_tensor(output))
total_loss = total_loss.data.numpy()

_LOGGER.info("Final Validation Loss: {}".format(total_loss/len(test_indices)))

In [None]:
experiment_utils.setup_model_logger(_LOGGER, model_name, _MODEL_LOG_ROOT_PATH)

test_losses = experiment_utils.evaluate_validation_loss_template(model, loss_fn, test_series, test_labels)

_LOGGER.info("Initial Validation Loss: {}".format(np.mean(test_losses)))


training_losses = experiment_utils.train_model_template(model, loss_fn, optimizer, args.n_epoch, train_series, train_labels, _LOGGER)
torch.save(model, "{}/{}.pt".format(_MODEL_ROOT_PATH, model_name))


test_losses = experiment_utils.evaluate_validation_loss_template(model, 
                                          loss_fn, 
                                          test_series,
                                         test_labels)

_LOGGER.info("Final Validation Loss: {}".format(np.mean(test_losses)))