In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (such as the project's `src` package) are reloaded before code is executed.
%load_ext autoreload
%autoreload 2
# Switch to the project root; the data/, logs/ and models/ paths used in this
# notebook are relative to this directory.
# NOTE(review): this location is specific to one user's home directory layout -
# adjust it to wherever the repository is checked out.
%cd ~/Research/Sriram/DeepSetRNN

/home/minh/Research/Sriram/DeepSetRNN


In [2]:
from collections import defaultdict, namedtuple
import os
import string
import logging
import importlib

import pandas as pd
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from src import experiment_utils, utils

In [3]:
# Attach the default stderr handler to the root logger explicitly. A bare
# logging.info(...) call does this only as an implicit side effect, and its
# message is discarded because the root logger defaults to the WARNING level.
logging.basicConfig()

# Notebook-wide logger; DEBUG level lets every message from this notebook
# through to the root handler.
_LOGGER = logging.getLogger('VisitTimeSeries')
_LOGGER.setLevel(logging.DEBUG)

# Emit the start-up message through the DEBUG-level logger so it is shown.
_LOGGER.info("starting logger")

# Reading data

In [4]:
# Root of the MIMIC3 data directory, relative to the current working directory.
_ROOT_DATA_PATH = 'data/MIMIC3database/'
# Directory holding the processed train/test time-series and label CSV files
# that the following cells read.
_PROCESSED_DATA_PATH = os.path.join(_ROOT_DATA_PATH, 'processed/MIMIC3EachPerson')

In [11]:
# Training split. Both frames end up indexed by (SUBJECT_ID, ADMISSION_NUM).
train_series_df = (
    pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'train_timeseries.csv'), index_col=0)
    .set_index(['SUBJECT_ID', 'ADMISSION_NUM'])
)

# The listed columns are removed from the label frame; the columns that remain
# (after indexing) are what the rest of the notebook treats as the label vector.
train_labels_df = (
    pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'train_labels.csv'), index_col=0)
    .drop(columns=['Icustay', 'Ethnicity', 'Gender', 'Age', 'Height', 'Weight',
                   'Length of Stay', 'Mortality'])
    .set_index(['SUBJECT_ID', 'ADMISSION_NUM'])
)


In [12]:
# Held-out split, prepared the same way as the training split: both frames end
# up indexed by (SUBJECT_ID, ADMISSION_NUM).
test_series_df = (
    pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'test_timeseries.csv'), index_col=0)
    .set_index(['SUBJECT_ID', 'ADMISSION_NUM'])
)

# The listed columns are removed from the label frame before it is indexed.
test_labels_df = (
    pd.read_csv(os.path.join(_PROCESSED_DATA_PATH, 'test_labels.csv'), index_col=0)
    .drop(columns=['Icustay', 'Ethnicity', 'Gender', 'Age', 'Height', 'Weight',
                   'Length of Stay', 'Mortality'])
    .set_index(['SUBJECT_ID', 'ADMISSION_NUM'])
)

In [13]:
# Model dimensions are the column counts of the two training frames.
# Reading them from .shape avoids building a throwaway row Series and still
# works when a frame has no rows (.iloc[0] would raise IndexError there).
n_features = train_series_df.shape[1]
n_class = train_labels_df.shape[1]

In [16]:
# Index keys that occur in both the time-series frame and the label frame of
# each split; the per-key slicing that follows iterates over these sets.
train_indices = set(train_series_df.index) & set(train_labels_df.index)
test_indices = set(test_series_df.index) & set(test_labels_df.index)

In [None]:
# Build two position-aligned lists: for every shared index key, the matching
# cross-section of the time-series frame and of the label frame.
train_series = []
train_labels = []
for admission_key in train_indices:
    train_series.append(train_series_df.xs(admission_key, level=[0, 1]))
    train_labels.append(train_labels_df.xs(admission_key, level=[0, 1]))

In [None]:
# Build two position-aligned lists for the held-out split: for every shared
# index key, the matching cross-section of the series and label frames.
test_series = []
test_labels = []
for admission_key in test_indices:
    test_series.append(test_series_df.xs(admission_key, level=[0, 1]))
    test_labels.append(test_labels_df.xs(admission_key, level=[0, 1]))

# Training Model

In [None]:
# Directory handed to experiment_utils.setup_model_logger together with the
# model name (relative to the current working directory).
_MODEL_LOG_ROOT_PATH = 'logs/MIMIC3/VisitTimeSeries'
# Directory under which the trained model is saved as "<model_name>.pt".
_MODEL_ROOT_PATH = 'models/MIMIC3/VisitTimeSeries'

In [None]:
from src.model.lstm import LSTMClassifier

# Seed torch's random number generator before the model is built so torch-side
# randomness is repeatable when this cell and the ones after it are re-run.
# NOTE(review): randomness drawn from numpy / `random` inside the
# experiment_utils helpers (if any) is not covered by this seed - confirm.
_SEED = 0
torch.manual_seed(_SEED)

# Hyper-parameters of this run. The namedtuple typename and the field order
# determine str(args), which becomes `model_name` below (used for the log
# setup and the saved model file), so keep both stable for comparable runs.
ModelArgs = namedtuple(
    'HospitalVisitLSTMClassifier',
    ['hidden_dims',
     'n_epoch',
     'lr',
     'momentum',
     'n_layers',
     'dropout'],
)
args = ModelArgs(
    hidden_dims=[1000, 100],
    n_epoch=10,
    lr=0.1,
    # NOTE(review): n_layers is recorded in model_name but is never passed to
    # LSTMClassifier below - confirm whether the model is meant to receive it.
    n_layers=1,
    momentum=0.9,
    dropout=0.5,
)

model_name = str(args)

# Input width and number of outputs come from the training frames
# (n_features / n_class computed earlier in the notebook).
model = LSTMClassifier(hidden_dims=args.hidden_dims,
                       input_dim=n_features,
                       n_class=n_class,
                       dropout=args.dropout)
# BCEWithLogitsLoss applies the sigmoid itself, so it is fed raw scores.
# NOTE(review): presumably LSTMClassifier returns unnormalised logits - confirm.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

In [None]:
# Attach the per-model logging configuration before anything is reported.
experiment_utils.setup_model_logger(_LOGGER, model_name, _MODEL_LOG_ROOT_PATH)

# Baseline: loss of the freshly initialised model on the held-out split.
# NOTE(review): the model is built with dropout; confirm that
# evaluate_validation_loss_template puts it in eval mode.
test_losses = experiment_utils.evaluate_validation_loss_template(
    model, loss_fn, test_series, test_labels)
initial_loss_message = "Initial Validation Loss: {}".format(np.mean(test_losses))
_LOGGER.info(initial_loss_message)

In [None]:

# Create the output directory before training starts: torch.save cannot write
# into a missing directory, and that failure would otherwise only surface
# after the whole training run has finished.
os.makedirs(_MODEL_ROOT_PATH, exist_ok=True)

# Train for args.n_epoch epochs on the per-admission training lists; the
# helper receives the notebook logger as its last argument.
training_losses = experiment_utils.train_model_template(model, loss_fn, optimizer,
                                                        args.n_epoch,
                                                        train_series,
                                                        train_labels, _LOGGER)

# Saves the whole model object (not just a state_dict), named after the
# hyper-parameter string in `model_name`.
torch.save(model, "{}/{}.pt".format(_MODEL_ROOT_PATH, model_name))

In [None]:

# Loss of the trained model on the held-out split, reported as the mean of the
# values returned by the evaluation helper.
test_losses = experiment_utils.evaluate_validation_loss_template(
    model, loss_fn, test_series, test_labels)
final_loss_message = "Final Validation Loss: {}".format(np.mean(test_losses))
_LOGGER.info(final_loss_message)