In [None]:
import pandas as pd
from itertools import islice
import numpy as np
import xarray
import json
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedShuffleSplit
from skmultilearn.model_selection import iterative_train_test_split
from sklearn.model_selection import ParameterGrid
from sksurv.metrics import concordance_index_ipcw, brier_score, cumulative_dynamic_auc
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

import numpy as np
import matplotlib.pyplot as plt

# For preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper 
from pycox.models import DeepHitSingle

import torch # For building the networks 
from torch import nn
import torch.nn.functional as F
import torchtuples as tt # Some useful functions

from pycox.datasets import metabric
from pycox.models import LogisticHazard
from pycox.models import CoxPH
from pycox.models.loss import NLLLogistiHazardLoss, NLLMTLRLoss, BCESurvLoss
from pycox.evaluation import EvalSurv

import seaborn as sn
# Global plot styling for every figure in the notebook.
sn.set_theme(palette="rocket_r", style="white")

# Seed both random number generators used below (torch for the networks,
# numpy for sampling) so that Restart & Run All reproduces the same results.
_ = torch.manual_seed(100)
np.random.seed(100)

In [None]:
# Outcome table, indexed by patient.
labels = pd.read_csv('preprocessed_labels.csv', index_col='patient')
# Rescale the length-of-stay column by 24 (presumably days -> hours; confirm
# against the preprocessing step) and discard every patient above 240.
labels['actualiculos'] = labels['actualiculos'].mul(24)
over_limit = labels[labels['actualiculos'] > 240].index
labels = labels.drop(index=over_limit)

# Static (per-patient) features; the 'nullheight' column is not used.
statics = pd.read_csv('preprocessed_flat.csv', index_col='patient').drop(columns='nullheight')

In [None]:
# Inner join of features and outcomes on the shared patient index.
data = pd.merge(statics, labels, left_index=True, right_index=True)

In [None]:
# Labels of the patients that are present in the merged feature/label table.
sample_labels = labels.loc[data.index.unique()]

# Disjoint split by patient: 20% test, then 20% of the remaining patients for
# validation. The validation sample is drawn from `df_train` (not from the full
# table) so that no test patient can end up in the training set. The fixed
# random_state makes the cell produce the same split every time it is run.
df_test = sample_labels.sample(frac=0.2, random_state=100)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=100)
df_train = df_train.drop(df_val.index)

cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

# Standardise the continuous columns, pass the remaining columns through as-is.
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Row positions of each group inside `data`. They are computed from `data`'s
# own index, because positions in an unordered set of patient ids do not
# correspond to row positions. Later cells reuse `test_index` with data.iloc.
train_index = np.flatnonzero(data.index.isin(df_train.index))
val_index = np.flatnonzero(data.index.isin(df_val.index))
test_index = np.flatnonzero(data.index.isin(df_test.index))

input_train = data.iloc[train_index]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

num_durations = 10

labtrans = LogisticHazard.label_transform(num_durations)


def get_target(df):
    """Return (durations as float32, event indicators as int32) for the rows of `df`."""
    return (df['actualiculos'].values.astype('float32'),
            df['actualhospitalmortality'].values.astype('int32'))


# The discretisation grid is fitted on the training targets only and then
# re-used unchanged for the validation targets.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))

# We don't need to transform the test labels
durations_test, events_test = get_target(input_test)

# Identifier and outcome columns must not be visible as features. Non-inplace
# drops avoid mutating slices of `data`.
non_feature_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(columns=non_feature_cols)
input_val = input_val.drop(columns=non_feature_cols)
input_test = input_test.drop(columns=non_feature_cols)

# The scalers are fitted on the training rows only.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')
x_test = x_mapper.transform(input_test).astype('float32')

train = (x_train, y_train)
val = (x_val, y_val)

# Standard Logistic Hazard

In [None]:
# MLP for the LogisticHazard model: three hidden layers sized relative to the
# number of input features.
in_features = x_train.shape[1]
num_nodes = [3*in_features, 5*in_features, 3*in_features]
# One output per discretisation interval. Taken from the label transform rather
# than hard-coded so it stays in sync with `num_durations`.
out_features = labtrans.out_features
batch_norm = False
dropout = 0.0
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [None]:
# Wrap `net` in a LogisticHazard model optimised with tt.optim.Adam(0.01); the
# label transform's cut points become the time index of the predictions.
model = LogisticHazard(net, tt.optim.Adam(0.01), duration_index=labtrans.cuts)

In [None]:
# Training configuration.
batch_size = 256
epochs = 1000

In [None]:
# Train with `val` as validation data. No callbacks are passed, so training
# runs for the full `epochs` epochs.
log = model.fit(x_train, y_train, batch_size, epochs, val_data=val)

In [None]:
# Plot the training log returned by `fit`.
_ = log.plot()

In [None]:
# Predicted survival curves for the test rows (one column per row of x_test).
surv = model.predict_surv_df(x_test)

In [None]:
# Step plot of the first five predicted survival curves.
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Re-predict through pycox's `interpolate(10)` wrapper; `surv` is overwritten.
surv = model.interpolate(10).predict_surv_df(x_test)

In [None]:
# Same plot for the interpolated predictions.
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Evaluation helper for the interpolated predictions against the test
# durations/events; censor_surv='km' selects pycox's Kaplan-Meier censoring estimate.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Time-dependent concordance (cell output).
ev.concordance_td()

In [None]:
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
ev.integrated_brier_score(time_grid)

In [None]:
# Integrated negative binomial log-likelihood on the same grid (cell output).
ev.integrated_nbll(time_grid)

# PMF

In [None]:
from pycox.models import PMF

In [None]:

# Labels of the patients that are present in the merged feature/label table.
sample_labels = labels.loc[data.index.unique()]

# Disjoint split by patient: 20% test, then 20% of the remaining patients for
# validation. The validation sample is drawn from `df_train` (not from the full
# table) so that no test patient can end up in the training set. The fixed
# random_state makes the cell produce the same split every time it is run.
df_test = sample_labels.sample(frac=0.2, random_state=100)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=100)
df_train = df_train.drop(df_val.index)

cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

# Standardise the continuous columns, pass the remaining columns through as-is.
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Row positions of each group inside `data`. They are computed from `data`'s
# own index, because positions in an unordered set of patient ids do not
# correspond to row positions. Later cells reuse `test_index` with data.iloc.
train_index = np.flatnonzero(data.index.isin(df_train.index))
val_index = np.flatnonzero(data.index.isin(df_val.index))
test_index = np.flatnonzero(data.index.isin(df_test.index))

input_train = data.iloc[train_index]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

num_durations = 10
labtrans = PMF.label_transform(num_durations)


def get_target(df):
    """Return (durations as float32, event indicators as int32) for the rows of `df`."""
    return (df['actualiculos'].values.astype('float32'),
            df['actualhospitalmortality'].values.astype('int32'))


# The discretisation grid is fitted on the training targets only.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))

# We don't need to transform the test labels: the model is created with
# duration_index=labtrans.cuts, so its predictions are indexed on the original
# time scale and must be evaluated against untransformed durations.
durations_test, events_test = get_target(input_test)

# Identifier and outcome columns must not be visible as features. Non-inplace
# drops avoid mutating slices of `data`.
non_feature_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(columns=non_feature_cols)
input_val = input_val.drop(columns=non_feature_cols)
input_test = input_test.drop(columns=non_feature_cols)

# The scalers are fitted on the training rows only.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')
x_test = x_mapper.transform(input_test).astype('float32')

train = (x_train, y_train)
val = (x_val, y_val)

In [None]:
# MLP for the PMF model: three hidden layers sized relative to the number of
# input features, one output per discretisation interval of `labtrans`.
in_features = x_train.shape[1]
num_nodes = [3*in_features, 5*in_features, 3*in_features]
out_features = labtrans.out_features
batch_norm = False
dropout = 0.0
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [None]:
# The optimizer class is passed without a learning rate; the rate is set from
# the learning-rate finder a few cells below.
model = PMF(net, tt.optim.Adam, duration_index=labtrans.cuts)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
batch_size = 256
lr_finder = model.lr_finder(x_train, y_train, batch_size, tolerance=4)
_ = lr_finder.plot()

In [None]:
# Display the learning rate suggested by the finder (cell output).
lr_finder.get_best_lr()

In [None]:
# Use the suggested learning rate for training.
model.optimizer.set_lr(lr_finder.get_best_lr())

In [None]:
# Training configuration.
epochs = 1000
verbose = True

In [None]:
# `verbose` must be passed by keyword: the fifth positional parameter of `fit`
# is `callbacks`, so passing it positionally handed True to the callback slot.
log = model.fit(x_train, y_train, batch_size, epochs, verbose=verbose,
                val_data=val, val_batch_size=batch_size)

In [None]:
# Plot the training log returned by `fit`.
_ = log.plot()

In [None]:
# Predicted survival curves for the test rows (one column per row of x_test).
surv = model.predict_surv_df(x_test)

In [None]:
# Step plot of the first five predicted survival curves.
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Re-predict through pycox's `interpolate(10)` wrapper; `surv` is overwritten.
surv = model.interpolate(10).predict_surv_df(x_test)

In [None]:
# Same plot for the interpolated predictions.
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Evaluation helper for the interpolated predictions; censor_surv='km' selects
# pycox's Kaplan-Meier censoring estimate.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Time-dependent concordance with the 'antolini' method (cell output).
ev.concordance_td('antolini')

In [None]:
# Re-select the test rows from `data` (the label columns were removed from
# `input_test` in the preparation cell), read the untransformed durations and
# events from them, and rebuild the evaluator before computing Brier scores.
input_test = data.iloc[test_index]
durations_test, events_test = get_target(input_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
ev.brier_score(time_grid).plot()
plt.ylabel('Brier score')
_ = plt.xlabel('Time')

In [None]:
# Integrated Brier score over `time_grid` (cell output).
ev.integrated_brier_score(time_grid) 

In [None]:
# Integrated negative binomial log-likelihood over `time_grid` (cell output).
ev.integrated_nbll(time_grid) 

# DeepHit

In [None]:
from pycox.models import DeepHitSingle

In [None]:

# Labels of the patients that are present in the merged feature/label table.
sample_labels = labels.loc[data.index.unique()]

# Disjoint split by patient: 20% test, then 20% of the remaining patients for
# validation. The validation sample is drawn from `df_train` (not from the full
# table) so that no test patient can end up in the training set. The fixed
# random_state makes the cell produce the same split every time it is run.
df_test = sample_labels.sample(frac=0.2, random_state=100)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=100)
df_train = df_train.drop(df_val.index)

cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

# Standardise the continuous columns, pass the remaining columns through as-is.
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Row positions of each group inside `data`. They are computed from `data`'s
# own index, because positions in an unordered set of patient ids do not
# correspond to row positions. Later cells reuse `test_index` with data.iloc.
train_index = np.flatnonzero(data.index.isin(df_train.index))
val_index = np.flatnonzero(data.index.isin(df_val.index))
test_index = np.flatnonzero(data.index.isin(df_test.index))

input_train = data.iloc[train_index]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

num_durations = 10
labtrans = DeepHitSingle.label_transform(num_durations)


def get_target(df):
    """Return (durations as float32, event indicators as int32) for the rows of `df`."""
    return (df['actualiculos'].values.astype('float32'),
            df['actualhospitalmortality'].values.astype('int32'))


# The discretisation grid is fitted on the training targets only.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))

# We don't need to transform the test labels: the model is created with
# duration_index=labtrans.cuts, so its predictions are indexed on the original
# time scale and must be evaluated against untransformed durations.
durations_test, events_test = get_target(input_test)

# Identifier and outcome columns must not be visible as features. Non-inplace
# drops avoid mutating slices of `data`.
non_feature_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(columns=non_feature_cols)
input_val = input_val.drop(columns=non_feature_cols)
input_test = input_test.drop(columns=non_feature_cols)

# The scalers are fitted on the training rows only.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')
x_test = x_mapper.transform(input_test).astype('float32')

train = (x_train, y_train)
val = (x_val, y_val)

In [None]:
# MLP for the DeepHit model: three hidden layers sized relative to the number
# of input features, with batch normalisation and dropout 0.6; one output per
# discretisation interval of `labtrans`.
in_features = x_train.shape[1]
num_nodes = [3*in_features, 5*in_features, 3*in_features]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.6
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [None]:
# alpha and sigma are DeepHit loss hyper-parameters (see the pycox
# DeepHitSingle documentation for their exact meaning). The optimizer class is
# passed without a learning rate; the rate is set from the finder below.
model = DeepHitSingle(net, tt.optim.Adam, alpha=0.2, sigma=0.1, duration_index=labtrans.cuts)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
batch_size = 256
lr_finder = model.lr_finder(x_train, y_train, batch_size, tolerance=4)
_ = lr_finder.plot()

In [None]:
# Display the learning rate suggested by the finder (cell output).
lr_finder.get_best_lr()

In [None]:
# Use the suggested learning rate for training.
model.optimizer.set_lr(lr_finder.get_best_lr())

In [None]:
# Training configuration. `verbose` is defined here but not passed to `fit` below.
epochs = 1000
verbose = True

In [None]:
# Train with `val` as validation data. No callbacks are passed, so training
# runs for the full `epochs` epochs.
log = model.fit(x_train, y_train, batch_size, epochs, val_data=val)

In [None]:
# Plot the training log returned by `fit`.
_ = log.plot()

In [None]:
# Predicted survival curves for the test rows (one column per row of x_test).
surv = model.predict_surv_df(x_test)

In [None]:
# Re-predict through pycox's `interpolate(10)` wrapper; `surv` is overwritten.
surv = model.interpolate(10).predict_surv_df(x_test)

In [None]:
# Step plot of the first five interpolated survival curves.
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Evaluation helper for the interpolated predictions; censor_surv='km' selects
# pycox's Kaplan-Meier censoring estimate.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Time-dependent concordance with the 'antolini' method (cell output).
ev.concordance_td('antolini')

In [None]:
# Re-select the test rows from `data` (the label columns were removed from
# `input_test` in the preparation cell), read the untransformed durations and
# events from them, and rebuild the evaluator for the integrated scores.
input_test = data.iloc[test_index]
durations_test, events_test = get_target(input_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
# Integrated Brier score over `time_grid` (cell output).
ev.integrated_brier_score(time_grid) 

In [None]:
# Integrated negative binomial log-likelihood over `time_grid` (cell output).
ev.integrated_nbll(time_grid) 

# MTLR

In [None]:
from pycox.models import MTLR

In [None]:

# Labels of the patients that are present in the merged feature/label table.
sample_labels = labels.loc[data.index.unique()]

# Disjoint split by patient: 20% test, then 20% of the remaining patients for
# validation. The validation sample is drawn from `df_train` (not from the full
# table) so that no test patient can end up in the training set. The fixed
# random_state makes the cell produce the same split every time it is run.
df_test = sample_labels.sample(frac=0.2, random_state=100)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=100)
df_train = df_train.drop(df_val.index)

cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

# Standardise the continuous columns, pass the remaining columns through as-is.
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Row positions of each group inside `data`. They are computed from `data`'s
# own index, because positions in an unordered set of patient ids do not
# correspond to row positions. Later cells reuse `test_index` with data.iloc.
train_index = np.flatnonzero(data.index.isin(df_train.index))
val_index = np.flatnonzero(data.index.isin(df_val.index))
test_index = np.flatnonzero(data.index.isin(df_test.index))

input_train = data.iloc[train_index]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

num_durations = 10
labtrans = MTLR.label_transform(num_durations)


def get_target(df):
    """Return (durations as float32, event indicators as int32) for the rows of `df`."""
    return (df['actualiculos'].values.astype('float32'),
            df['actualhospitalmortality'].values.astype('int32'))


# The discretisation grid is fitted on the training targets only.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))

# We don't need to transform the test labels: the model is created with
# duration_index=labtrans.cuts, so its predictions are indexed on the original
# time scale and must be evaluated against untransformed durations.
durations_test, events_test = get_target(input_test)

# Identifier and outcome columns must not be visible as features. Non-inplace
# drops avoid mutating slices of `data`.
non_feature_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(columns=non_feature_cols)
input_val = input_val.drop(columns=non_feature_cols)
input_test = input_test.drop(columns=non_feature_cols)

# The scalers are fitted on the training rows only.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')
x_test = x_mapper.transform(input_test).astype('float32')

train = (x_train, y_train)
val = (x_val, y_val)

In [None]:
# MLP for the MTLR model: three hidden layers sized relative to the number of
# input features, with batch normalisation and dropout 0.5; one output per
# discretisation interval of `labtrans`.
in_features = x_train.shape[1]
num_nodes = [3*in_features, 5*in_features, 3*in_features]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.5
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [None]:
# The optimizer class is passed without a learning rate; the rate is set from
# the learning-rate finder below.
model = MTLR(net, tt.optim.Adam, duration_index=labtrans.cuts)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
batch_size = 256
lr_finder = model.lr_finder(x_train, y_train, batch_size, tolerance=6)
_ = lr_finder.plot()

In [None]:
# Display the learning rate suggested by the finder (cell output).
lr_finder.get_best_lr()

In [None]:
# Use the suggested learning rate for training.
model.optimizer.set_lr(lr_finder.get_best_lr())

In [None]:
# Training configuration. `verbose` is defined here but not passed to `fit` below.
epochs = 1000
verbose = True

In [None]:
# Train with `val` as validation data. No callbacks are passed, so training
# runs for the full `epochs` epochs.
log = model.fit(x_train, y_train, batch_size, epochs, val_data=val)

In [None]:
# Plot the training log returned by `fit`.
_ = log.plot()

In [None]:
# Predicted survival curves for the test rows (one column per row of x_test).
surv = model.predict_surv_df(x_test)

In [None]:
# Re-predict through pycox's `interpolate(10)` wrapper; `surv` is overwritten.
surv = model.interpolate(10).predict_surv_df(x_test)

In [None]:
# Step plot of the first five interpolated survival curves.
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Evaluation helper for the interpolated predictions; censor_surv='km' selects
# pycox's Kaplan-Meier censoring estimate.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Time-dependent concordance with the 'antolini' method (cell output).
ev.concordance_td('antolini')

In [None]:
# Re-select the test rows from `data` (the label columns were removed from
# `input_test` in the preparation cell), read the untransformed durations and
# events from them, and rebuild the evaluator for the integrated scores.
input_test = data.iloc[test_index]
durations_test, events_test = get_target(input_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
# Integrated Brier score over `time_grid` (cell output).
ev.integrated_brier_score(time_grid) 

In [None]:
# Integrated negative binomial log-likelihood over `time_grid` (cell output).
ev.integrated_nbll(time_grid) 

# BCESurv

In [None]:
from pycox.models import BCESurv

In [None]:
# Labels of the patients that are present in the merged feature/label table.
sample_labels = labels.loc[data.index.unique()]

# Disjoint split by patient: 20% test, then 20% of the remaining patients for
# validation. The validation sample is drawn from `df_train` (not from the full
# table) so that no test patient can end up in the training set. The fixed
# random_state makes the cell produce the same split every time it is run.
df_test = sample_labels.sample(frac=0.2, random_state=100)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=100)
df_train = df_train.drop(df_val.index)

cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

# Standardise the continuous columns, pass the remaining columns through as-is.
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Row positions of each group inside `data`. They are computed from `data`'s
# own index, because positions in an unordered set of patient ids do not
# correspond to row positions. Later cells reuse `test_index` with data.iloc.
train_index = np.flatnonzero(data.index.isin(df_train.index))
val_index = np.flatnonzero(data.index.isin(df_val.index))
test_index = np.flatnonzero(data.index.isin(df_test.index))

input_train = data.iloc[train_index]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

n_durations = 10
labtrans = LogisticHazard.label_transform(n_durations)


def get_target(df):
    """Return (durations as float32, event indicators as int32) for the rows of `df`."""
    return (df['actualiculos'].values.astype('float32'),
            df['actualhospitalmortality'].values.astype('int32'))


# The discretisation grid is fitted on the training targets only and then
# re-used unchanged for the validation and test targets.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))
y_test = labtrans.transform(*get_target(input_test))

# We don't need to transform the test labels used for evaluation: the model is
# created with duration_index=labtrans.cuts, so its predictions are indexed on
# the original time scale and must be compared with untransformed durations.
durations_test, events_test = get_target(input_test)

# Identifier and outcome columns must not be visible as features. Non-inplace
# drops avoid mutating slices of `data`.
non_feature_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(columns=non_feature_cols)
input_val = input_val.drop(columns=non_feature_cols)
input_test = input_test.drop(columns=non_feature_cols)

# The scalers are fitted on the training rows only.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')
x_test = x_mapper.transform(input_test).astype('float32')

train = tt.tuplefy(x_train, y_train)
val = tt.tuplefy(x_val, y_val)
test = tt.tuplefy(x_test, y_test)

In [None]:
def make_net(train, labtrans, dropout=0.0):
    """Build the MLP used by `fit_and_predict`.

    Args:
        train: indexable whose first element is the training feature array;
            its second dimension gives the number of input features.
        labtrans: label transform providing `out_features`.
        dropout: dropout value forwarded to `MLPVanilla`.

    Returns:
        A `tt.practical.MLPVanilla` with hidden layers of 3x, 5x and 3x the
        number of input features.
    """
    n_in = train[0].shape[1]
    n_out = labtrans.out_features
    hidden = [factor * n_in for factor in (3, 5, 3)]
    return tt.practical.MLPVanilla(n_in, hidden, n_out, dropout=dropout)

In [None]:
def fit_and_predict(model_class, train, val, test, labtrans, lr=0.01, n_itp=20,
                    batch_size=256, epochs=256):
    """Train a fresh model of `model_class` and predict test survival curves.

    Args:
        model_class: pycox model class, called as
            `model_class(net, optimizer, duration_index=labtrans.cuts)`.
        train: (features, targets) pair unpacked into `model.fit`.
        val: validation data passed to `model.fit` as `val_data`.
        test: indexable whose first element is the test feature array.
        labtrans: fitted label transform (provides `cuts` and `out_features`).
        lr: learning rate given to `tt.optim.AdamWR`.
        n_itp: argument passed to `model.interpolate` before predicting.
        batch_size: training batch size (previously hard-coded to 256).
        epochs: maximum number of epochs (previously hard-coded to 256).

    Returns:
        Tuple `(surv, model)`: the predicted survival DataFrame for `test[0]`
        and the fitted model. The training log is available as `model.log`.
    """
    net = make_net(train, labtrans)
    optimizer = tt.optim.AdamWR(lr, cycle_eta_multiplier=0.8)
    model = model_class(net, optimizer, duration_index=labtrans.cuts)
    # Training can stop before `epochs` through the EarlyStoppingCycle callback.
    model.fit(*train, batch_size, epochs, verbose=False, val_data=val,
              callbacks=[tt.cb.EarlyStoppingCycle()])
    surv = model.interpolate(n_itp).predict_surv_df(test[0])
    return surv, model

In [None]:
# Train BCESurv with learning rate 1e-4 and predict survival curves for the test set.
surv_bce, model_bce = fit_and_predict(BCESurv, train, val, test, labtrans, lr=0.0001)

In [None]:
# Plot the model's training log, skipping its first row.
_ = model_bce.log.to_pandas().iloc[1:].plot()

In [None]:
ev_bce_true = EvalSurv(surv_bce, durations_test, events_test, 'km')

In [None]:
ev.concordance_td('antolini')

In [None]:
time_grid = np.linspace(0, 100, 100)

In [None]:
input_test = data.iloc[test_index]
durations_test, events_test = get_target(input_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
ev.integrated_brier_score(time_grid) 

In [None]:
ev.integrated_nbll(time_grid) 

# CoxTime

In [None]:
from pycox.models import CoxTime
from pycox.models.cox_time import MLPVanillaCoxTime

In [None]:

# Labels of the patients that are present in the merged feature/label table.
sample_labels = labels.loc[data.index.unique()]

# Disjoint split by patient: 20% test, then 20% of the remaining patients for
# validation. The validation sample is drawn from `df_train` (not from the full
# table) so that no test patient can end up in the training set. The fixed
# random_state makes the cell produce the same split every time it is run.
df_test = sample_labels.sample(frac=0.2, random_state=100)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2, random_state=100)
df_train = df_train.drop(df_val.index)

cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

# Standardise the continuous columns, pass the remaining columns through as-is.
standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Row positions of each group inside `data`. They are computed from `data`'s
# own index, because positions in an unordered set of patient ids do not
# correspond to row positions. Later cells reuse `test_index` with data.iloc.
train_index = np.flatnonzero(data.index.isin(df_train.index))
val_index = np.flatnonzero(data.index.isin(df_val.index))
test_index = np.flatnonzero(data.index.isin(df_test.index))

input_train = data.iloc[train_index]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

# Not used by the CoxTime label transform below; kept so the notebook-level
# variable still exists.
num_durations = 10
labtrans = CoxTime.label_transform()


def get_target(df):
    """Return (durations as float32, event indicators as int32) for the rows of `df`."""
    return (df['actualiculos'].values.astype('float32'),
            df['actualhospitalmortality'].values.astype('int32'))


# The label transform is fitted on the training targets only.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))

# We don't need to transform the test labels: the model is given `labtrans`
# and reports its predictions on the original time scale, so evaluation uses
# untransformed durations.
durations_test, events_test = get_target(input_test)

# Identifier and outcome columns must not be visible as features. Non-inplace
# drops avoid mutating slices of `data`.
non_feature_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(columns=non_feature_cols)
input_val = input_val.drop(columns=non_feature_cols)
input_test = input_test.drop(columns=non_feature_cols)

# The scalers are fitted on the training rows only.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')
x_test = x_mapper.transform(input_test).astype('float32')

val = tt.tuplefy(x_val, y_val)

In [None]:
# Network for CoxTime: two hidden layers of 32 nodes with batch normalisation
# and dropout 0.5. `out_features` and `output_bias` are assigned here but are
# not passed to MLPVanillaCoxTime.
in_features = x_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.5
output_bias = False
net = MLPVanillaCoxTime(in_features, num_nodes, batch_norm, dropout)

In [None]:
# The model receives the label transform itself; the optimizer class is passed
# without a learning rate, which is set from the finder below.
model = CoxTime(net, tt.optim.Adam, labtrans=labtrans)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
batch_size = 256
lrfinder = model.lr_finder(x_train, y_train, batch_size, tolerance=2)
_ = lrfinder.plot()

In [None]:
# Display the learning rate suggested by the finder (cell output).
lrfinder.get_best_lr()

In [None]:
# Use the suggested learning rate for training.
model.optimizer.set_lr(lrfinder.get_best_lr())

In [None]:
# Training configuration.
epochs = 1000
verbose = True

In [None]:
# `verbose` must be passed by keyword: the fifth positional parameter of `fit`
# is `callbacks`, so passing it positionally handed True to the callback slot.
# The validation tuple is repeated 10 times and concatenated before use.
log = model.fit(x_train, y_train, batch_size, epochs, verbose=verbose,
                val_data=val.repeat(10).cat())

In [None]:
# Plot the training log returned by `fit`.
_ = log.plot()

In [None]:
# Compute the baseline hazards; this is done before predicting survival below.
_ = model.compute_baseline_hazards()

In [None]:
# Predicted survival curves for the test rows (one column per row of x_test).
surv = model.predict_surv_df(x_test)

In [None]:
# Plot of the first five predicted survival curves.
surv.iloc[:, :5].plot()
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Evaluation helper (censor_surv='km' selects pycox's Kaplan-Meier censoring
# estimate) and time-dependent concordance with the 'antolini' method.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

In [None]:
# Re-select the test rows from `data` (the label columns were removed from
# `input_test` in the preparation cell), read the untransformed durations and
# events from them, and rebuild the evaluator for the integrated scores.
input_test = data.iloc[test_index]
durations_test, events_test = get_target(input_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
# Integrated Brier score over `time_grid` (cell output).
ev.integrated_brier_score(time_grid) 

In [None]:
# Integrated negative binomial log-likelihood over `time_grid` (cell output).
ev.integrated_nbll(time_grid) 

# CoxCC

In [None]:
from pycox.models import CoxCC

In [None]:

# Labels of the patients that appear in the merged feature/label table.
sample_labels = labels.loc[list(set(list(data.index.get_level_values('patient'))))]

# Disjoint split: 20% of the patients for testing, then 20% of the remaining
# patients for validation, the rest for training. Validation is sampled from
# the non-test rows so no test or validation patient is also used for training.
# NOTE(review): the split is re-drawn in every model section of this notebook,
# so the models are not scored on the same test patients.
df_test = sample_labels.sample(frac=0.2)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2)
df_train = df_train.drop(df_val.index)

# Continuous covariates are standardized; the remaining (ordinal / one-hot)
# covariates are passed through unchanged.
cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Positional row indices into `data` for each split. The positions are taken
# from the row order of `data` itself (not from an unordered set of ids), so
# `data.iloc[...]` selects exactly the sampled patients.
row_patients = list(data.index.get_level_values('patient'))
st = set(list(df_val.index.get_level_values('patient')))
val_index = [i for i, e in enumerate(row_patients) if e in st]
st = set(list(df_train.index.get_level_values('patient')))
train_index = [i for i, e in enumerate(row_patients) if e in st]
st = set(list(df_test.index.get_level_values('patient')))
test_index = [i for i, e in enumerate(row_patients) if e in st]

input_train = data.iloc[list(train_index)]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

# Targets as a (durations, events) pair: `actualiculos` as float32 and
# `actualhospitalmortality` as int32.
get_target = lambda df: (df['actualiculos'].values.astype('float32'), df['actualhospitalmortality'].values.astype('int32'))

y_train = get_target(input_train)
y_val = get_target(input_val)

# Remove identifier and label columns from the covariate frames. The frames
# are rebound instead of mutated in place, since they were sliced from `data`.
drop_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(drop_cols, axis=1)
input_val = input_val.drop(drop_cols, axis=1)

# Fit the scalers on the training rows only, then apply them to validation.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')

val = tt.tuplefy(x_val, y_val)

# We don't need to transform the test labels
durations_test, events_test = get_target(input_test)

input_test = input_test.drop(drop_cols, axis=1)

x_test = x_mapper.transform(input_test).astype('float32')

In [None]:
# Network for the CoxCC model: an MLP with two hidden layers of 32 units and
# a single output without bias.
num_nodes = [32, 32]
batch_norm, dropout = True, 0.5
output_bias = False
in_features, out_features = x_train.shape[1], 1

net = tt.practical.MLPVanilla(
    in_features, num_nodes, out_features,
    batch_norm, dropout, output_bias=output_bias,
)

In [None]:
# Wrap the network in a CoxCC model trained with Adam.
model = CoxCC(net, tt.optim.Adam)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
# `batch_size` is reused by the training cell further down.
batch_size = 256
lrfinder = model.lr_finder(x_train, y_train, batch_size, tolerance=2)
_ = lrfinder.plot()

In [None]:
# Display the learning rate reported as best by the finder.
lrfinder.get_best_lr()

In [None]:
# Configure the optimizer with the finder's best learning rate before training.
model.optimizer.set_lr(lrfinder.get_best_lr())

In [None]:
# Training settings read by the `model.fit` cell that follows.
epochs = 1000
verbose = True

In [None]:
# Train the model. `verbose` is passed by keyword: the fifth positional
# parameter of torchtuples' `Model.fit` is `callbacks`, not `verbose`.
# NOTE(review): the validation tuple is repeated 10 times and concatenated,
# presumably to reduce the variance of the case-control validation loss — confirm.
log = model.fit(x_train, y_train, batch_size, epochs, verbose=verbose,
                val_data=val.repeat(10).cat())

In [None]:
# Plot the training log returned by `model.fit`.
_ = log.plot()

In [None]:
# Compute the baseline hazards on the model before predicting survival curves;
# the return value is not needed here.
_ = model.compute_baseline_hazards()

In [None]:
# Predict survival curves for the test covariates as a DataFrame.
surv = model.predict_surv_df(x_test)

In [None]:
# Plot the first five columns of the predicted survival DataFrame
# (presumably one column per test patient — confirm against pycox docs).
surv.iloc[:, :5].plot()
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Build the evaluator from predictions and test targets (censor_surv='km')
# and display the time-dependent concordance using the 'antolini' method.
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

In [None]:
# Re-select the test rows from `data` (which still carries the label columns),
# rebuild the test targets and the evaluator, and create a 100-point time
# grid spanning the observed test durations.
input_test = data.iloc[test_index]
durations_test, events_test = get_target(input_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
# Display the integrated Brier score evaluated over `time_grid`.
ev.integrated_brier_score(time_grid) 

In [None]:
# Display the integrated NBLL (negative binomial log-likelihood in pycox's
# naming) evaluated over `time_grid`.
ev.integrated_nbll(time_grid) 

# DeepSurv

In [None]:
from pycox.models import CoxPH

In [None]:

# Labels of the patients that appear in the merged feature/label table.
sample_labels = labels.loc[list(set(list(data.index.get_level_values('patient'))))]

# Disjoint split: 20% of the patients for testing, then 20% of the remaining
# patients for validation, the rest for training. Validation is sampled from
# the non-test rows so no test or validation patient is also used for training.
# NOTE(review): the split is re-drawn in every model section of this notebook,
# so the models are not scored on the same test patients.
df_test = sample_labels.sample(frac=0.2)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2)
df_train = df_train.drop(df_val.index)

# Continuous covariates are standardized; the remaining (ordinal / one-hot)
# covariates are passed through unchanged.
cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Positional row indices into `data` for each split. The positions are taken
# from the row order of `data` itself (not from an unordered set of ids), so
# `data.iloc[...]` selects exactly the sampled patients.
row_patients = list(data.index.get_level_values('patient'))
st = set(list(df_val.index.get_level_values('patient')))
val_index = [i for i, e in enumerate(row_patients) if e in st]
st = set(list(df_train.index.get_level_values('patient')))
train_index = [i for i, e in enumerate(row_patients) if e in st]
st = set(list(df_test.index.get_level_values('patient')))
test_index = [i for i, e in enumerate(row_patients) if e in st]

input_train = data.iloc[list(train_index)]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

# Targets as a (durations, events) pair: `actualiculos` as float32 and
# `actualhospitalmortality` as int32.
get_target = lambda df: (df['actualiculos'].values.astype('float32'), df['actualhospitalmortality'].values.astype('int32'))

y_train = get_target(input_train)
y_val = get_target(input_val)

# Remove identifier and label columns from the covariate frames. The frames
# are rebound instead of mutated in place, since they were sliced from `data`.
drop_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(drop_cols, axis=1)
input_val = input_val.drop(drop_cols, axis=1)

# Fit the scalers on the training rows only, then apply them to validation.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')

val = x_val, y_val

# We don't need to transform the test labels
durations_test, events_test = get_target(input_test)

input_test = input_test.drop(drop_cols, axis=1)

x_test = x_mapper.transform(input_test).astype('float32')

In [None]:
# Network for the CoxPH (DeepSurv) model: an MLP with two hidden layers of
# 32 units and a single output without bias.
num_nodes = [32, 32]
batch_norm, dropout = True, 0.5
output_bias = False
in_features, out_features = x_train.shape[1], 1

net = tt.practical.MLPVanilla(
    in_features, num_nodes, out_features,
    batch_norm, dropout, output_bias=output_bias,
)

In [None]:
# Wrap the network in a CoxPH model trained with Adam.
model = CoxPH(net, tt.optim.Adam)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
# `batch_size` is reused by the training cell further down.
batch_size = 256
lrfinder = model.lr_finder(x_train, y_train, batch_size, tolerance=10)
_ = lrfinder.plot()

In [None]:
# Display the learning rate reported as best by the finder.
lrfinder.get_best_lr()

In [None]:
# Configure the optimizer with the finder's best learning rate before training.
model.optimizer.set_lr(lrfinder.get_best_lr())

In [None]:
# Training settings read by the `model.fit` cell that follows.
epochs = 1000
verbose = True

In [None]:
# Train the model, validating on `val` with the same batch size as training.
# `verbose` is passed by keyword: the fifth positional parameter of
# torchtuples' `Model.fit` is `callbacks`, not `verbose`.
log = model.fit(x_train, y_train, batch_size, epochs, verbose=verbose,
                val_data=val, val_batch_size=batch_size)

In [None]:
# Plot the training log returned by `model.fit`.
_ = log.plot()

In [None]:
# Compute the baseline hazards on the model before predicting survival curves;
# the return value is not needed here.
_ = model.compute_baseline_hazards()

In [None]:
# Predict survival curves for the test covariates as a DataFrame.
surv = model.predict_surv_df(x_test)

In [None]:
# Plot the first five columns of the predicted survival DataFrame
# (presumably one column per test patient — confirm against pycox docs).
surv.iloc[:, :5].plot()
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Build the evaluator from the predictions and the test targets (censor_surv='km').
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Display the time-dependent concordance using the method's default variant.
# NOTE(review): the other model sections pass 'antolini' explicitly — confirm
# the scores are meant to be comparable.
ev.concordance_td()

In [None]:
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
# Display the integrated Brier score evaluated over `time_grid`.
ev.integrated_brier_score(time_grid)

In [None]:
# Display the integrated NBLL (negative binomial log-likelihood in pycox's
# naming) evaluated over `time_grid`.
ev.integrated_nbll(time_grid)

# PCHazard

In [None]:
from pycox.models import PCHazard

In [None]:

# Labels of the patients that appear in the merged feature/label table.
sample_labels = labels.loc[list(set(list(data.index.get_level_values('patient'))))]

# Disjoint split: 20% of the patients for testing, then 20% of the remaining
# patients for validation, the rest for training. Validation is sampled from
# the non-test rows so no test or validation patient is also used for training.
# NOTE(review): the split is re-drawn in every model section of this notebook,
# so the models are not scored on the same test patients.
df_test = sample_labels.sample(frac=0.2)
df_train = sample_labels.drop(df_test.index)
df_val = df_train.sample(frac=0.2)
df_train = df_train.drop(df_val.index)

# Continuous covariates are standardized; the remaining (ordinal / one-hot)
# covariates are passed through unchanged.
cols_standardize = ['age', 'height', 'weight']
cols_leave = ['gender', 'hour', 'eyes', 'motor', 'verbal',
       'ethnicity_BLACK/AFRICAN AMERICAN', 'ethnicity_OTHER',
       'ethnicity_UNKNOWN', 'ethnicity_WHITE',
       'ethnicity_WHITE - OTHER EUROPEAN', 'ethnicity_misc',
       'first_careunit_Cardiac Vascular Intensive Care Unit (CVICU)',
       'first_careunit_Coronary Care Unit (CCU)',
       'first_careunit_Medical Intensive Care Unit (MICU)',
       'first_careunit_Medical/Surgical Intensive Care Unit (MICU/SICU)',
       'first_careunit_Neuro Intermediate', 'first_careunit_Neuro Stepdown',
       'first_careunit_Neuro Surgical Intensive Care Unit (Neuro SICU)',
       'first_careunit_Surgical Intensive Care Unit (SICU)',
       'first_careunit_Trauma SICU (TSICU)',
       'admission_location_EMERGENCY ROOM',
       'admission_location_PHYSICIAN REFERRAL',
       'admission_location_PROCEDURE SITE',
       'admission_location_TRANSFER FROM HOSPITAL',
       'admission_location_TRANSFER FROM SKILLED NURSING FACILITY',
       'admission_location_WALK-IN/SELF REFERRAL', 'admission_location_misc',
       'insurance_Medicaid', 'insurance_Medicare', 'insurance_Other']

standardize = [([col], StandardScaler()) for col in cols_standardize]
leave = [(col, None) for col in cols_leave]

x_mapper = DataFrameMapper(standardize + leave)

# Positional row indices into `data` for each split. The positions are taken
# from the row order of `data` itself (not from an unordered set of ids), so
# `data.iloc[...]` selects exactly the sampled patients.
row_patients = list(data.index.get_level_values('patient'))
st = set(list(df_val.index.get_level_values('patient')))
val_index = [i for i, e in enumerate(row_patients) if e in st]
st = set(list(df_train.index.get_level_values('patient')))
train_index = [i for i, e in enumerate(row_patients) if e in st]
st = set(list(df_test.index.get_level_values('patient')))
test_index = [i for i, e in enumerate(row_patients) if e in st]

input_train = data.iloc[list(train_index)]
input_val = data.iloc[val_index]
input_test = data.iloc[test_index]

# Label transform for PCHazard, configured with `num_durations` = 10.
num_durations = 10
labtrans = PCHazard.label_transform(num_durations)

# Targets as a (durations, events) pair: `actualiculos` as float32 and
# `actualhospitalmortality` as int32.
get_target = lambda df: (df['actualiculos'].values.astype('float32'), df['actualhospitalmortality'].values.astype('int32'))

# The label transform is fitted on the training targets only and then
# applied to the validation targets.
y_train = labtrans.fit_transform(*get_target(input_train))
y_val = labtrans.transform(*get_target(input_val))

# Remove identifier and label columns from the covariate frames. The frames
# are rebound instead of mutated in place, since they were sliced from `data`.
drop_cols = ['uniquepid', 'patienthealthsystemstayid', 'actualhospitalmortality', 'actualiculos']
input_train = input_train.drop(drop_cols, axis=1)
input_val = input_val.drop(drop_cols, axis=1)

# Fit the scalers on the training rows only, then apply them to validation.
x_train = x_mapper.fit_transform(input_train).astype('float32')
x_val = x_mapper.transform(input_val).astype('float32')

train = (x_train, y_train)
val = (x_val, y_val)

# We don't need to transform the test labels
durations_test, events_test = get_target(input_test)

input_test = input_test.drop(drop_cols, axis=1)

x_test = x_mapper.transform(input_test).astype('float32')

In [None]:
# Network for the PCHazard model: three hidden layers whose widths are 3x, 5x
# and 3x the number of input features; the output width comes from `labtrans`.
# `output_bias` is assigned but not passed to the network in this section.
in_features = x_train.shape[1]
num_nodes = [factor * in_features for factor in (3, 5, 3)]
out_features = labtrans.out_features
batch_norm, dropout, output_bias = True, 0.5, False

net = tt.practical.MLPVanilla(
    in_features,
    num_nodes,
    out_features,
    batch_norm,
    dropout,
)

In [None]:
# Wrap the network in a PCHazard model trained with Adam; the cut points found
# by the label transform are used as the model's duration index.
model = PCHazard(net, tt.optim.Adam, duration_index=labtrans.cuts)

In [None]:
# Run the learning-rate finder on the training data and plot its result.
# `batch_size` is reused by the training cell further down.
batch_size = 256
lr_finder = model.lr_finder(x_train, y_train, batch_size, tolerance=8)
_ = lr_finder.plot()

In [None]:
# Display the learning rate reported as best by the finder.
lr_finder.get_best_lr()

In [None]:
# Configure the optimizer with the finder's best learning rate before training.
model.optimizer.set_lr(lr_finder.get_best_lr())

In [None]:
# Train for up to 1000 epochs, validating on `val`; no callbacks are passed,
# so nothing in this call stops training early.
epochs = 1000
log = model.fit(x_train, y_train, batch_size, epochs, val_data=val)

In [None]:
# Plot the training log returned by `model.fit`.
_ = log.plot()

In [None]:
# Predict survival curves for the test covariates with the model's current
# settings. `surv` is recomputed further down after `model.sub` is changed.
surv = model.predict_surv_df(x_test)

In [None]:
# NOTE(review): in pycox's PCHazard, `sub` presumably sets how many
# sub-intervals each time interval is split into when predicting — confirm
# against the pycox documentation.
model.sub = 10

In [None]:
# Re-predict the survival curves and plot the first five columns as
# step functions ('steps-post').
surv = model.predict_surv_df(x_test)
surv.iloc[:, :5].plot(drawstyle='steps-post')
plt.ylabel('S(t | x)')
_ = plt.xlabel('Time')

In [None]:
# Build the evaluator from the predictions and the test targets (censor_surv='km').
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

In [None]:
# Display the time-dependent concordance using the 'antolini' method.
ev.concordance_td('antolini')

In [None]:
# 100 evenly spaced evaluation times spanning the observed test durations.
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

In [None]:
# Display the integrated Brier score evaluated over `time_grid`.
ev.integrated_brier_score(time_grid) 

In [None]:
# Display the integrated NBLL (negative binomial log-likelihood in pycox's
# naming) evaluated over `time_grid`.
ev.integrated_nbll(time_grid) 