In [None]:
import pandas as pd
import sys
sys.path.append('NeuralFineGray/')
sys.path.append('NeuralFineGray/DeepSurvivalMachines/')

In [None]:
# Lab measurements: the first two CSV columns form the row MultiIndex.
labs = pd.read_csv(
    'data/mimic/labs_first_day_subselection.csv',
    index_col=[0, 1],
    header=[0],
)
# Outcomes table, indexed by the first CSV column.
outcomes = pd.read_csv(
    'data/mimic/outcomes_first_day_subselection.csv',
    index_col=0,
)

In [None]:
# Quick-run switch: when enabled, work on a reproducible 20% sample of the outcomes
# and keep only the lab rows whose first index level appears in that sample.
test = True
# Forwarded as `n_iter` to every ShiftExperiment.create call in this notebook.
n_iter = 10
if test:
    outcomes = outcomes.sample(frac=0.2, random_state=0).sort_index()
    labs = labs.loc[labs.index.get_level_values(0).isin(outcomes.index)]

In [None]:
# Without competing risks, reduce the event column to a boolean flag that is
# True only where the recorded event equals 1.
competing = False
if not competing:
    outcomes['Event'] = outcomes['Event'].eq(1)

# Split 

In [None]:
# How the training set is chosen. Supported values:
#   "random"  - random split
#   "weekday" - split on date: train on weekdays
#   "weekend" - split on date: train on weekends
mode = "random"

In [None]:
# Build the boolean `training` indicator (one entry per row of `outcomes`) and the
# prefix under which results for this split are stored.
# NOTE(review): assumes `outcomes.Day` encodes the day of the week so that values
# <= 4 are weekdays and values > 4 are weekend days - confirm against preprocessing.
if mode == "weekday":
    # Train only on weekdays but test on both
    training = outcomes.Day <= 4
    results = 'results_subselection/mimic/weekday/'
elif mode == "weekend":
    # Train only on weekends but test on both
    training = outcomes.Day > 4
    results = 'results_subselection/mimic/weekend/'
elif mode == "random":
    # Random split: a reproducible 80% sample of the patients is marked as training
    training = pd.Series(outcomes.index.isin(outcomes.sample(frac = 0.8, random_state = 0).index), index = outcomes.index)
    results = 'results_subselection/mimic/random/'
else:
    # Fail loudly instead of silently running (and storing) a random split
    # under a mistyped mode name.
    raise ValueError('Unknown mode {!r}: expected "random", "weekday" or "weekend"'.format(mode))
results += 'survival_'

In [None]:
# Report the size of the cohort and how many entries are flagged for training.
print(
    'Total patients: {}'.format(len(training)),
    'Training patients: {}'.format(training.sum()),
    sep='\n',
)

# Models

In [None]:
from experiment import *

### DeepSurv

In [None]:
# Candidate network layouts: each entry is passed as the "layers" value of the
# `survival_args` / `temporal_args` hyper-parameters below.
layers = [
    [50, 50, 50],
]
# Collects the output of every `se.train(...)` call, keyed by model name.
predictions = dict()

As a baseline, we build a DeepSurv model on the last-observation-carried-forward values.

##### Last Carried Forward

In [None]:
# Forward-fill every lab within each patient, then keep one row per patient
# holding the last value of each lab.
last = (
    labs.groupby('Patient')
    .ffill()
    .groupby('Patient')
    .last()
)
# Preprocessing is expected to leave every patient with a value for each lab;
# any remaining gap is filled with that lab's mean over patients as a safeguard.
last = last.fillna(last.mean())

In [None]:
# DeepSurv experiment for the last-carried-forward baseline. The grid crosses the
# layouts in `layers` with two learning rates and two batch sizes.
se = ShiftExperiment.create(
    model='deepsurv',
    hyper_grid={
        "survival_args": [{"layers": widths} for widths in layers],
        "lr": [1e-3, 1e-4],
        "batch": [100, 250],
    },
    path=results + 'deepsurv_last',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the last-carried-forward features and keep the result under 'last'.
predictions['last'] = se.train(
    last,
    outcomes.Remaining,
    outcomes.Event,
    training,
)

##### Count

In [None]:
# Number of non-missing measurements of each lab, per patient.
count = labs.notna().groupby('Patient').sum()

In [None]:
# DeepSurv experiment for the "last value + observation count" baseline,
# using the same grid shape as the plain last-value baseline.
se = ShiftExperiment.create(
    model='deepsurv',
    hyper_grid={
        "survival_args": [{"layers": widths} for widths in layers],
        "lr": [1e-3, 1e-4],
        "batch": [100, 250],
    },
    path=results + 'deepsurv_count',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Features: last values side by side with the per-lab counts (columns prefixed 'count_').
predictions['count'] = se.train(
    pd.concat([last, count.add_prefix('count_')], axis=1),
    outcomes.Remaining,
    outcomes.Event,
    training,
)

## LSTM

In [None]:
# Base hyper-parameter grid shared by the recurrent models below.
# NOTE(review): "layers"/"hidden" presumably size the recurrent network - confirm in `experiment`.
hyper_grid = dict(
    layers=[2],
    hidden=[25, 50],
    survival_args=[{"layers": widths} for widths in layers],
    lr=[1e-3, 1e-4],
)

#### Value data only

Replace missing data and use time to predict

In [None]:
# Run `process` on a copy of the raw labs so that `labs` itself is left untouched.
(
    cov_simple,
    ie_to_simple,
    ie_since_simple,
    mask_simple,
    time_simple,
    event_simple,
) = process(labs.copy(), outcomes)

In [None]:
# 'joint' model experiment over the base grid, stored under the 'lstm_value' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid,
    path=results + 'lstm_value',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the value-only inputs and keep the result under 'lstm_value'.
predictions['lstm_value'] = se.train(
    cov_simple,
    time_simple,
    event_simple,
    training,
    ie_to_simple,
    ie_since_simple,
    mask_simple,
)

#### Values and time and mask

In [None]:
# Augment the raw labs with, for every lab column, a missingness indicator
# ('_mask') and the output of `compute(labs, time_since_last)` ('_time').
labs_selection = pd.concat(
    [
        labs.copy(),
        labs.isna().add_suffix('_mask'),
        compute(labs, time_since_last).add_suffix('_time'),
    ],
    axis=1,
)
(
    cov_time,
    ie_to_time,
    ie_since_time,
    mask_time,
    time_time,
    event_time,
) = process(labs_selection, outcomes)

In [None]:
# 'joint' model experiment over the base grid, stored under the 'lstm_value+time+mask' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid,
    path=results + 'lstm_value+time+mask',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the value + mask + time inputs and keep the result under 'lstm_value+time+mask'.
predictions['lstm_value+time+mask'] = se.train(
    cov_time,
    time_time,
    event_time,
    training,
    ie_to_time,
    ie_since_time,
    mask_time,
)

#### Values resampled

In [None]:
import datetime

# Resample every patient's labs onto a regular one-hour grid.
labs_resample = labs.copy()
# Append a datetime index level built from the 'Time' level, read as days since the Unix epoch.
labs_resample = labs_resample.set_index(pd.to_datetime(labs_resample.index.get_level_values('Time'), unit = 'D'), append = True) 
# Within each patient, average the measurements falling into each 1-hour bin of the appended level (level 2).
labs_resample = labs_resample.groupby('Patient').resample('1H', level = 2).mean() 
# Convert the bin timestamps back to fractional days since 1970-01-01, keeping (patient, time) index tuples.
labs_resample.index = labs_resample.index.map(lambda x: (x[0], (x[1] - datetime.datetime(1970,1,1)).total_seconds() / (3600 * 24)))
# Ensure the last time step is the same as in the original data:
# `shift` is, per patient, the last resampled time minus the last original time ...
shift = labs_resample.groupby('Patient').apply(lambda x: x.index[-1][1]) - labs.groupby('Patient').apply(lambda x: x.index[-1][1])
# ... and is subtracted from every resampled time of that patient.
labs_resample.index = labs_resample.index.map(lambda x: (x[0], (x[1] - shift[x[0]])))

cov_resample, ie_to_resample, ie_since_resample, mask_resample, time_resample, event_resample = process(labs_resample, outcomes)

In [None]:
# 'joint' model experiment over the base grid, stored under the 'lstm+resampled' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid,
    path=results + 'lstm+resampled',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the hourly-resampled inputs and keep the result under 'lstm+resampled'.
predictions['lstm+resampled'] = se.train(
    cov_resample,
    time_resample,
    event_resample,
    training,
    ie_to_resample,
    ie_since_resample,
    mask_resample,
)

### GRU-D

In [None]:
# Shallow copy of the base grid with the additional "typ" option set to 'GRUD'.
hyper_grid_gru = {**hyper_grid, "typ": ['GRUD']}

In [None]:
# 'joint' model experiment over the grid carrying the "typ" option,
# stored under the 'gru_d+mask' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid_gru,
    path=results + 'gru_d+mask',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the value-only inputs and keep the result under 'gru_d+mask'.
predictions['gru_d+mask'] = se.train(
    cov_simple,
    time_simple,
    event_simple,
    training,
    ie_to_simple,
    ie_since_simple,
    mask_simple,
)

### Latent ODE

In [None]:
# Reuse the same grid object, switching its "typ" option to 'ODE'.
# Note: this mutates `hyper_grid_gru` in place, so the 'GRUD' setting is overwritten.
hyper_grid_gru.update({"typ": ['ODE']})

In [None]:
# 'joint' model experiment over the grid carrying the "typ" option,
# stored under the 'ode+mask' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid_gru,
    path=results + 'ode+mask',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the value-only inputs and keep the result under 'ode+mask'.
predictions['ode+mask'] = se.train(
    cov_simple,
    time_simple,
    event_simple,
    training,
    ie_to_simple,
    ie_since_simple,
    mask_simple,
)

# Proposed approach

In [None]:
# Grid for the proposed approach: the base grid extended with the "temporal"
# option and one `temporal_args` entry per layout in `layers`.
hyper_grid_joint = {
    **hyper_grid,
    "temporal": ["single"],
    "temporal_args": [{"layers": widths} for widths in layers],
}

### Joint model on value only

In [None]:
# 'joint' model experiment over the proposed-approach grid, stored under the 'joint+value' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid_joint,
    path=results + 'joint+value',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the value-only inputs and keep the result under 'joint+value'.
predictions['joint+value'] = se.train(
    cov_simple,
    time_simple,
    event_simple,
    training,
    ie_to_simple,
    ie_since_simple,
    mask_simple,
)

### Joint model on value, mask and time

In [None]:
# `np` is not imported anywhere in the visible notebook (it can only arrive through
# `from experiment import *`), so import it explicitly to keep this cell self-sufficient.
import numpy as np

# Boolean selector with one entry per column of `cov_time`: True for the first
# `len(labs.columns)` entries, False for the remaining ones.
# NOTE(review): assumes `process` keeps the column order of `labs_selection`, so the
# leading columns are the raw lab values and the rest are the '_mask'/'_time' columns.
mask_mixture = np.full(len(cov_time.columns), False)
mask_mixture[:len(labs.columns)] = True

# Avoids computing the observational process on the additional dimensions
hyper_grid_joint['obs_mask'] = [mask_mixture]

In [None]:
# 'joint' model experiment over the proposed-approach grid,
# stored under the 'joint_value+time+mask' suffix.
se = ShiftExperiment.create(
    model='joint',
    hyper_grid=hyper_grid_joint,
    path=results + 'joint_value+time+mask',
    force=True,
    save=False,
    n_iter=n_iter,
)

In [None]:
# Fit on the value + mask + time inputs and keep the result under 'joint_value+time+mask'.
predictions['joint_value+time+mask'] = se.train(
    cov_time,
    time_time,
    event_time,
    training,
    ie_to_time,
    ie_since_time,
    mask_time,
)