Analysis of the models obtained previously

In [None]:
import pandas as pd
from lifelines.fitters.kaplan_meier_fitter import KaplanMeierFitter
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Display every column of a DataFrame and never wrap wide frames over several lines.
pd.options.display.max_columns = None
pd.options.display.expand_frame_repr = False

import seaborn as sns
# Seaborn theme shared by all figures: white grid, no axis spines,
# 300 dpi both on screen and when saving, and enlarged fonts.
spines_off = dict.fromkeys(
    ["axes.spines.right", "axes.spines.top", "axes.spines.left", "axes.spines.bottom"],
    False,
)
custom_params = {**spines_off, "figure.dpi": 300, 'savefig.dpi': 300}
sns.set_theme(style = "whitegrid", rc = custom_params, font_scale = 1.75)


In [None]:
# --- Run configuration -------------------------------------------------------
# subselection: read the '_subselection' variants of the data and result folders.
# subsample:    for the 'Weekday' split, load the result files whose name contains 'under'.
subselection, subsample = True, True

# Not read by any cell visible in this notebook section.
mode = "admission"

# Divisor used when turning the reported standard deviations into
# 1.96 * std / sqrt(iters) interval half-widths in the plots.
iters = 100

In [None]:
# Outcome table, indexed by the first CSV column. Later cells read its
# `Remaining`, `Event`, `Day` and `GENDER` columns.
outcomes_file = 'data/mimic/outcomes_first_day{}.csv'.format('_subselection' if subselection else '')
outcomes = pd.read_csv(outcomes_file, index_col = 0)

In [None]:
# Root folder of the stored predictions (the '_subselection' variant when enabled).
path = 'results{}/mimic/'.format('_subselection' if subselection else '')

# Names of the two temporal splits, in the same order as the folders below.
periods = ["Weekend", "Weekday"]

# Models trained only on weekends / only on weekdays, each tested on both.
results_weekend = '{}weekends/'.format(path)
results_weekdays = '{}weekdays/'.format(path)

# Models trained on a random split.
results_random = '{}random/'.format(path)

# Open results

In [None]:
import os

In [None]:
# Load every stored prediction file of each training split:
# predictions[split][model] is the DataFrame read from `survival_<model>.csv`.
root = 'survival_'
predictions = {}
for period, results in zip(periods + ['Random'], [results_weekend, results_weekdays, results_random]):
    predictions[period] = {}
    print(period)
    for file in sorted(os.listdir(results)):
        if (root not in file) or ('.csv' not in file):
            continue

        # Weekday results come with and without 'under' in the file name:
        # keep the 'under' files when `subsample` is set, the others otherwise.
        if (period == 'Weekday') and (('under' in file) != bool(subsample)):
            continue

        name = file[file.index(root) + len(root):file.rindex('.csv')]
        predictions[period][name] = pd.read_csv(results + file, index_col = 0)
        print(file, ' -> ', name)

# Keep only the models that are available for the two periods and the random split.
intersection = predictions[periods[0]].keys() & predictions[periods[1]].keys() & predictions['Random'].keys()
if not intersection:
    raise ValueError('No model has predictions for all of: {}'.format(', '.join(periods + ['Random'])))

# Sets are unordered: pick the reference model deterministically so that the
# `Use` labels do not depend on hash ordering between runs.
reference_model = min(intersection)
labels = {}
for period in periods:
    predictions[period] = {model: predictions[period][model] for model in sorted(intersection)}
    # NOTE(review): assumes every model of a period shares the same `Use` column - confirm.
    labels[period] = predictions[period][reference_model].Use

### Kaplan-Meier estimator

In [None]:
# Evaluation horizons: drawn as vertical lines on the Kaplan-Meier plot
# (whose x-axis is labelled in days) and passed to `evaluate` as `horizons`.
horizons = [7, 30]

In [None]:
# One Kaplan-Meier curve per period, fitted on every point whose label for that
# period is not 'External', both drawn on the current axes.
for period in periods[:2]:
    test = labels[period] != 'External'
    kmf = KaplanMeierFitter(label = period)
    kmf.fit(outcomes.Remaining.loc[test], outcomes.Event.loc[test])
    kmf.plot()

ax = plt.gca()

# Mark the evaluation horizons.
for h in horizons:
    ax.axvline(h, ls = '--')

ax.grid(alpha = 0.3)
ax.set_xlabel('Time after observation (in days)')
ax.set_ylabel('Survival estimation')
ax.set_xlim(0, 10)
ax.set_ylim(0.8, 1)

Differences in observed labels between training and testing

### All metrics

Evaluate all metrics on datasets

In [None]:
from experiment import *

In [None]:
# Compute and display performances per group of model.
# A model belongs to a group when its name contains one of the group's substrings.
models_group = {"Baselines": ["deepsurv"],
                "Sequential": ["lstm"],
                "Time aware": ["gru"],
                "Proposed": ["joint"]}

# performances[source][target][model] / rocs[source][target][model]:
# model trained on `source`, evaluated on the `target` period.
performances, rocs = {}, {}
for source in labels:
    performances[source], rocs[source] = {}, {}
    for target in labels:
        print('-' * 42)
        print('{} -> {}'.format(source, target))

        # The evaluation sets depend only on the target, so they are built once
        # here rather than for every model.
        # Selecting on the target ensures a fair comparison: every source model
        # is scored on the same points, all target-period data being used even
        # when the training set was under-sampled.
        # NOTE(review): presumably `Day` is a day-of-week code where 0-4 are weekdays - confirm.
        selection = outcomes.Day <= 4 if (target == 'Weekday') else outcomes.Day > 4
        # Points of the target period that were not used to train the target models.
        test = (labels[target] != 'Train') & (selection)
        test = test[test].index

        # Training points of the target split (passed to `evaluate` as `train`).
        train = labels[target] == 'Train'
        train = train[train].index

        performances[source][target], rocs[source][target] = {}, {}
        for group in models_group:
            print('*' * 21)
            print(group)

            for model in sorted(predictions[source].keys()):
                if not(any([m in model for m in models_group[group]])):
                    continue
                # Same seed for every evaluation so that results are comparable across models.
                np.random.seed(42)
                preds = predictions[source][model]

                print(model)
                performances[source][target][model], rocs[source][target][model] = evaluate(outcomes.Event, outcomes.Remaining, preds.drop(columns = 'Use'), train, test, horizons = horizons)

In [None]:
# Evaluate the models trained on the random split on their own held-out points.
performances['Random'] = {'Random': {}}
rocs['Random'] = {'Random': {}}
for model in sorted(predictions['Random']):
    # Same seed for every model.
    np.random.seed(42)
    preds = predictions['Random'][model]

    print(model)

    # Split the index according to the model's own `Use` labels.
    is_train = (preds.Use == 'Train').values
    train = preds.index[is_train]
    test = preds.index[(preds.Use != 'Train').values]

    scores, curves = evaluate(outcomes.Event, outcomes.Remaining, preds.drop(columns = 'Use'), train, test, horizons = horizons)
    performances['Random']['Random'][model] = scores
    rocs['Random']['Random'][model] = curves

# Comparison Models

In [None]:
# Metric to display: "Brier Score" or "TD Concordance Index".
plot = "TD Concordance Index"

# Horizon keys shown in the figures and tables; 'Overall' is titled "Integrated".
horizons_display = ['7', '30', 'Overall']

In [None]:
# Display name of every model shown in the figures and tables, keyed by the model
# identifier taken from the result file names. The order below is the plotting order.
naming = dict([
    ('joint_value+time+mask', r"$\bf{DeepJoint}$"),
    # Variants currently left out of the comparison:
    #   ('joint+missing_value+time+mask', r"$\bf{DeepJoint - Missing}$"),
    #   ('joint+time_value+time+mask', r"$\bf{DeepJoint - Time}$"),
    ('lstm_value+time+mask', "Feature"),
    ('gru_d', "GRU-D"),
    ('lstm_value', "Ignore"),
    ('lstm+resampled', "Resample"),
    ('deepsurv_count', "Count"),
    ('deepsurv_last', "Last"),
])

In [None]:
# Re-key the results by display name, keeping only the models listed in `naming`:
#   performances_display[source][target][name] -> the selected metric (`plot`)
#   rocs_display[source][target][name][str(key)] -> numpy array of the stored values
performances_display, rocs_display = {}, {}
for source_split in periods + ['Random']:
    performances_display[source_split], rocs_display[source_split] = {}, {}
    for target_split in performances[source_split]:
        scores = performances[source_split][target_split]
        curves = rocs[source_split][target_split]
        performances_display[source_split][target_split] = {
            name: scores[model][plot] for model, name in naming.items()
        }
        rocs_display[source_split][target_split] = {
            name: {str(i): np.array(curves[model][i]) for i in curves[model]}
            for model, name in naming.items()
        }

In [None]:
# One style dict (with a 'color' entry) per displayed model, taken in order from
# the default matplotlib property cycle.
colors = [style for style, _ in zip(plt.rcParams['axes.prop_cycle'], naming)]

In [None]:
# Performance of each model on the random split: one panel per horizon, one row
# per model (columns reversed so the first model of `naming` ends up at the top).
pdst = pd.concat(performances_display['Random']['Random'], axis = 1).iloc[:, ::-1]
model_names = pdst.columns

fig, axes = plt.subplots(ncols = len(horizons_display), sharey = True, sharex = True, figsize = (12, 5))
print("{} -> {}".format('Random', 'Random'))
for ax, horizon_label in zip(axes, horizons_display):
    perf_metric_mean = pdst.loc['Mean', horizon_label]
    # Half-width of the interval drawn around the mean: 1.96 * std / sqrt(iters).
    perf_metric_std = 1.96 * pdst.loc['Std', horizon_label] / np.sqrt(iters)
    upper = perf_metric_mean + perf_metric_std
    lower = perf_metric_mean - perf_metric_std

    for row, (model, c) in enumerate(zip(model_names, colors[::-1])):
        interval = ax.plot((upper[model], lower[model]), (row, row), c = c['color'], alpha = 0.5, linewidth = 4)
        ax.scatter(perf_metric_mean[model], row, s = 200, label = model, marker = '|', color = interval[-1].get_color(), linewidths = 4)

    ax.grid(alpha = 0.3)
    ax.set_yticks(range(len(model_names)))
    ax.set_yticklabels(model_names)
    ax.set_title('Integrated' if horizon_label == 'Overall' else '{} days'.format(horizon_label))
plt.tight_layout()
plt.show()

# LaTeX table "mean (std)": one row per model (in `naming` order), one column per horizon.
mean_table, std_table = pdst.loc['Mean'], pdst.loc['Std']
cells = {
    horizon_label: [
        "{:.3f} ({:.3f})".format(mean_table.loc[horizon_label].loc[model], std_table.loc[horizon_label].loc[model])
        for model in mean_table.columns
    ]
    for horizon_label in horizons_display
}
latex_table = pd.DataFrame.from_dict(cells, columns = pdst.columns, orient = 'index').T.loc[::-1]
print(latex_table.to_latex())

# Comparison between weekend and weekdays

In [None]:
# For each target period, scatter the performance of the models trained on that
# period (x) against the models trained on the other period (y), one panel per
# horizon, with 1.96 * std / sqrt(iters) error bars, and print the LaTeX table.
#
# Only the two temporal splits are compared: `performances_display` also holds a
# 'Random' entry, which has no opposite period and used to raise a KeyError here.
#
# NOTE: relies on the per-period `rocs_display`; a later cell rebinds that name
# with an extra subgroup level, so this cell must run before it.
for time in periods:
    opposite = periods[1] if time == periods[0] else periods[0]

    transfer = "{} -> {}".format(opposite, time)
    training = "{} -> {}".format(time, time)

    # Summary statistics of every model, concatenated once per figure.
    transfer_stats = pd.concat(performances_display[opposite][time], axis = 1)
    training_stats = pd.concat(performances_display[time][time], axis = 1)

    fig, axes = plt.subplots(ncols = len(horizons_display), figsize = (14, 3))

    axes[0].set_ylabel(transfer)

    for ax, horizon, m in zip(axes, horizons_display, ['o', 'D', 'x', '.']):
        # Rows: models, columns: the two train -> test settings.
        perf_metric_mean = pd.DataFrame({
                    transfer : transfer_stats.loc['Mean', horizon],
                    training : training_stats.loc['Mean', horizon]
                })
        perf_metric_std =  pd.DataFrame({
                    transfer : transfer_stats.loc['Std', horizon],
                    training : training_stats.loc['Std', horizon]
                })
        half_width = 1.96 * perf_metric_std / np.sqrt(iters)

        for model, c in zip(perf_metric_mean.index, colors):
            x, y = perf_metric_mean.loc[model, training], perf_metric_mean.loc[model, transfer]
            dx, dy = half_width.loc[model, training], half_width.loc[model, transfer]
            ax.scatter(x, y, color = c['color'], marker = m, alpha = 0.5, s = 100)
            ax.plot([x - dx, x + dx], [y, y], color = c['color'])
            ax.plot([x, x], [y - dy, y + dy], color = c['color'])

        # Diagonal of slope 1 through the grand mean: points below it lose performance when transferred.
        grand_mean = perf_metric_mean.mean().mean()
        ax.axline((grand_mean, grand_mean), slope = 1, color = 'k', ls = ':', alpha = 0.5)
        ax.set_xlabel(training)
        ax.grid(alpha = 0.5)

        # Same span on both axes, centred on each setting's mean.
        means = perf_metric_mean.mean()
        margin = 1.96 * perf_metric_mean.std().max()
        ax.set_xlim(means[training] - margin, means[training] + margin)
        ax.set_ylim(means[transfer] - margin, means[transfer] + margin)
        ax.set_title('At {} days'.format(horizon) if horizon != 'Overall' else 'Integrated')

        # "mean (std)" table for both settings plus the absolute difference between them.
        table = {
            setting: {
                model: "{:.3f} ({:.3f})".format(perf_metric_mean.loc[model, setting], perf_metric_std.loc[model, setting])
                for model in perf_metric_mean.index
            } for setting in perf_metric_mean.columns
        }
        diff = {name: np.abs(rocs_display[time][time][name][horizon] - rocs_display[opposite][time][name][horizon]) for name in naming.values()}
        table['Difference'] = {name: "{:.3f} ({:.3f})".format(diff[name].mean(), diff[name].std()) for name in diff}
        table = pd.DataFrame(table)
        print(horizon, table.to_latex())

    # Legend: empty scatters give one entry per model on the last panel.
    for model, c in zip(perf_metric_mean.index, colors):
        plt.scatter([], [], color = c['color'], label = model)
    plt.legend(loc='center left', bbox_to_anchor=(1.05, 0.5), title = 'Models')
    plt.show()

    

# Subgroup Analysis

Compute the difference in change for the different groups

In [None]:
# Boolean mask of the FIRST subgroup; its negation is the second subgroup.
# The evaluation cell pairs `groups[0]` with `subgroups` and `groups[1]` with
# `~subgroups`, so the mask must select 'Male' (the previous `== 'F'` mask
# labelled the female patients as 'Male' and vice versa).
# Alternative split on insurance, with groups = ['Private', 'Public']:
#   subgroups = outcomes.INSURANCE.replace({"Medicare": "Public", "Medicaid": "Public", "Government": "Public"}) != 'Public'
subgroups = outcomes.GENDER != 'F'
groups = ['Male', 'Female']

In [None]:
# Same evaluation as for the whole population, restricted to each subgroup:
# rocs_group[group][source][target][model] is the second value returned by `evaluate`.
# `groups[0]` is evaluated on the points where `subgroups` is True, `groups[1]`
# on the others.
rocs_group = {}
# The loop variable is deliberately not called `subgroups`: rebinding the global
# mask would invert it after the loop and swap the groups on a re-run of this cell.
for group, mask in zip(groups, [subgroups, ~subgroups]):
    rocs_group[group] = {}
    for source in labels:
        rocs_group[group][source] = {}
        for target in labels:
            print('-' * 42)
            print('{} -> {}'.format(source, target))
            rocs_group[group][source][target] = {}

            # Evaluation sets depend only on (group, target): build them once.
            # NOTE(review): presumably `Day` is a day-of-week code where 0-4 are weekdays - confirm.
            selection = outcomes.Day <= 4 if (target == 'Weekday') else outcomes.Day > 4
            group_labels = labels[target].loc[mask]
            # Subgroup points of the target period not used to train the target models.
            test = (group_labels != 'Train') & (selection.loc[mask])
            test = test[test].index

            # Subgroup training points of the target split (passed to `evaluate` as `train`).
            train = group_labels == 'Train'
            train = train[train].index

            for family in models_group:
                print('*' * 21)
                print(family)

                for model in sorted(predictions[source].keys()):
                    if not(any([m in model for m in models_group[family]])):
                        continue
                    # Same seed for every evaluation.
                    np.random.seed(42)
                    preds = predictions[source][model].loc[mask]

                    print(model)
                    _, rocs_group[group][source][target][model] = evaluate(outcomes.Event, outcomes.Remaining, preds.drop(columns = 'Use'), train, test, horizons = horizons)

In [None]:
# Re-key the subgroup results by display name.
# NOTE: this rebinds `rocs_display` with an extra leading level:
#   rocs_display[group][source][target][name][str(key)] -> numpy array
rocs_display = {}
for group_name in groups:
    rocs_display[group_name] = {}
    for source_split in periods:
        rocs_display[group_name][source_split] = {}
        for target_split in performances[source_split]:
            curves = rocs_group[group_name][source_split][target_split]
            rocs_display[group_name][source_split][target_split] = {
                name: {str(i): np.array(curves[model][i]) for i in curves[model]}
                for model, name in naming.items()
            }

In [None]:
# For every target period: absolute gap between "trained on the target" and
# "transferred from the other period" within each subgroup, then the difference
# of that gap between the two subgroups. Prints one LaTeX table per period.
#
# Only the two temporal splits are iterated: the subgroup `rocs_display` has no
# 'Random' entry, so looping over `performances_display` raised a KeyError.
horizon_key = 'Overall'  # key compared below (previously printed from a stale `horizon` variable)

difference = {}
for time in periods:
    opposite = periods[1] if (time == periods[0]) else periods[0]

    # gap[group][name]: |target-trained - transferred| for the model `name`.
    gap, summary = {}, {}
    for group in groups:
        gap[group] = {
            name: pd.Series(rocs_display[group][time][time][name][horizon_key] - rocs_display[group][opposite][time][name][horizon_key]).abs()
            for name in naming.values()
        }
        summary[group] = pd.Series({name: "{:.3f} ({:.3f})".format(delta.mean(), delta.std()) for name, delta in gap[group].items()})

    # Gap of the first subgroup minus gap of the second one.
    difference[time] = {name: gap[groups[0]][name] - gap[groups[1]][name] for name in naming.values()}
    summary['Difference'] = pd.Series({name: "{:.3f} ({:.3f})".format(delta.mean(), delta.std()) for name, delta in difference[time].items()})

    summary_table = pd.concat(summary, axis = 1)
    print(time, horizon_key, summary_table.to_latex())

In [None]:
# For each target period, one panel per subgroup: 'Overall' value of the models
# trained on the target period (x) against the models transferred from the other
# period (y), with 1.96 * std / sqrt(iters) error bars.
#
# Only the two temporal splits are iterated: the subgroup `rocs_display` has no
# 'Random' entry, so looping over `performances_display` raised a KeyError.

# Local palette: the shared `colors` list is left untouched so that re-running
# earlier plotting cells keeps the same colours.
palette = list(plt.rcParams['axes.prop_cycle'])

for time in periods:
    opposite = periods[1] if time == periods[0] else periods[0]

    transfer = "{} -> {}".format(opposite, time)
    training = "{} -> {}".format(time, time)

    fig, axes = plt.subplots(ncols = 2, figsize = (9, 3))
    axes[0].set_ylabel(transfer)

    for group, ax in zip(groups, axes):
        transferred = rocs_display[group][opposite][time]
        trained = rocs_display[group][time][time]

        # Rows: models (display names), columns: the two train -> test settings.
        perf_metric_mean = pd.DataFrame({
                    transfer : pd.Series({model: transferred[model]['Overall'].mean() for model in transferred}),
                    training : pd.Series({model: trained[model]['Overall'].mean() for model in transferred}),
                })
        perf_metric_std =  pd.DataFrame({
                    transfer : pd.Series({model: transferred[model]['Overall'].std() for model in transferred}),
                    training : pd.Series({model: trained[model]['Overall'].std() for model in transferred}),
                })
        half_width = 1.96 * perf_metric_std / np.sqrt(iters)

        for model, c in zip(perf_metric_mean.index, palette):
            x, y = perf_metric_mean.loc[model, training], perf_metric_mean.loc[model, transfer]
            dx, dy = half_width.loc[model, training], half_width.loc[model, transfer]
            ax.scatter(x, y, color = c['color'], marker = '.', alpha = 0.5, s = 100)
            ax.plot([x - dx, x + dx], [y, y], color = c['color'])
            ax.plot([x, x], [y - dy, y + dy], color = c['color'])

        # Diagonal of slope 1 through the grand mean.
        grand_mean = perf_metric_mean.mean().mean()
        ax.axline((grand_mean, grand_mean), slope = 1, color = 'k', ls = ':', alpha = 0.5)
        ax.set_xlabel(training)
        ax.grid(alpha = 0.5)

        # Same span on both axes, centred on each setting's mean.
        means = perf_metric_mean.mean()
        margin = 1.96 * perf_metric_mean.std().max()
        ax.set_xlim(means[training] - margin, means[training] + margin)
        ax.set_ylim(means[transfer] - margin, means[transfer] + margin)
        ax.set_title(group)

    # Legend: empty scatters give one entry per model on the last panel.
    for model, c in zip(perf_metric_mean.index, palette):
        plt.scatter([], [], color = c['color'], label = model)
    plt.legend(loc='center left', bbox_to_anchor=(1.05, 0.5), title = 'Models')
    plt.show()
