This notebook analyzes the results obtained by running experiments_paper.

In [1]:
import os 
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys

sys.path.append('../')
sys.path.append('../DeepSurvivalMachines/')
from nfg import datasets

In [11]:
# Name of the dataset whose results are analyzed; change it to analyze another dataset.
# It is passed to datasets.load_dataset and used to select the result files by name.
dataset = 'SYNTHETIC_COMPETING'

In [12]:
path = '../Results/' # Directory listed below to find the saved prediction files
# x is later wrapped in a DataFrame with `covariates` as column names; t and e are the
# defaults used by evaluate (presumably event times and event indicators, confirm in nfg.datasets).
x, t, e, covariates = datasets.load_dataset(dataset, competing = True, normalize = False) # Open the data

In [13]:
from sksurv.metrics import concordance_index_ipcw, brier_score, cumulative_dynamic_auc, integrated_brier_score

### Utils
def evaluate(survival, e = e, t = t, groups = None, n_folds = 5):
    """
    Compute cross-validated, cause-specific performance metrics from predictions.

    Parameters
    ----------
    survival : pd.DataFrame
        One row per sample. The last column holds the fold assignment of each
        sample; the remaining columns use a two-level header (risk, time) and
        contain the predicted survival (the risk is taken as 1 - survival).
    e, t : array-like
        Event indicators and times, aligned with the rows of `survival`.
        NOTE: the defaults are bound to the notebook globals when this cell
        is executed; re-run the cell after loading another dataset.
    groups : pd.Series, optional
        Group label of each sample. When given, a time-dependent concordance
        index "CIS_<group>" is additionally computed within each group.
    n_folds : int, default 5
        Folds 0 .. n_folds - 1 are evaluated.

    Returns
    -------
    pd.DataFrame
        Indexed by (Risk, Fold, Metric) with one column per evaluation time.
        Metrics: "GCIS" (concordance_index_ipcw without truncation time),
        "CIS" (concordance_index_ipcw truncated at the time), "BRS"
        (brier_score), "ROCS" (cumulative_dynamic_auc) and, with groups,
        "CIS_<group>". A metric that cannot be computed is reported as NaN.
    """
    folds = survival.iloc[:, -1].values
    survival = survival.iloc[:, :-1]
    times = survival.columns.get_level_values(1).unique()
    risk = 1 - survival
    group_names = [] if groups is None else list(groups.unique())

    results = {}

    # If multiple risk, compute cause specific metrics
    for r in survival.columns.get_level_values(0).unique():
        for fold in np.arange(n_folds):
            train, test = folds != fold, folds == fold
            e_train, t_train = e[train], t[train]
            e_test,  t_test  = e[test], t[test]
            g_train, g_test = (None, None) if groups is None else (groups[train], groups[test])

            et_train = np.array([(e_train[i] == int(r), t_train[i]) for i in range(len(e_train))], # For estimation censoring
                            dtype = [('e', bool), ('t', float)])
            et_test = np.array([(e_test[i] == int(r), t_test[i]) for i in range(len(e_test))], # For measure performance for given outcome
                            dtype = [('e', bool), ('t', float)])
            # Keep test samples observed before the last training time, or without the event r
            selection = (t_test < t_train.max()) | (e_test != int(r))

            et_test, g_test = et_test[selection], None if groups is None else g_test[selection]
            survival_fold = survival[test][r][selection]
            risk_fold = risk[test][r][selection]

            try:
                brs = brier_score(et_train, et_test, survival_fold.values, times)[1]
            except Exception:
                brs = [np.nan] * len(times)

            # Concordance and ROC for each time
            gcis, cis, rocs = [], [], []
            res_group = {"CIS_{}".format(group): [] for group in group_names}
            for time in times:
                try:
                    gcis.append(concordance_index_ipcw(et_train, et_test, risk_fold[time])[0])
                except Exception:
                    gcis.append(np.nan)

                try:
                    cis.append(concordance_index_ipcw(et_train, et_test, risk_fold[time], float(time))[0])
                except Exception:
                    cis.append(np.nan)

                try:
                    rocs.append(cumulative_dynamic_auc(et_train, et_test, risk_fold[time], float(time))[0][0])
                except Exception:
                    rocs.append(np.nan)

                # Each group is handled on its own so that a failure for one group
                # still yields exactly one entry per time for every group
                for group in group_names:
                    key = "CIS_{}".format(group)
                    try:
                        res_group[key].append(concordance_index_ipcw(et_train[g_train == group], et_test[g_test == group], risk_fold[time][g_test == group], float(time))[0])
                    except Exception:
                        res_group[key].append(np.nan)

            res = {"GCIS": gcis, "CIS": cis, "BRS": brs, "ROCS": rocs}
            if groups is not None:
                res.update(res_group)
            results[(r, fold)] = pd.DataFrame.from_dict(res, orient='index', columns = times)
    results = pd.concat(results)
    results.index.set_names(['Risk', 'Fold', 'Metric'], inplace = True)

    return results

In [14]:
# Age strata used for the per-group analysis; only FRAMINGHAM defines them
if dataset != "FRAMINGHAM":
    groups = None
else:
    groups = pd.cut(pd.DataFrame(x, columns = covariates).AGE, [0, 40, 50, 60, 100], labels=["<40", "40-50", "50-60", "60+"])
    groups.value_counts()

In [15]:
# Open every prediction file of the selected dataset and compute its performance
predictions, clusters, results, likelihood = {}, {}, {}, {}
for file_name in os.listdir(path):
    if dataset in file_name and '.csv' in file_name:
        # The model name sits between the last underscore and the first dot
        model = file_name[file_name.rindex('_') + 1: file_name.index('.')]

        print("Opening :", file_name, ' - ', model)
        if 'finegray' in model:
            # Reinitialize index: these files have a single header row, so the
            # (risk, time) column levels are rebuilt from the first three column names
            predictions[model] = pd.read_csv(path + file_name, header = [0], index_col = 0)
            index = pd.DataFrame([[risk_name, time_name] for risk_name in ('1', '2') for time_name in predictions[model].columns[:3]] + [['Use', '']])
            predictions[model].columns = pd.MultiIndex.from_frame(index)
        else:
            predictions[model] = pd.read_csv(path + file_name, header = [0, 1], index_col = 0)
        results[model] = evaluate(predictions[model], groups = groups)

        cluster_file = file_name[: file_name.index('.')] + '_clusters.pickle'
        if os.path.isfile(path + cluster_file):
            # NOTE: unpickling can execute arbitrary code; only open files produced by trusted runs
            with open(path + cluster_file, 'rb') as cluster_handle:
                clusters[model] = pickle.load(cluster_handle)
# Rename
# TODO: Add your method in the list for nicer display
dict_name = {'nfg': 'NeuralFG', 'nfgcs': 'NeuralFG OvA', 'finegray': 'Fine Gray', 'dsm': 'DSM', 'dsmcs': 'DSM OvA', 'dh': 'DeepHit', 'dhcs': 'DeepHit OvA'}

likelihood = pd.DataFrame.from_dict(likelihood, orient = 'index').rename(dict_name)
results = pd.concat(results).rename(dict_name)
# `level` is passed by keyword: passing it positionally is deprecated in pandas
results.index.set_names('Model', level = 0, inplace = True)

Opening : SYNTHETIC_COMPETING_nfgcs.csv  -  nfgcs
30000
Opening : SYNTHETIC_COMPETING_dsmcs.csv  -  dsmcs
30000
Opening : SYNTHETIC_COMPETING_dh.csv  -  dh
30000
Opening : SYNTHETIC_COMPETING_dhcs.csv  -  dhcs
30000
Opening : SYNTHETIC_COMPETING_nfg.csv  -  nfg
30000
Opening : SYNTHETIC_COMPETING_dsm.csv  -  dsm
30000


  results.index.set_names('Model', 0, inplace = True)


In [16]:
# Summarize every (Model, Risk, Metric) group of `results` as "mean (std)" strings,
# one per time column; the rows aggregated within a group are the folds.
table = results.groupby(['Model', 'Risk', 'Metric']).apply(lambda x: pd.Series(["{:.3f} ({:.2f})".format(mean, std) for mean, std in zip(x.mean(), x.std())], index = x.columns.astype(float)))
# Keep only the CIS and BRS metrics, then reshape so that the columns are (Metric, time)
table = table.loc[table.index.get_level_values(2).isin(['CIS', 'BRS'])].unstack(level=-1).stack(level=0).unstack(level=-1).loc[:, ['CIS', 'BRS']]
#table = table.loc[['NeuralFG', 'NeuralFG OvA', 'DSM', 'DeepHit', 'Fine Gray']]
# Index the rows by (Risk, Model) and sort on the first level only
table = table.reorder_levels(['Risk', 'Model']).sort_index(level = 0, sort_remaining = False)

table

Unnamed: 0_level_0,Metric,CIS,CIS,CIS,BRS,BRS,BRS
Unnamed: 0_level_1,Unnamed: 1_level_1,4.0,12.0,31.0,4.0,12.0,31.0
Risk,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1,DSM,0.785 (0.02),0.751 (0.01),0.718 (0.01),0.070 (0.00),0.123 (0.00),0.175 (0.00)
1,DSM OvA,0.781 (0.01),0.746 (0.01),0.714 (0.01),0.070 (0.00),0.125 (0.00),0.178 (0.00)
1,DeepHit,0.774 (0.01),0.746 (0.01),0.714 (0.01),0.069 (0.00),0.137 (0.00),0.226 (0.01)
1,DeepHit OvA,0.788 (0.02),0.752 (0.01),0.717 (0.01),0.070 (0.00),0.137 (0.00),0.201 (0.00)
1,NeuralFG,0.796 (0.01),0.758 (0.01),0.715 (0.01),0.065 (0.00),0.123 (0.00),0.194 (0.00)
1,NeuralFG OvA,0.797 (0.01),0.761 (0.01),0.727 (0.01),0.063 (0.00),0.116 (0.00),0.171 (0.00)
2,DSM,0.737 (0.02),0.710 (0.01),0.683 (0.01),0.069 (0.00),0.132 (0.00),0.183 (0.00)
2,DSM OvA,0.791 (0.02),0.745 (0.02),0.714 (0.02),0.067 (0.00),0.126 (0.00),0.176 (0.01)
2,DeepHit,0.781 (0.02),0.740 (0.01),0.711 (0.01),0.066 (0.00),0.137 (0.00),0.227 (0.01)
2,DeepHit OvA,0.789 (0.02),0.744 (0.02),0.709 (0.02),0.066 (0.00),0.137 (0.00),0.199 (0.00)


In [17]:
# Print the LaTeX source of the summary table
print(table.to_latex())

\begin{tabular}{llllllll}
\toprule
  & Metric & \multicolumn{3}{l}{CIS} & \multicolumn{3}{l}{BRS} \\
  & {} &          4.0  &          12.0 &          31.0 &          4.0  &          12.0 &          31.0 \\
Risk & Model &               &               &               &               &               &               \\
\midrule
1 & DSM &  0.785 (0.02) &  0.751 (0.01) &  0.718 (0.01) &  0.070 (0.00) &  0.123 (0.00) &  0.175 (0.00) \\
  & DSM OvA &  0.781 (0.01) &  0.746 (0.01) &  0.714 (0.01) &  0.070 (0.00) &  0.125 (0.00) &  0.178 (0.00) \\
  & DeepHit &  0.774 (0.01) &  0.746 (0.01) &  0.714 (0.01) &  0.069 (0.00) &  0.137 (0.00) &  0.226 (0.01) \\
  & DeepHit OvA &  0.788 (0.02) &  0.752 (0.01) &  0.717 (0.01) &  0.070 (0.00) &  0.137 (0.00) &  0.201 (0.00) \\
  & NeuralFG &  0.796 (0.01) &  0.758 (0.01) &  0.715 (0.01) &  0.065 (0.00) &  0.123 (0.00) &  0.194 (0.00) \\
  & NeuralFG OvA &  0.797 (0.01) &  0.761 (0.01) &  0.727 (0.01) &  0.063 (0.00) &  0.116 (0.00) &  0.171 (0.00) \\


  print(table.to_latex())


# Split by age

In [None]:
# NOTE: the 'CIS_<group>' metrics selected in this cell are only produced by evaluate
# when `groups` is not None (i.e. for FRAMINGHAM); for any other dataset the column
# selections below fail with a KeyError, as in the recorded output.
# Summarize every (Model, Risk, Metric) group of `results` as "mean (std)" strings per time
table = results.groupby(['Model', 'Risk', 'Metric']).apply(lambda x: pd.Series(["{:.3f} ({:.2f})".format(mean, std) for mean, std in zip(x.mean(), x.std())], index = x.columns.astype(float)))
# Keep the per-age-group concordance metrics of NeuralFG and NeuralFG OvA, one column per age group
table = table.loc[table.index.get_level_values(2).str.contains('CIS_')].unstack(level=-1).stack(level=0).loc[['NeuralFG', 'NeuralFG OvA'], ['CIS_<40', 'CIS_40-50', 'CIS_50-60', 'CIS_60+']]
#table = table.loc[['NeuralFG', 'NeuralFG OvA', 'DSM', 'DeepHit', 'Fine Gray']]
# The unnamed (None) index level holds the evaluation times moved to the index by stack
table = table.reorder_levels(['Risk', 'Model', None]).sort_index(level = 0, sort_remaining = False)

# Element-wise difference NeuralFG - NeuralFG OvA, summarized per (Risk, Metric) in the same "mean (std)" format
difference = (results.loc['NeuralFG'] - results.loc['NeuralFG OvA']).groupby(['Risk', 'Metric']).apply(lambda x: pd.Series(["{:.3f} ({:.2f})".format(mean, std) for mean, std in zip(x.mean(), x.std())], index = x.columns.astype(float)))
difference = difference.loc[difference.index.get_level_values(1).str.contains('CIS_')].unstack(level=-1).stack(level=0).loc[:, ['CIS_<40', 'CIS_40-50', 'CIS_50-60', 'CIS_60+']]

KeyError: "None of [Index(['CIS_<40', 'CIS_40-50', 'CIS_50-60', 'CIS_60+'], dtype='object', name='Metric')] are in the [columns]"

In [None]:
# Restrict to risk '2' and reshape so that rows are (time, Metric) and columns are the models
table = table.loc['2'].T.stack().reorder_levels([None, 'Metric']).sort_index(level = 0, sort_remaining = False)
# Add the NeuralFG - NeuralFG OvA summary for risk '2', stacked to the same (time, Metric) index
table['Difference'] = difference.loc['2'].stack()
table

Unnamed: 0_level_0,Model,NeuralFG,NeuralFG OvA,Difference
Unnamed: 0_level_1,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2153.75,CIS_<40,0.848 (0.17),0.825 (0.16),0.024 (0.02)
2153.75,CIS_40-50,0.861 (0.10),0.838 (0.09),0.022 (0.02)
2153.75,CIS_50-60,0.829 (0.03),0.815 (0.03),0.014 (0.04)
2153.75,CIS_60+,0.827 (0.04),0.792 (0.05),0.035 (0.04)
4589.5,CIS_<40,0.772 (0.10),0.776 (0.08),-0.004 (0.03)
4589.5,CIS_40-50,0.808 (0.06),0.804 (0.06),0.004 (0.01)
4589.5,CIS_50-60,0.746 (0.02),0.743 (0.03),0.003 (0.03)
4589.5,CIS_60+,0.743 (0.04),0.726 (0.05),0.018 (0.02)
6620.75,CIS_<40,0.744 (0.09),0.749 (0.07),-0.005 (0.02)
6620.75,CIS_40-50,0.774 (0.05),0.778 (0.05),-0.003 (0.01)


In [None]:
# Print the LaTeX source of the per-age-group table
print(table.to_latex())

\begin{tabular}{lllll}
\toprule
        & Model &      NeuralFG &  NeuralFG OvA &     Difference \\
{} & Metric &               &               &                \\
\midrule
2153.75 & CIS\_<40 &  0.848 (0.17) &  0.825 (0.16) &   0.024 (0.02) \\
        & CIS\_40-50 &  0.861 (0.10) &  0.838 (0.09) &   0.022 (0.02) \\
        & CIS\_50-60 &  0.829 (0.03) &  0.815 (0.03) &   0.014 (0.04) \\
        & CIS\_60+ &  0.827 (0.04) &  0.792 (0.05) &   0.035 (0.04) \\
4589.50 & CIS\_<40 &  0.772 (0.10) &  0.776 (0.08) &  -0.004 (0.03) \\
        & CIS\_40-50 &  0.808 (0.06) &  0.804 (0.06) &   0.004 (0.01) \\
        & CIS\_50-60 &  0.746 (0.02) &  0.743 (0.03) &   0.003 (0.03) \\
        & CIS\_60+ &  0.743 (0.04) &  0.726 (0.05) &   0.018 (0.02) \\
6620.75 & CIS\_<40 &  0.744 (0.09) &  0.749 (0.07) &  -0.005 (0.02) \\
        & CIS\_40-50 &  0.774 (0.05) &  0.778 (0.05) &  -0.003 (0.01) \\
        & CIS\_50-60 &  0.718 (0.01) &  0.719 (0.02) &  -0.001 (0.02) \\
        & CIS\_60+ &  0.701 (0.05)

  print(table.to_latex())


In [None]:
# Count the samples per (age group, event value) and print the counts as a LaTeX table.
# `axis` is passed by keyword: passing it positionally to pd.concat is deprecated in pandas.
print(pd.concat({"Age Group": groups, "Event": pd.Series(e)}, axis = 1).groupby(['Age Group', 'Event']).size().unstack().to_latex())

\begin{tabular}{lrrr}
\toprule
Event &     0 &    1 &    2 \\
Age Group &       &      &      \\
\midrule
<40       &   607 &   55 &   92 \\
40-50     &  1099 &  209 &  331 \\
50-60     &   653 &  289 &  437 \\
60+       &   131 &  234 &  297 \\
\bottomrule
\end{tabular}



  print(pd.concat({"Age Group": groups, "Event": pd.Series(e)}, 1).groupby(['Age Group', 'Event']).size().unstack().to_latex())
  print(pd.concat({"Age Group": groups, "Event": pd.Series(e)}, 1).groupby(['Age Group', 'Event']).size().unstack().to_latex())
