In [None]:
import os 
import numpy as np
import pandas as pd

from generate import *

import sys
sys.path.append('../')
sys.path.append('../auton-survival/')

In [None]:
# Directory in which the result files are saved
path = '../Results_ntc/'
# Evaluation mode: observational ('obs') or randomised
mode = 'obs'
# Seed of the run; critical to ensure the regenerated data is the same
random_seed = 42
# Value of the parameter being tested
parameter = 30000
# Prefix of the result files: generatelinear_ or generate_ or generatesmall_ or generatehomogenous_
root = 'generatesize_'

In [None]:
from pycox.evaluation import EvalSurv
from sklearn.metrics import adjusted_rand_score

### Utils: the evaluation metrics used
def evaluate(clusters_pred, te_cluster, survival_pred, survival_gt, a, t, e, times, groups):
    """Score clustering, treatment-effect and factual survival predictions fold by fold.

    Parameters
    ----------
    clusters_pred : DataFrame of per-sample cluster scores; the row-wise argmax is
        taken as the predicted cluster.
    te_cluster : per-fold container indexed as ``te_cluster[fold][:, cluster]``, or
        None, in which case the cluster-level error is reported as NaN.
    survival_pred : DataFrame holding the 'Use' (fold id), 'Assignment', 'treated'
        and 'untreated' column groups.
    survival_gt : ground-truth frame with 'treated' and 'untreated' column groups,
        indexed like the samples.
    a, t, e : per-sample series forwarded to ``performance_metric``; ``a == 1`` is
        used as the treated indicator.
    times : evaluation times forwarded to ``performance_metric``.
    groups : per-sample ground-truth group labels.

    Returns
    -------
    DataFrame
        Concatenation (keyed by fold) of frames whose top-level columns are
        'Overall' (Rand index and treatment-effect errors) and 'Factual'
        (metrics returned by ``performance_metric``).
    """
    fold_ids = survival_pred[('Use',)]
    curves = survival_pred.drop(columns = ['Use', 'Assignment'])
    per_fold = {}

    # The five folds are scored independently
    for fold in np.arange(5):
        # Restrict every input to the samples of this fold
        in_fold = (fold_ids == fold).values
        assignment_fold = clusters_pred[in_fold]
        curves_fold = curves[in_fold]
        groups_fold = groups.loc[assignment_fold.index]

        # Treatment effect = treated curve minus untreated curve
        te_pred = curves_fold['treated'] - curves_fold['untreated']
        te_true = (survival_gt['treated'] - survival_gt['untreated']).loc[curves_fold.index]

        # Population level: agreement of the hard assignment with the true groups,
        # and error on the average treatment effect
        hard_assignment = assignment_fold.apply(lambda row: row.argmax(), 1)
        overall = {
            'Population': {
                "Rand_index": adjusted_rand_score(groups_fold, hard_assignment),
                "MSE_Mean_TE": mse_mean(te_pred, te_true),
            }
        }

        # Group level: compare the group's true effect with the effect of the
        # cluster that receives the highest average score within the group
        group_labels = groups.unique() if groups is not None else []
        for group in group_labels:
            members = groups_fold == group
            dominant = assignment_fold[members].mean(0).argmax()
            if te_cluster is None:
                score = np.nan
            else:
                cluster_te = te_cluster[fold][:, dominant]
                score = mse_cluster(te_true[members], cluster_te)
            overall[group] = {"MSE_Cluster_TE": score}

        # Predictive performance on the factual outcome
        factual = performance_metric(curves_fold, survival_gt, a == 1, t, e, times, groups)
        per_fold[fold] = pd.concat(
            {'Overall': pd.DataFrame(overall), 'Factual': pd.DataFrame(factual)},
            axis = 1)

    return pd.concat(per_fold)

def mse_mean(pred, gt):
    """Mean absolute gap between the row-averaged `pred` and the row-averaged `gt`.

    Both inputs are averaged over axis 0, the two mean vectors are subtracted and
    the absolute differences are averaged. Note that, despite the name, nothing
    is squared.
    """
    mean_pred = pred.mean(0).values
    mean_gt = gt.mean(0).values
    return np.abs(mean_pred - mean_gt).mean()

def mse_cluster(pred, mean):
    """Mean absolute gap between the row-averaged `pred` and the reference vector `mean`.

    `pred` is averaged over axis 0 before `mean` is subtracted; the absolute
    differences are then averaged. Note that, despite the name, nothing is squared.
    """
    gap = pred.mean(0).values - mean
    return np.abs(gap).mean()

def _score_survival(evaluator, times):
    """Return {'CIS': ..., 'BRS': ...} for `evaluator`, with NaN for a metric that fails."""
    import warnings

    metrics = (
        ('CIS', lambda: evaluator.concordance_td()),
        ('BRS', lambda: evaluator.integrated_brier_score(times.to_numpy())),
    )
    scores = {}
    for name, compute in metrics:
        try:
            scores[name] = compute()
        except Exception as error:
            # Best effort: keep the evaluation going, but say why the metric is missing.
            # Only Exception is caught so that KeyboardInterrupt still stops the run.
            warnings.warn("{} could not be computed ({}: {}); reporting NaN".format(
                name, type(error).__name__, error))
            scores[name] = np.nan
    return scores


def performance_metric(survival_pred, survival_gt, a, t, e, times, groups):
    """Compute concordance ('CIS') and integrated Brier score ('BRS') on the factual outcome.

    Parameters
    ----------
    survival_pred : DataFrame with 'treated' and 'untreated' column groups, indexed
        by the samples to evaluate.
    survival_gt : DataFrame with the same column groups for all samples; samples
        absent from `survival_pred` are treated as the training set.
    a : boolean Series, True for treated samples.
    t, e : Series of observed times and event indicators (``e == 0`` is handled
        as censored in the test-set filter).
    times : index of evaluation times; ``times.to_numpy()`` is passed to the Brier score.
    groups : Series of group labels, or None to skip the per-group metrics.

    Returns
    -------
    dict
        ``{'Population': {'CIS': ..., 'BRS': ...}, <group>: {...}, ...}``; a metric
        that raises is reported as NaN (with a warning).
    """
    train_index = survival_gt.index.difference(survival_pred.index)

    # Keep, for every sample, the curve of the arm it actually received
    treated_test = a.loc[survival_pred.index]
    survival_pred = pd.concat([survival_pred['treated'][treated_test],
                               survival_pred['untreated'][~treated_test]], axis = 0).loc[survival_pred.index]
    survival_gt = pd.concat([survival_gt['treated'][a],
                             survival_gt['untreated'][~a]], axis = 0).loc[survival_gt.index]
    survival_pred.columns = survival_pred.columns.astype(float)
    survival_gt.columns = survival_gt.columns.astype(float)

    # Split outcomes between train (not predicted) and test (predicted) samples
    e_train, t_train = e.loc[train_index].values, t.loc[train_index].values
    e_test, t_test = e.loc[survival_pred.index].values, t.loc[survival_pred.index].values
    g_test = groups.loc[survival_pred.index] if groups is not None else None

    # Keep test samples observed before the last training time, or censored
    selection = (t_test < t_train.max()) | (e_test == 0)
    survival_pred = survival_pred[selection]
    e_test, t_test = e_test[selection], t_test[selection]
    if g_test is not None:
        g_test = g_test[selection]

    # Evaluator built on the training samples and handed to the test evaluators
    # as their `censor_surv`
    km = EvalSurv(survival_gt.loc[train_index].T, t_train, e_train, censor_surv = 'km')

    population_eval = EvalSurv(survival_pred.T, t_test, e_test, censor_surv = km)
    results = {'Population': _score_survival(population_eval, times)}

    for group in groups.unique() if groups is not None else []:
        in_group = g_test == group
        group_eval = EvalSurv(survival_pred[in_group].T, t_test[in_group], e_test[in_group], censor_surv = km)
        results[group] = _score_survival(group_eval, times)

    return results

In [None]:
# Display names of the models, keyed by the suffix found in the result file names
# TODO: Add your method to this mapping for nicer display
dict_name = {
    'ntc': 'NTC',
    'ntc+uncorrect': 'NTC (Unadjusted)',
    'cmhe+g': 'CMHE (Treatment)',
    'cmhe+k': 'CMHE (Survival)',
}

In [None]:
from experiment import Experiment

In [None]:
# Open every prediction file matching the configuration and compute its performance
treated, untreated, clusters, results, te_cluster = {}, {}, {}, {}, {}

# The cluster centers used to regenerate the ground truth only depend on
# `parameter`, so they are built once rather than for every file
if parameter == 5:
    centers = ([0, 2.25], [-2.25, -1], [2.25, -1], [-3, 3], [4, 4])
else:
    centers = ([0, 2.25], [-2.25, -1], [2.25, -1])

for file_name in sorted(os.listdir(path)):
    # Only csv files of this experiment family, seed and mode are considered
    if not (root in file_name and '.csv' in file_name): continue
    if not (str(random_seed) in file_name): continue
    if mode not in file_name: continue
    # A name containing '<seed>+' or '<mode>+' is kept only when it also
    # contains '+<parameter>=' or '+<parameter>_'
    if (('{}+'.format(random_seed) in file_name) or ('{}+'.format(mode) in file_name)) and not(('+{}='.format(parameter) in file_name) or ('+{}_'.format(parameter) in file_name)): continue

    # The model name is the text between the last '_' and the extension
    model = file_name[file_name.rindex('_') + 1: file_name.rindex('.')]
    model = dict_name.get(model, model)
    print("Opening :", file_name, ' - ', model, ' - ', random_seed, ' - ', mode)

    if model not in results:
        results[model], treated[model], untreated[model], clusters[model], te_cluster[model] = {}, {}, {}, {}, {}

    predictions = pd.read_csv(os.path.join(path, file_name), header = [0, 1], index_col = 0).dropna()
    treated[model][random_seed]  = predictions[['treated']].droplevel(0, axis = 1)
    untreated[model][random_seed]= predictions[['untreated']].droplevel(0, axis = 1)
    clusters[model][random_seed] = predictions[['Assignment']].droplevel(0, axis = 1)

    # Evaluation times: the labels of the prediction columns, cast to float
    times = treated[model][random_seed].columns.astype(float)

    # Generate the associated ground truth
    if 'linear' in root:
        x, a, t, e, (cluster_centers, parameters, outcomes, assignement) = generate_linear(random_seed, mode = mode)
        cifs = compute_cif_linear(x, outcomes.cluster, cluster_centers, parameters, times)
    else:
        x, a, t, e, (cluster_centers, parameters, outcomes, assignement) = generate(random_seed, mode = mode, centers = centers,
                                                                                    homogenous = 'homogenous' in root,
                                                                                    proportions = [0.625, 0.25, 0.125] if 'small' in root else None,
                                                                                    size = parameter if 'size' in root else 3000,
                                                                                    percentage_treatment = parameter if 'treat' in root else 0.5)
        cifs = compute_cif(x, outcomes.cluster, cluster_centers, parameters, times)

    # When the fitted experiment was saved next to the predictions, recover the
    # treatment effect of each cluster from it
    # NOTE(review): Experiment.load presumably unpickles the file - only open trusted results
    model_file = file_name.replace('.csv', '.pickle')
    if os.path.isfile(os.path.join(path, model_file)):
        model_pickle = Experiment.load(os.path.join(path, model_file))
        te_cluster[model][random_seed] = model_pickle.clusters(times)
        # Diagnostic output only: not every model exposes a propensity estimate
        try:
            print(model_pickle.best_model[0].predict_propensity(x.values))
        except Exception as er:
            print(er)
    else:
        te_cluster[model][random_seed] = None

    # Evaluate
    results[model][random_seed] = evaluate(clusters[model][random_seed], te_cluster[model][random_seed], predictions, 1 - cifs,
                                           a, t, e, times, outcomes.cluster)

# The ground truth below reuses the variables of the last file processed, so fail
# early with a clear message instead of a NameError (or stale kernel state)
if not results:
    raise FileNotFoundError("No prediction file found in '{}' for root '{}', mode '{}', seed {} and parameter {}".format(
        path, root, mode, random_seed, parameter))

# Ground-truth treatment effect of each cluster, per fold
te_cluster['GT'] = {random_seed: {}}
for fold in range(5):
    index = (predictions[('Use',)] == fold).values
    te_cluster['GT'][random_seed][fold] =  (cifs['untreated'] - cifs['treated']).loc[index].groupby(outcomes.cluster.loc[index]).mean(0).T.values
                                                

# Stack the per-model, per-seed tables into one frame and name the index levels
results = pd.concat({name: pd.concat(per_seed, names = ['Seed']) for name, per_seed in results.items()})
results.index = results.index.set_names(['Model', 'Fold', 'Metric'], level = [0, 2, 3])

In [None]:
def _mean_std_label(block):
    """Format every column of `block` as 'mean (std)' with three decimals."""
    labels = ["{:.3f} ({:.3f})".format(mean, std) for mean, std in zip(block.mean(), block.std())]
    return pd.Series(labels, index = block.columns)

# Aggregate over folds: one 'mean (std)' cell per model, seed and metric
results.groupby(['Model', 'Seed', 'Metric']).apply(_mean_std_label)