In [1]:
import pandas as pd
import json
import yaml
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from lifelines import CoxPHFitter
from lifelines import CoxTimeVaryingFitter
from lifelines.utils import concordance_index
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import config
from copy import deepcopy


from sklearn.model_selection import train_test_split
from sksurv.util import Surv
from sksurv.metrics import concordance_index_censored
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.ensemble import RandomSurvivalForest,GradientBoostingSurvivalAnalysis
from sklearn_pandas import DataFrameMapper
from sksurv.functions import StepFunction


import torch 
import torchtuples as tt 
from pycox.datasets import metabric
from pycox.models import LogisticHazard, DeepHitSingle, PMF, CoxPH
from pycox.evaluation import EvalSurv
from pycox.preprocessing.label_transforms import LabTransDiscreteTime

from sksurv.metrics import integrated_brier_score                                           


from itertools import product
from pathlib import Path
from preprocess.impute import impute_data
from utils import *
import os
from skimage.exposure import match_histograms

In [None]:
# Column-type metadata; the 'continuous' entry is read later to pick the columns to scale.
# NOTE(review): yaml.FullLoader can build more than plain data types; if this file is
# ever not fully trusted, switch to yaml.safe_load.
with open(config.COLUMNS_CLASS, 'r') as yaml_file:
    column_class = yaml.load(yaml_file, Loader=yaml.FullLoader)

# Real cohort: the parquet index is turned back into a column, 'admit' is dropped if present.
real_baseline_df = (
    pd.read_parquet(config.BASELINE_COMBINE_FILE)
    .reset_index()
    .drop(columns=['admit'], errors='ignore')
)

impute_method = "add_missing"
apply_histogram_equalize = False


def _read_synthetic(path_prefix):
    """Read the synthetic baseline CSV stored at `<path_prefix><impute_method>.csv`."""
    return pd.read_csv(path_prefix + impute_method + '.csv')


def _load_split(train_prefix, val_prefix, test_prefix):
    """Load the (train, val, test) index arrays stored at `<prefix><impute_method>.npy`."""
    return tuple(
        np.load(prefix + impute_method + '.npy')
        for prefix in (train_prefix, val_prefix, test_prefix)
    )


# --- synthetic cohorts, one CSV per generator ---
cgan_df = _read_synthetic(config.CGAN_SYNTHETIC_BASELINE)
survadgan_df = _read_synthetic(config.SURVADGAN_SYNTHETIC_BASELINE)
tvae_df = _read_synthetic(config.TVAE_SYNTHETIC_BASELINE)
adgan_df = _read_synthetic(config.ADGAN_SYNTHETIC_BASELINE)
ddpm_df = _read_synthetic(config.DDPM_SYNTHETIC_BASELINE)
nflow_df = _read_synthetic(config.NFLOW_SYNTHETIC_BASELINE)

# --- train / validation / test identifiers ---
real_train_idx = np.load(config.INDEX_TRAIN)
real_val_idx = np.load(config.INDEX_VAL)
real_test_idx = np.load(config.INDEX_TEST)

survadgan_train_idx, survadgan_val_idx, survadgan_test_idx = _load_split(
    config.SURVADGAN_INDEX_TRAIN, config.SURVADGAN_INDEX_VAL, config.SURVADGAN_INDEX_TEST)
cgan_train_idx, cgan_val_idx, cgan_test_idx = _load_split(
    config.CGAN_INDEX_TRAIN, config.CGAN_INDEX_VAL, config.CGAN_INDEX_TEST)
tvae_train_idx, tvae_val_idx, tvae_test_idx = _load_split(
    config.TVAE_INDEX_TRAIN, config.TVAE_INDEX_VAL, config.TVAE_INDEX_TEST)
adgan_train_idx, adgan_val_idx, adgan_test_idx = _load_split(
    config.ADGAN_INDEX_TRAIN, config.ADGAN_INDEX_VAL, config.ADGAN_INDEX_TEST)
ddpm_train_idx, ddpm_val_idx, ddpm_test_idx = _load_split(
    config.DDPM_INDEX_TRAIN, config.DDPM_INDEX_VAL, config.DDPM_INDEX_TEST)
nflow_train_idx, nflow_val_idx, nflow_test_idx = _load_split(
    config.NFLOW_INDEX_TRAIN, config.NFLOW_INDEX_VAL, config.NFLOW_INDEX_TEST)

column_order = real_baseline_df.columns.tolist()


def _align_to_real(synthetic_df):
    """Strip '_1' from the column names, then select the real cohort's columns in order.

    NOTE(review): str.replace removes every '_1' occurrence, not only a trailing
    suffix (e.g. 'x_10' would become 'x0') -- confirm no column name is affected.
    """
    synthetic_df.columns = [col.replace('_1', '') for col in synthetic_df.columns]
    return synthetic_df[column_order]


cgan_df = _align_to_real(cgan_df)
tvae_df = _align_to_real(tvae_df)
survadgan_df = _align_to_real(survadgan_df)
adgan_df = _align_to_real(adgan_df)
ddpm_df = _align_to_real(ddpm_df)
nflow_df = _align_to_real(nflow_df)

if apply_histogram_equalize:
    # Reference distribution: 'Days' of the real rows whose ENC_HN is in train or val.
    use_index = np.concatenate([real_train_idx, real_val_idx])
    reference_mask = real_baseline_df['ENC_HN'].isin(use_index)
    reference_days = real_baseline_df.loc[reference_mask, 'Days'].to_numpy()

    for synthetic_df in (adgan_df, cgan_df, survadgan_df, tvae_df, ddpm_df, nflow_df):
        synthetic_df['Days'] = match_histograms(synthetic_df['Days'].to_numpy(), reference_days)

In [3]:
# Continuous columns to standardise: those listed under 'continuous' in the column
# metadata that exist in the real cohort, with the 'Days' column excluded.
continuous_cols = [
    name
    for name in column_class['continuous']
    if name in real_baseline_df.columns and name != 'Days'
]
# Columns to log-transform before scaling (none at the moment).
log_transform_cols = list()

def preprocess_data(df, train_idx, val_idx, test_idx, continuous_cols=None, 
                   log_transform_cols=None, verbose=True):
    """Split `df` by index labels and standardise the continuous columns.

    Parameters
    ----------
    df : DataFrame whose index contains the labels in the three index arrays.
    train_idx, val_idx, test_idx : index labels selected with `df.loc[...]`.
    continuous_cols : columns to standardise; the scaler is fitted on the
        training split only and applied to validation and test. When empty or
        None no scaling is performed.
    log_transform_cols : columns to replace by `log(x + 1e-8)` before scaling.
        Only columns that are also in `continuous_cols` are transformed.
    verbose : print progress and split shapes.

    Returns
    -------
    (train_df, val_df, test_df, scaler) where `scaler` is the fitted
    StandardScaler, or None when no continuous columns were given.
    """
    train_df = df.loc[train_idx].copy()
    val_df = df.loc[val_idx].copy() 
    test_df = df.loc[test_idx].copy()
    splits = (train_df, val_df, test_df)

    if log_transform_cols and continuous_cols:
        if verbose:
            print(f"Applying log transformation to: {log_transform_cols}")
        for col in log_transform_cols:
            if col in continuous_cols:
                # The small offset keeps log() finite for zero values.
                for split in splits:
                    split[col] = np.log(split[col] + 1e-8)

    # Bound up front so the return statement is valid when nothing is scaled
    # (previously this path raised UnboundLocalError).
    scaler = None
    if continuous_cols:
        scaler = StandardScaler()
        train_df[continuous_cols] = scaler.fit_transform(train_df[continuous_cols])
        val_df[continuous_cols] = scaler.transform(val_df[continuous_cols])
        test_df[continuous_cols] = scaler.transform(test_df[continuous_cols])
    
    if verbose:
        print("Data preprocessing completed.")
        print(f"Training set shape: {train_df.shape}")
        print(f"Validation set shape: {val_df.shape}")
        print(f"Test set shape: {test_df.shape}")

    # Event indicators are cast to bool when the columns are present.
    for split in splits:
        for col in ['dead', 'admit']:
            if col in split.columns:
                split[col] = split[col].astype(bool)
    
    return train_df, val_df, test_df, scaler

def tune_hyperparameters(model_class, param_grid, train_data, val_data, 
                        scoring_func, fit_func, predict_func, maximize=True, verbose=True):
    """Exhaustive grid search scored on a held-out validation set.

    Every combination in the Cartesian product of `param_grid` values is used
    to build `model_class(**params)`, fitted with `fit_func(model, train_data)`,
    and scored with `scoring_func(val_data, predict_func(model, val_data))`.

    A combination that raises is not fatal: it is recorded in the results with
    a NaN score and the error text, and the search continues.

    `verbose` is accepted for interface compatibility; no output is currently
    emitted.

    Returns
    -------
    (best_params, best_score, results). `best_params` is None and `best_score`
    is -inf / +inf when no combination improved on the initial bound.
    """
    names = list(param_grid)
    best_params = None
    best_score = -np.inf if maximize else np.inf
    results = []

    for combo in product(*param_grid.values()):
        params = dict(zip(names, combo))

        try:
            candidate = model_class(**params)
            fit_func(candidate, train_data)
            score = scoring_func(val_data, predict_func(candidate, val_data))

            results.append({**params, "score": score})
            is_better = score > best_score if maximize else score < best_score
            if is_better:
                best_score, best_params = score, params

        except Exception as exc:
            results.append({**params, "score": np.nan, "error": str(exc)})

    return best_params, best_score, results

def save_plot(model, model_label, train_label, event_col, out_dir):
    """Render `model.plot()` and save it as `<out_dir>/<train_label>.png`.

    The output directory is created if needed. The figure is titled with the
    model label, training label and event column, written at 300 dpi, and then
    closed.
    """
    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir / f"{train_label}.png"

    axes = model.plot()
    axes.set_title(f"{model_label}: train {train_label} ({event_col})")

    # pyplot acts on the current figure here.
    plt.tight_layout()
    plt.savefig(destination, dpi=300)
    plt.close()

    print(f"Plot saved to: {destination}")




In [4]:
# Survival Analysis Libraries

def run_lifelines(train_df, val_df, test_df, event_col, model_type, param_grid, train_label, final_model = None):
    """Tune, fit and evaluate a lifelines regression model.

    Parameters
    ----------
    train_df, val_df, test_df : frames containing 'Days', `event_col` and the
        feature columns.
    event_col : name of the event-indicator column.
    model_type : lifelines fitter class (instantiated with the grid parameters).
    param_grid : dict of parameter name -> candidate values.
    train_label : unused here; kept so all runners share one signature.
    final_model : an already-fitted model. When given, tuning and fitting are
        skipped and the model is only evaluated on `test_df`.

    Returns
    -------
    ({"c_index": ..., "ibs": ...}, final_model)

    Raises
    ------
    RuntimeError if the grid search yields no usable parameter combination.
    """

    def fit_func(model, data):
        model.fit(data, duration_col='Days',event_col=event_col)

    def predict_func(model, data):
        return model.predict_partial_hazard(data)

    def scoring_func(data, predictions):
        # Higher hazard means shorter survival, hence the sign flip.
        return concordance_index(data['Days'], -predictions, data[event_col])
    
    if final_model is None:
        best_params, _, tuning_results = tune_hyperparameters(
            model_type, param_grid, train_df, val_df,
            fit_func=fit_func, predict_func=predict_func, scoring_func=scoring_func)

        if best_params is None:
            # Every grid point failed (or none scored); surface the cause
            # instead of the opaque TypeError that model_type(**None) gives.
            errors = [entry["error"] for entry in tuning_results if "error" in entry]
            detail = errors[-1] if errors else "no combination produced a comparable score"
            model_label = getattr(model_type, "__name__", model_type)
            raise RuntimeError(
                f"Hyperparameter search for {model_label} found no usable configuration: {detail}")

        final_model = model_type(**best_params)

        # The selected configuration is refitted on train + validation.
        # NOTE(review): run_sksurv refits on the training split only -- confirm
        # the difference is intended.
        final_model.fit(pd.concat([train_df, val_df]), 'Days', event_col)
    
    test_pred = final_model.predict_partial_hazard(test_df)
    c_index = concordance_index(test_df['Days'], -test_pred, test_df[event_col])
    
    # Brier score is integrated over a unit-step grid spanning the 10th-90th
    # percentile of the test durations.
    lower, upper = np.percentile(test_df['Days'], [10, 90])
    times  = np.arange(lower, upper + 1)   
    y_train = Surv.from_dataframe(event_col, "Days", train_df)
    y_test  = Surv.from_dataframe(event_col, "Days", test_df)
    # Transposed so rows are subjects and columns are time points.
    surv_mat = final_model.predict_survival_function(test_df, times=times).values.T

    ibs = integrated_brier_score(y_train, y_test, surv_mat, times)

    return {"c_index": c_index, "ibs": ibs}, final_model

def run_sksurv(train_df, val_df, test_df, event_col, model_type, param_grid, train_label, final_model = None):
    """Tune, fit and evaluate a scikit-survival estimator.

    Parameters
    ----------
    train_df, val_df, test_df : frames containing 'Days', `event_col` and the
        feature columns.
    event_col : name of the event-indicator column.
    model_type : scikit-survival estimator class.
    param_grid : dict of parameter name -> candidate values.
    train_label : unused here; kept so all runners share one signature.
    final_model : an already-fitted estimator. When given, tuning and fitting
        are skipped and the estimator is only evaluated on `test_df`.

    Returns
    -------
    ({"c_index": ..., "ibs": ...}, final_model)

    Raises
    ------
    RuntimeError if the grid search yields no usable parameter combination.
    """

    def _xy(df):
        # Features are everything except the duration and event columns.
        X = df.drop(["Days", event_col], axis=1)
        y = Surv.from_dataframe(event_col, "Days", df)
        return X, y

    X_train, y_train = _xy(train_df)
    X_test,  y_test  = _xy(test_df)

    def fit_func(model, data):
        X, y = _xy(data)
        model.fit(X, y)

    def predict_func(model, data):
        X, _ = _xy(data)
        return model.predict(X)

    def scoring_func(data, predictions):
        _, y = _xy(data)
        return concordance_index_censored(y[event_col], y['Days'], predictions)[0]
    
    if final_model is None:
        best_params, _, tuning_results = tune_hyperparameters(
            model_type, param_grid, train_df, val_df,
            fit_func=fit_func, predict_func=predict_func, scoring_func=scoring_func)

        if best_params is None:
            # Every grid point failed (or none scored); surface the cause
            # instead of the opaque TypeError that model_type(**None) gives.
            errors = [entry["error"] for entry in tuning_results if "error" in entry]
            detail = errors[-1] if errors else "no combination produced a comparable score"
            model_label = getattr(model_type, "__name__", model_type)
            raise RuntimeError(
                f"Hyperparameter search for {model_label} found no usable configuration: {detail}")

        final_model = model_type(**best_params)
        # NOTE(review): refit uses the training split only, whereas run_lifelines
        # refits on train + validation -- confirm the difference is intended.
        final_model.fit(X_train, y_train)

    c_index = final_model.score(X_test, y_test)

    # Brier score is integrated over a unit-step grid spanning the 10th-90th
    # percentile of the test durations.
    lower, upper = np.percentile(test_df['Days'], [10, 90])
    times  = np.arange(lower, upper + 1)               
    # One row per test subject: each predicted step function evaluated on the grid.
    surv_mat = np.vstack([fn(times)             
                          for fn in final_model.predict_survival_function(X_test)])
    ibs  = integrated_brier_score(y_train, y_test, surv_mat, times)
    return {"c_index": c_index, "ibs": ibs}, final_model

def run_pycox(train_df, val_df, test_df, event_col, model_type, train_label, lr = 0.0001, n_discretize=50, final_model = None):
    """Fit (unless a fitted model is supplied) and evaluate a pycox network.

    Parameters
    ----------
    train_df, val_df, test_df : frames containing 'Days', `event_col` and the
        feature columns. They are only read, never modified.
    event_col : name of the event-indicator column.
    model_type : pycox model class. Classes exposing `label_transform` are
        trained on durations discretised into `n_discretize` bins; all others
        are trained on the raw (duration, event) targets with a single output.
    train_label : unused here; kept so all runners share one signature.
    lr : Adam learning rate.
    n_discretize : number of duration bins for discrete-time models.
    final_model : an already-fitted model. When given, training is skipped.

    Returns
    -------
    ({"c_index": ..., "ibs": ...}, final_model) where the C-index is the
    time-dependent ('antolini') concordance computed by EvalSurv.
    """
    feature_cols = [col for col in train_df.columns if col not in ['Days', event_col]]

    # Pass-through mapper: selects the feature columns in a fixed order.
    x_mapper = DataFrameMapper([(col, None) for col in feature_cols])

    x_train = x_mapper.fit_transform(train_df).astype('float32')
    x_val = x_mapper.transform(val_df).astype('float32')
    x_test = x_mapper.transform(test_df).astype('float32')

    get_target = lambda d: (d['Days'].values.astype('float32'), 
                          d[event_col].values.astype('float32'))
    durations_test, events_test = get_target(test_df)

    if final_model is None:
        n_features = x_train.shape[1]
        hidden_layers = [n_features * 3, n_features * 5, n_features * 3]

        # Decide the target encoding explicitly. The previous blanket
        # try/except also hid genuine setup errors and then silently built a
        # single-output network for a discrete-time model.
        if hasattr(model_type, 'label_transform'):
            labtrans = model_type.label_transform(n_discretize)
            y_train = labtrans.fit_transform(*get_target(train_df))
            y_val = labtrans.transform(*get_target(val_df))

            net = tt.practical.MLPVanilla(n_features, hidden_layers, labtrans.out_features,
                                    batch_norm=True, dropout=0.5)

            model_kwargs = {'duration_index': labtrans.cuts}
            # `alpha` is a DeepHit loss-weighting argument; the other discrete
            # models do not accept it.
            if issubclass(model_type, DeepHitSingle):
                model_kwargs['alpha'] = 0.1
            final_model = model_type(net, tt.optim.Adam(lr), **model_kwargs)
        else:
            y_train = get_target(train_df)
            y_val = get_target(val_df)

            net = tt.practical.MLPVanilla(n_features, hidden_layers, 1,
                                    batch_norm=True, dropout=0.5)

            final_model = model_type(net, tt.optim.Adam(lr))

        # Early stopping monitors the validation split.
        final_model.fit(x_train, y_train, batch_size=256, epochs=1000,
                callbacks=[tt.cb.EarlyStopping(patience = 50)],
                val_data=(x_val, y_val), verbose=False)    

    if isinstance(final_model, CoxPH):
        # CoxPH needs baseline hazards before survival curves can be predicted.
        final_model.compute_baseline_hazards()
    surv = final_model.predict_surv_df(x_test)
    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
    c_index = ev.concordance_td('antolini')

    # Integrate the Brier score over the model's own time grid, restricted to
    # the 10th-90th percentile of the test durations.
    grid = surv.index
    lower, upper = np.percentile(test_df["Days"], [10, 90])
    times = np.asarray(grid[(grid >= lower) & (grid < upper)])

    if len(times) == 0:
        # No grid point falls inside the window: fall back to the full grid.
        times = np.asarray(grid)

    ibs   = ev.integrated_brier_score(times)  
    return {"c_index": c_index, "ibs": ibs}, final_model


In [None]:
# Impute and preprocess every dataset once per imputation strategy.

event_col = 'dead'
processed_datasets = {}

impute_methods = ['median', 'mice','hyperimpute']

# dataset name -> (frame, train ids, val ids, test ids); this order is the order
# of the entries in each processed_datasets[method] dict.
dataset_splits = {
    "real":      (real_baseline_df, real_train_idx, real_val_idx, real_test_idx),
    "cgan":      (cgan_df, cgan_train_idx, cgan_val_idx, cgan_test_idx),
    "survadgan": (survadgan_df, survadgan_train_idx, survadgan_val_idx, survadgan_test_idx),
    "adgan":     (adgan_df, adgan_train_idx, adgan_val_idx, adgan_test_idx),
    "tvae":      (tvae_df, tvae_train_idx, tvae_val_idx, tvae_test_idx),
    "ddpm":      (ddpm_df, ddpm_train_idx, ddpm_val_idx, ddpm_test_idx),
    "nflow":     (nflow_df, nflow_train_idx, nflow_val_idx, nflow_test_idx),
}

for method in impute_methods:
    print(f"Running imputation: {method}")

    def impute_and_preprocess(df, train_idx, val_idx, test_idx):
        """Impute `df` with the current `method`, then split and scale it.

        Returns the (train_df, val_df, test_df, scaler) tuple from preprocess_data.
        """
        indexed = df.copy().set_index('ENC_HN', drop=True)

        imputed = impute_data(indexed, train_idx, val_idx, test_idx, method=method,
                              impute_feature_only=True, impute_test_set=True)

        # Re-establish ENC_HN as the index from the imputed frame's own index,
        # and make sure 'admit' is not carried forward.
        imputed['ENC_HN'] = imputed.index
        imputed = imputed.set_index('ENC_HN', drop=True).drop(columns=['admit'], errors='ignore')

        return preprocess_data(imputed, train_idx, val_idx, test_idx,
                               continuous_cols=continuous_cols,
                               log_transform_cols=log_transform_cols, verbose=False)

    processed_datasets[method] = {
        dataset_name: impute_and_preprocess(*split)
        for dataset_name, split in dataset_splits.items()
    }


Running imputation: median
Running imputation: mice
Running imputation: hyperimpute


In [None]:
# survival metrics

# Search grids, kept separate from the model registry for readability.
coxph_param_grid = {
    'penalizer': [0.1, 1],
    'l1_ratio': [0, 0.5, 1],
}
rsf_param_grid = {
    'n_estimators': [5, 20, 50],
    'max_depth': [2, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'n_jobs': [-1],  # Use all CPUs
    'random_state': [515616],
}

# Each entry: (display name, runner function, keyword arguments for the runner).
# SKSurv CoxPH (CoxPHSurvivalAnalysis, alpha=0.1) is currently disabled.
models = [
    ("Lifelines CoxPH", run_lifelines, dict(model_type=CoxPHFitter, param_grid=coxph_param_grid)),
    ("SKSurv RandomForest", run_sksurv, dict(model_type=RandomSurvivalForest, param_grid=rsf_param_grid)),
    ("PyCox DeepSurv", run_pycox, dict(model_type=CoxPH)),
    ("PyCox DeepHit", run_pycox, dict(model_type=DeepHitSingle)),
]

# Keys of processed_datasets[method]; 'real' first, then the synthetic generators.
dataset_names = "real cgan survadgan adgan tvae ddpm nflow".split()

def process_testdf(test_df, test_scaler, train_scaler, max_train_time):
    """Return a copy of `test_df` expressed in the training set's scale.

    The continuous columns are first mapped back with `test_scaler`, then
    re-scaled with `train_scaler`. 'Days' is capped at `max_train_time`.
    The input frame is left untouched.

    NOTE(review): durations are capped but the event indicator is not changed
    for capped rows -- confirm this is the intended handling.
    """
    rescaled = deepcopy(test_df)

    # Undo the scaling that was fitted on the test dataset's own training split ...
    original_units = test_scaler.inverse_transform(rescaled[continuous_cols])
    rescaled[continuous_cols] = original_units

    # ... and apply the scaling of the dataset the model was trained on.
    rescaled[continuous_cols] = train_scaler.transform(rescaled[continuous_cols])

    rescaled['Days'] = np.minimum(rescaled['Days'], max_train_time)
    return rescaled

# For every imputation strategy: evaluate each model trained on real data against
# every test set, then each model trained on a synthetic set against its own test
# set and against the real test set. Results are written to one Excel workbook
# per imputation strategy.
for method in impute_methods:
    processed = processed_datasets[method]
    survival_metrics = []

    # Appends one result row; closes over `survival_metrics` and `method`
    # of the current outer-loop iteration.
    def add_result(train_name, test_name, model_name, metrics):
        survival_metrics.append({
            "Train": train_name,
            "Test": test_name,
            "Model": model_name,
            "Impute": method,
            "C-Index": metrics["c_index"],
            "IBS": metrics["ibs"]
        })

    # ---- Part 1: train on real, test on every dataset ----
    train_name = 'real'


    for model_name, model_func, kwargs in models:
        # Reset per model: the first test set triggers fitting, later test sets
        # reuse the fitted model returned by the runner.
        final_model = None
        for test_name in dataset_names:

            train_df, val_df, _, train_scaler = processed[train_name]
            max_train_time = train_df['Days'].max()
            _, _, test_df, test_scaler = processed[test_name]
            # Re-express the test split in the training scaler's units and cap Days.
            test_df = process_testdf(test_df, test_scaler, train_scaler, max_train_time)




            print(f"[Train: {train_name}  Test: {test_name}]  {model_name}")
            try:
                metrics, final_model = model_func(train_df=train_df, val_df=val_df, test_df=test_df,
                                        event_col=event_col, train_label=train_name, final_model = final_model, **kwargs)
                add_result(train_name, test_name, model_name, metrics)
            except Exception as e:
                # A failure in the real-data baseline aborts the whole run.
                print(f"[Error] {model_name} on {train_name}-->{test_name}: {e}")
                raise ValueError(f"Error in {model_name} on {train_name} to {test_name}: {e}")

    # ---- Part 2: train on each synthetic dataset, test on itself and on real ----
    for model_name, model_func, kwargs in models:
        for train_name in dataset_names:
            if train_name == 'real':
                continue
            test_name = train_name
            train_df, val_df, test_df, train_scaler = processed[train_name]
            max_train_time = train_df['Days'].max()

            print(f"[Train: {train_name}  Test: {test_name}]  {model_name}")
            try:
                # No final_model is passed, so the runner fits a new model here.
                metrics, final_model = model_func(train_df=train_df, val_df=val_df, test_df=test_df,
                                        event_col=event_col, train_label=train_name, **kwargs)
                add_result(train_name, test_name, model_name, metrics)

                # test_name is reassigned inside the try on purpose: the except
                # handler below reports whichever evaluation was in progress.
                test_name = 'real'
                print(f"[Train: {train_name}  Test: {test_name}]  {model_name}")
                _, _, test_df, test_scaler = processed[test_name]
                
                test_df = process_testdf(test_df, test_scaler, train_scaler, max_train_time)


                # Reuse the model fitted just above; only evaluation happens here.
                metrics, _ = model_func(train_df=train_df, val_df=val_df, test_df=test_df,
                                        event_col=event_col, train_label=train_name, final_model=final_model,**kwargs)
                add_result(train_name, test_name, model_name, metrics)
            except Exception as e:
                # Unlike Part 1, a failure here is recorded as an empty row and the run continues.
                print(f"[Error] {model_name} on {train_name}-->{test_name}: {e}")
                add_result(train_name, test_name, model_name, {"c_index": None, "ibs": None})

    df = pd.DataFrame(survival_metrics)
    df["Train Test"] = "Train " + df["Train"].str.capitalize() + " --> Test " + df["Test"].str.capitalize()

    # One row per train/test pair, one column per model.
    cindex = df.pivot(index="Train Test", columns="Model", values="C-Index")
    ibs = df.pivot(index="Train Test", columns="Model", values="IBS")


    out_folder = f"survival_results_{method}"
    
    os.makedirs(out_folder, exist_ok=True)
    # Two sheets in one workbook: "C-Index" and "IBS".
    with pd.ExcelWriter(f"{out_folder}/survival_metrics.xlsx") as xl:
        cindex.to_excel(xl, sheet_name="C-Index")
        ibs.to_excel(xl, sheet_name="IBS")


[Train: real  Test: real]  Lifelines CoxPH
[Train: real  Test: adgan]  Lifelines CoxPH
[Train: real  Test: real]  SKSurv RandomForest
[Train: real  Test: adgan]  SKSurv RandomForest
[Train: real  Test: real]  PyCox DeepSurv
[Train: real  Test: adgan]  PyCox DeepSurv
[Train: real  Test: real]  PyCox DeepHit
[Train: real  Test: adgan]  PyCox DeepHit
[Train: adgan  Test: adgan]  Lifelines CoxPH
[Train: adgan  Test: real]  Lifelines CoxPH
[Train: adgan  Test: adgan]  SKSurv RandomForest
[Train: adgan  Test: real]  SKSurv RandomForest
[Train: adgan  Test: adgan]  PyCox DeepSurv
[Train: adgan  Test: real]  PyCox DeepSurv
[Train: adgan  Test: adgan]  PyCox DeepHit
[Train: adgan  Test: real]  PyCox DeepHit
[Train: real  Test: real]  Lifelines CoxPH
[Train: real  Test: adgan]  Lifelines CoxPH
[Train: real  Test: real]  SKSurv RandomForest
[Train: real  Test: adgan]  SKSurv RandomForest
[Train: real  Test: real]  PyCox DeepSurv
[Train: real  Test: adgan]  PyCox DeepSurv
[Train: real  Test: real]

In [8]:
import pandas as pd

# Alternative more concise version:
def sort_train_test_dataframe_v2(df):
    """Order a results table by train/test scenario and shorten its row labels.

    The index of `df` must hold labels of the form
    'Train <name> --> Test <name>'. Rows are grouped as:
      1. real -> real
      2. synthetic -> real
      3. real -> synthetic
      4. synthetic -> synthetic
    and sorted by (train, test) name within each group.

    Each row is then relabelled with the generator's display name ('Real' for
    the real -> real row), so the same label can appear in several groups.
    A sorted copy is returned; `df` itself is not modified.
    """
    display_names = {'SURVADGAN': 'SurvivalGAN', 'ADGAN': 'ADSGAN', 'CGAN': 'CTGAN'}
    # (train is real, test is real) -> group rank
    group_of = {(True, True): 1, (False, True): 2, (True, False): 3, (False, False): 4}

    def split_label(label):
        pieces = label.split(' --> ')
        return (pieces[0].replace('Train ', '').strip(),
                pieces[1].replace('Test ', '').strip())

    def ordering(label):
        train, test = (part.lower() for part in split_label(label))
        return (group_of[(train == 'real', test == 'real')], train, test)

    def short_name(label):
        train, test = split_label(label)
        if train.lower() == 'real':
            if test.lower() == 'real':
                return 'Real'
            generator = test
        else:
            generator = train
        key = generator.upper()
        return display_names.get(key, key)

    ordered = df.loc[sorted(df.index, key=ordering)].copy()
    ordered.index = [short_name(label) for label in ordered.index]
    return ordered

# Imported here as well so this cell can be run on its own, like the pandas import above.
import os


def load_formatted_results(name):
    """Load one imputation method's metrics and format them as 'C-index (IBS)'.

    Reads the 'C-Index' and 'IBS' sheets of
    `survival_results_<name>/survival_metrics.xlsx`, rounds both to two
    decimals, joins them cell-wise into a single string table, and returns the
    table sorted and relabelled by sort_train_test_dataframe_v2.
    """
    workbook = f'survival_results_{name}/survival_metrics.xlsx'
    df_c = pd.read_excel(workbook, index_col=0, sheet_name='C-Index')
    df_ibs = pd.read_excel(workbook, index_col=0, sheet_name='IBS')
    combined = df_c.round(2).astype(str) + " (" + df_ibs.round(2).astype(str) + ")"
    return sort_train_test_dataframe_v2(combined)


df_final1 = load_formatted_results('mice')
df_final2 = load_formatted_results('median')
df_final3 = load_formatted_results('hyperimpute')

# Save all three tables as separate sheets of one workbook. The target folder is
# created first; without this the writer fails when 'results/' does not exist yet.
os.makedirs('results', exist_ok=True)
with pd.ExcelWriter('results/survival_results_combined.xlsx') as writer:
    df_final1.to_excel(writer, sheet_name='mice')
    df_final2.to_excel(writer, sheet_name='median')
    df_final3.to_excel(writer, sheet_name='hyperimpute')

