In [None]:
# versions: pandas==2.1.1, numpy==1.26.4, scikit-survival==0.24.1

In [None]:
import pandas as pd
import os
import pickle
import numpy as np
from sksurv.metrics import concordance_index_censored,concordance_index_ipcw,brier_score,cumulative_dynamic_auc,brier_score,integrated_brier_score


In [None]:
# --- Run configuration ---
# CSV file holding the OASIS cohort that is read below.
OASIS_DATASET_FILE = "./data/OASIS_all.csv"
# One of "rBL", "rBL+VOL", "rBL+RAD", "rBL+VOL+RAD".
FEATURE_SET = "rBL"
# One of "CN+MCI", "MCI".
DATASET_SEL = "CN+MCI"
# One of "CoxnetSurvivalAnalysis", "CoxPHSurvivalAnalysis", "ExtraSurvivalTrees",
# "GradientBoostingSurvivalAnalysis", "RandomSurvivalForest".
MODEL = "CoxnetSurvivalAnalysis"
# True or False.
SAMPLE_WEIGHTS = True


In [None]:
# Directory of the stored run: "./results/<MODEL><sep><DATASET_SEL>_<FEATURE_SET>/",
# where <sep> is "sw_" when SAMPLE_WEIGHTS is set and "_" otherwise.
MODEL_DIR = os.path.join(
    "./results/"
    + MODEL
    + ("sw_" if SAMPLE_WEIGHTS else "_")
    + DATASET_SEL
    + "_"
    + FEATURE_SET
    + "/"
)

In [None]:
# Read the OASIS table, one-hot encode APOE4 / PTGENDER / MagStrength, rename
# LDELTOTAL -> LDELTOT and MMSCORE -> MMSE, and index the rows by PTID.
OASIS_dataset = (
    pd.read_csv(OASIS_DATASET_FILE)
    .pipe(pd.get_dummies, columns=["APOE4", "PTGENDER", "MagStrength"])
    .rename({"LDELTOTAL": "LDELTOT", "MMSCORE": "MMSE"}, axis=1)
    .set_index(["PTID"])
)

In [None]:
def ipcw_brier_scorer(estimator, X, y):
    """Return the negated mean integrated Brier score of ``estimator`` over folds.

    For every ``(train_idx, test_idx)`` pair in the module-level ``index_pairs``
    the estimator is re-fitted on the training rows, using the matching entries
    of the module-level ``weights`` as ``sample_weight``. Its predicted survival
    probabilities at times 4.0 and 8.0 are then scored on the held-out rows with
    ``integrated_brier_score``.

    Parameters
    ----------
    estimator
        Object providing ``fit(X, y, sample_weight=...)`` and
        ``predict_survival_function(X)``. It is re-fitted once per fold, so it
        is left fitted on the last fold's training rows.
    X
        Feature table; rows are selected positionally through ``X.iloc``.
    y
        Outcome array, indexed with the same fold indices as ``X``.

    Returns
    -------
    float
        ``-mean(per-fold scores)``, i.e. larger is better.

    Notes
    -----
    ``index_pairs`` and ``weights`` are not parameters; they must be defined at
    module level before this function is called.
    """
    # Defined once so prediction and scoring cannot drift apart.
    eval_times = [4.0, 8.0]

    fold_scores = []
    for train_idx, test_idx in index_pairs:
        X_tr, X_te = X.iloc[train_idx], X.iloc[test_idx]
        y_tr, y_te = y[train_idx], y[test_idx]

        # Only the training-fold weights are used for fitting.
        estimator.fit(X_tr, y_tr, sample_weight=weights[train_idx])
        surv_fns = estimator.predict_survival_function(X_te)
        preds = np.asarray([[fn(t) for t in eval_times] for fn in surv_fns])

        fold_scores.append(integrated_brier_score(y_tr, y_te, preds, eval_times))
    return -np.mean(fold_scores)

In [None]:
# Optional correction step: if the run directory holds per-column models, each
# one predicts a column from the MagStrength_3.0 indicator and that prediction
# is subtracted from the column.
# CAUTION: this modifies OASIS_dataset in place, so running the cell a second
# time without reloading the data subtracts the prediction again.
filename = MODEL_DIR + "/normalization_models.sav"
if os.path.exists(filename):
    # NOTE: unpickling can execute arbitrary code; only load files written by a
    # trusted training run.
    with open(filename, "rb") as fh:
        normalization_models = pickle.load(fh)
    if normalization_models:
        # The model input is identical for every column, so build it once.
        x_test = OASIS_dataset["MagStrength_3.0"].astype(int).to_numpy().reshape((-1, 1))
        for col, model in normalization_models.items():
            pred_test = model.predict(x_test)
            OASIS_dataset.loc[:, col] = OASIS_dataset[col] - pred_test

In [None]:
# Load the pickled search object; later cells read its ``best_estimator_``.
# NOTE: unpickling can execute arbitrary code; only load files written by a
# trusted training run.
filename = MODEL_DIR + "model_bayes_optimization.sav"
with open(filename, "rb") as fh:  # close the handle as soon as loading is done
    clf = pickle.load(fh)

In [None]:
# Load the pickled preprocessing objects; later cells read the "imputation" and
# "scaler" entries.
# NOTE: unpickling can execute arbitrary code; only load files written by a
# trusted training run.
filename = MODEL_DIR + "preprocessing_entire_training.sav"
with open(filename, "rb") as fh:  # close the handle as soon as loading is done
    dict_preproc = pickle.load(fh)

In [None]:
# Feature names reported by the stored imputation step.
features = dict_preproc["imputation"].get_feature_names_out()

In [None]:
# Keep only the OASIS columns whose labels appear in `features`.
df_ges = OASIS_dataset.filter(items=features, axis=1)

In [None]:
# Apply the stored preprocessing to the OASIS features: impute first, then
# scale the imputed values.
imp = dict_preproc["imputation"]
scaler = dict_preproc["scaler"]

X_test_pre_imp = imp.transform(df_ges)
X_test_pre = scaler.transform(X_test_pre_imp)

In [None]:
# Wrap the preprocessed values in a DataFrame labelled with the imputer's
# feature names.
dataNamed_test = pd.DataFrame(
    X_test_pre,
    columns=dict_preproc["imputation"].get_feature_names_out(),
)

In [None]:
# Re-attach the PTID values from df_ges (first index level) and use them as
# the row index.
dataNamed_test = dataNamed_test.assign(
    PTID=df_ges.index.get_level_values(0).tolist()
).set_index(["PTID"])

In [None]:
# For the feature positions stored in discr_ind, overwrite the scaled column
# with the imputed-but-unscaled values.
# NOTE: unpickling can execute arbitrary code; only load files written by a
# trusted training run.
filename = MODEL_DIR + "discr_ind.sav"
with open(filename, "rb") as fh:  # close the handle as soon as loading is done
    discr_ind = pickle.load(fh)

# Look the feature names up once instead of on every iteration.
imp_feature_names = imp.get_feature_names_out()
for ind in discr_ind:
    col = imp_feature_names[ind]
    dataNamed_test[col] = X_test_pre_imp[:, ind]

In [None]:
# Training table saved with the run; its columns define the model inputs
# selected in the next cell.
trainingDSCSV = MODEL_DIR + "/training.csv"
df_train = pd.read_csv(trainingDSCSV).set_index(["PTID", "IMAGEUID"])

In [None]:
# Restrict the OASIS features to the columns present in the training table.
test = dataNamed_test.filter(items=df_train.columns, axis=1)

In [None]:
# Build one (event, time) pair per OASIS row.
# A row is a non-event (False) when its Status is one of the labels below;
# any other Status value, including a missing one, counts as an event (True).
non_event_statuses = {"sCN", "sMCI", "CNtoMCI", "uMCINoAD", "uCNNoAD"}
list_Y_test = [
    (row_status not in non_event_statuses, row_time)
    for row_status, row_time in zip(OASIS_dataset["Status"], OASIS_dataset["Time"])
]

In [None]:
# Structured array with two fields per record: a bool followed by a float,
# filled from the (event, time) pairs.
dt = np.dtype("bool,float")
y_test = np.array(list_Y_test, dtype=dt)

In [None]:
# Results table stored with the run (not referenced again in the cells below).
df_res = pd.read_csv(MODEL_DIR + "res_table.csv")

In [None]:
# Load the pickled training outcomes; the IPCW-based metrics below take them
# as their first argument.
# NOTE: unpickling can execute arbitrary code; only load files written by a
# trusted training run.
filename = MODEL_DIR + "y_train.sav"
with open(filename, "rb") as fh:  # close the handle as soon as loading is done
    y_train = pickle.load(fh)

In [None]:
# Evaluate the stored model on the OASIS features at times 4.0 and 8.0.
# The evaluation is the same for both settings of SAMPLE_WEIGHTS except for the
# object that supplies the cumulative hazard functions (see chf_estimator).
best_estimator = clf.best_estimator_

# Predicted survival probability at each time, one row per predicted function.
survival_functions = best_estimator.predict_survival_function(test)
surv_4 = np.vstack([surv_fn(4.0) for surv_fn in survival_functions])
surv_8 = np.vstack([surv_fn(8.0) for surv_fn in survival_functions])

# Split the structured outcome array into plain lists.
status = [i[0] for i in y_test]
time = [i[1] for i in y_test]

# With sample weights the best estimator is queried directly; without them the
# cumulative hazard functions come from its ``estimator_`` attribute.
chf_estimator = best_estimator if SAMPLE_WEIGHTS else best_estimator.estimator_
clf_chf_funcs = chf_estimator.predict_cumulative_hazard_function(test, return_array=False)

# Cumulative hazard at each time serves as the risk score for ranking metrics.
clf_risk_scores_4 = np.vstack([chf(4.0) for chf in clf_chf_funcs])
clf_risk_scores_8 = np.vstack([chf(8.0) for chf in clf_chf_funcs])

cic_4 = concordance_index_censored(status, time, clf_risk_scores_4[:, 0].tolist())[0]
cic_8 = concordance_index_censored(status, time, clf_risk_scores_8[:, 0].tolist())[0]

cii_4 = concordance_index_ipcw(y_train, y_test, clf_risk_scores_4[:, 0].tolist(), tau=4.0)[0]
cii_8 = concordance_index_ipcw(y_train, y_test, clf_risk_scores_8[:, 0].tolist(), tau=8.0)[0]

auc_4 = cumulative_dynamic_auc(y_train, y_test, clf_risk_scores_4[:, 0].tolist(), 4.0)[1]
auc_8 = cumulative_dynamic_auc(y_train, y_test, clf_risk_scores_8[:, 0].tolist(), 8.0)[1]

bs_4 = brier_score(y_train, y_test, surv_4[:, 0], 4.0)[1][0]
bs_8 = brier_score(y_train, y_test, surv_8[:, 0], 8.0)[1][0]

# Report every metric scaled by 100 and rounded to three decimals.
for metric_label, metric_value in (
    ("cic_4", cic_4),
    ("cic_8", cic_8),
    ("cii_4", cii_4),
    ("cii_8", cii_8),
    ("auc_4", auc_4),
    ("auc_8", auc_8),
    ("bs_4", bs_4),
    ("bs_8", bs_8),
):
    print(metric_label + ": " + str(round(metric_value * 100, 3)))

In [None]:
# Collect the metrics in a one-row table and write it next to the stored run.
# The "model" entry is the run-directory name up to its first underscore.
df = pd.DataFrame(
    {
        "model": [MODEL_DIR.split("/")[2].split("_")[0]],
        "features": [FEATURE_SET],
        "problem": [DATASET_SEL],
        "concordance_index_censored_4": [cic_4],
        "concordance_index_censored_8": [cic_8],
        "concordance_index_ipcw_4": [cii_4],
        "concordance_index_ipcw_8": [cii_8],
        "cumulative_dynamic_auc_4": [auc_4],
        "cumulative_dynamic_auc_8": [auc_8],
        "brier_score_4": [bs_4],
        "brier_score_8": [bs_8],
    }
)
df.to_csv(MODEL_DIR + "res_table_OASIS.csv")