# Compare Results of the Binary Python Model to the Original R Folds as in the paper

Plot the AUC of all 5 ensemble members (which differ only in their weights) for split 6 from the paper (Andrea).  
Compare the results with the original ones obtained in R, among other things by means of a calibration plot.

## Load Libraries and Modules

In [None]:
%matplotlib inline

import os
import h5py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import zipfile
import random

from sklearn.model_selection import KFold
from sklearn import preprocessing
from sklearn import metrics
from scipy import ndimage

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_probability as tfp
from tensorflow.keras.models import Sequential, Model

# Report the TensorFlow version used by this kernel.
print("TF  Version",tf.__version__)

In [None]:
# Make the project directory the working directory before the helper modules
# below are imported (presumably they are resolved relative to it).
print(os.getcwd())
DIR = "/tf/notebooks/brdd/xAI_stroke_3d/"
# os.getcwd() never ends in a path separator while DIR does, so a plain string
# comparison is always unequal; compare the normalised paths instead.
if os.path.normpath(os.getcwd()) != os.path.normpath(DIR):
    os.chdir(DIR)

import functions_metrics as fm
import functions_model_definition as md

print("TF  Version",tf.__version__)

## Load Data

In [None]:
# Define the path + output path:
print(os.getcwd())
# Directory from which the image HDF5 file and the baseline CSV are read below.
IMG_DIR = "/tf/notebooks/hezo/stroke_zurich/data/" 
# IMG_DIR2 = "/tf/notebooks/kook/data-sets/stroke-lh/"
# Same location as IMG_DIR; not referenced in the visible part of this notebook.
DATA_DIR = "/tf/notebooks/hezo/stroke_zurich/data/" 
# Root directory of the stored model weight files (.h5).
WEIGHT_DIR = "/tf/notebooks/brdd/xAI_stroke_3d/weights/"


In [None]:
# Common prefix of the weight file names; a model ending (e.g. "14") and ".h5"
# are appended to it when the weights are loaded.
model_name = "3d_cnn_binary_model_split6_unnormalized_avg_layer_paper_model_sigmoid_activation_"

# Options forwarded to md.stroke_binary_3d when the model is built.
layer_connection = "globalAveragePooling"
last_activation = "sigmoid"

In [None]:
# Read the image tensor, both label arrays and the patient ids from the HDF5 file.
# with h5py.File(IMG_DIR2 + 'dicom-3d.h5', "r") as h5:
# both images are the same
h5_path = IMG_DIR + 'dicom_2d_192x192x3_clean_interpolated_18_02_2021_preprocessed2.h5'
with h5py.File(h5_path, "r") as h5:
    # The generator is consumed by the unpacking while the file is still open.
    X_in, Y_img, Y_pat, pat = (h5[key][:] for key in ("X", "Y_img", "Y_pat", "pat"))

# Insert a singleton axis at position 4, then print shape and summary statistics.
X_in = np.expand_dims(X_in, axis = 4)
print(X_in.shape, X_in.min(), X_in.max(), X_in.mean(), X_in.std())

In [None]:
# Tabular baseline data; the columns used further below include p_id, mrs3 and
# the 13 covariates copied into X_tab.
dat = pd.read_csv(IMG_DIR + 'baseline_data_zurich_prepared.csv', sep=",")
dat

In [None]:
# Load the predefined train/val/test assignments and keep only split 6.
# The separator stays a regular expression (python engine) as in the original
# call, but is now a raw string so that "\," is not an invalid string escape.
# Quote characters are removed as a plain literal with regex=False, which behaves
# the same regardless of the pandas version's default for `regex`.
andrea_splits = pd.read_csv('/tf/notebooks/brdd/xAI_stroke_3d/data/andrea_splits.csv',
                            sep=r'\,', header = None, engine = 'python',
                            usecols = [1,2,3]).apply(lambda col: col.str.replace('"', '', regex=False))
# The first row of the file carries the column names.
andrea_splits.columns = andrea_splits.iloc[0]
andrea_splits.drop(index=0, inplace=True)
andrea_splits = andrea_splits.astype({'idx': 'int32', 'spl': 'int32'})
split6 = andrea_splits.loc[andrea_splits['spl']==6]
split6

In [None]:
# Match image and tabular data: keep only the imaged patients that have a row
# in `dat` and collect images, covariates and outcomes in the same order.
tab_columns = ["age", "sexm", "nihss_baseline", "mrs_before",
               "stroke_beforey", "tia_beforey", "ich_beforey",
               "rf_hypertoniay", "rf_diabetesy", "rf_hypercholesterolemiay",
               "rf_smokery", "rf_atrial_fibrillationy", "rf_chdy"]
dat_ids = dat.p_id.to_numpy()
tab_values = dat[tab_columns].to_numpy()
mrs_values = dat["mrs3"].to_numpy()

# (position in the image arrays, row position in dat) for every matched patient.
# Row positions are used directly on the numpy views above, so the result does
# not depend on the labels of dat's index.
matched = []
for j, p in enumerate(pat):
    rows = np.flatnonzero(dat_ids == p)
    if rows.size == 0:
        continue  # patient has images but no tabular record
    if rows.size > 1:
        raise ValueError(
            "patient id %r occurs %d times in the tabular data" % (p, rows.size)
        )
    matched.append((j, rows[0]))
n = len(matched)

X = np.zeros((n, X_in.shape[1], X_in.shape[2], X_in.shape[3], X_in.shape[4]))
X_tab = np.zeros((n, len(tab_columns)))
Y_mrs = np.zeros((n))
Y_eventtia = np.zeros((n))
p_id = np.zeros((n))

for i, (j, k) in enumerate(matched):
    X_tab[i, :] = tab_values[k]
    X[i] = X_in[j]
    p_id[i] = pat[j]
    Y_eventtia[i] = Y_pat[j]
    Y_mrs[i] = mrs_values[k]  # scalar, not a length-1 Series
X_tab.shape

In [None]:
# Binary outcome: 0 when the value in Y_mrs is 0, 1 or 2, otherwise 1.
Y_new = np.array([0 if element in [0,1,2] else 1 for element in Y_mrs])

In [None]:
# Split data into training, validation and test set according to "split6".
X = np.float32(np.squeeze(X))

# The idx values of the split table are shifted by -1 before they are used as
# array positions (presumably they are 1-based indices from R — confirm).
train_idx, valid_idx, test_idx = (
    split6.loc[split6['type'] == subset, "idx"].to_numpy() - 1
    for subset in ("train", "val", "test")
)

X_train, X_valid, X_test = X[train_idx], X[valid_idx], X[test_idx]
# Targets are the binarised values in Y_new; Y_eventtia[...] was the alternative
# target kept commented out in the original cell.
y_train, y_valid, y_test = Y_new[train_idx], Y_new[valid_idx], Y_new[test_idx]

print(X_train.shape, X_valid.shape, X_test.shape)
print(y_train.shape, y_valid.shape, y_test.shape)

## Model

In [None]:
# Per-sample input shape: the shape of one training volume plus a trailing
# singleton axis.
input_dim = X_train.shape[1:] + (1,)
output_dim = 1

# Build the network from the project's model-definition module.
model_3d = md.stroke_binary_3d(
    input_dim = input_dim,
    output_dim = output_dim,
    layer_connection = layer_connection,
    last_activation = last_activation,
)
model_3d.summary()


In [None]:
# Compile with binary cross-entropy loss and the Adam optimizer (learning rate
# 5*1e-5); accuracy and AUC are tracked as metrics.
model_3d.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=5*1e-5),
    metrics=["acc", tf.keras.metrics.AUC()]
)

## Save Results per Model

Check first for one model:

In [None]:
# Load the stored weights of the model with ending "14" into the compiled network.
model_3d.load_weights(WEIGHT_DIR + "andrea_split/" + model_name + "14" + ".h5")
# model_3d.evaluate(x=X_test, y=y_test)
# y_pred = model_3d.predict(X_test)

In [None]:
# Evaluate the currently loaded weights on the test set of split 6
# (loss plus the metrics given to compile()).
model_3d.evaluate(x=X_test, y=y_test)

Loop over all models and save the results.

In [None]:
# For every ensemble member: load its weights, predict on the test set, compute
# the ROC curve / AUC and the calibration-plot data, and draw its ROC curve.
model_endings = ["10", "11", "12", "13", "14"]
y_preds, fprs, tprs, aucs, cal_plot_datas = [], [], [], [], []

# ROC-Curve
plt.title('Receiver Operating Characteristic')

for model_ending in model_endings:
    weight_file = WEIGHT_DIR + "andrea_split/" + model_name + model_ending + ".h5"
    model_3d.load_weights(weight_file)
    y_pred = model_3d.predict(X_test)
    y_preds.append(y_pred)

    fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
    roc_auc = metrics.auc(fpr, tpr)
    fprs.append(fpr)
    tprs.append(tpr)
    aucs.append(roc_auc)

    plt.plot(fpr, tpr, label = 'AUC = %0.2f' % roc_auc)

    cal_plot_datas.append(fm.cal_plot_data_prep(y_pred, y_test))

# Legend first, then the dashed diagonal (which carries no legend label).
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'b--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Compare to Andrea

## Data Preparation

In [None]:
# Test-set results of the five R ensemble members on split 6. The unnamed first
# CSV column is renamed to "p_idx" in every frame.
ANDREA_ENS_CSV = '/tf/notebooks/brdd/xAI_stroke_3d/data/stroke_cimrsbinary_lossnll_wsno_augyes_cdftest_spl6_ens{}.csv'
(andrea_results_ens1,
 andrea_results_ens2,
 andrea_results_ens3,
 andrea_results_ens4,
 andrea_results_ens5) = (
    pd.read_csv(ANDREA_ENS_CSV.format(member)).rename(columns={"Unnamed: 0": "p_idx"})
    for member in range(1, 6)
)

In [None]:
## NOT NEEDED: same as above
# andrea_results_trafo = pd.read_csv('/tf/notebooks/brdd/xAI_stroke_3d/data/stroke_merged_bincdf_cimrsbinary.csv')
# andrea_results_trafo = andrea_results_trafo[
#     (andrea_results_trafo["loss"] == "nll") &
#     (andrea_results_trafo["type"] == "test") &
#     (andrea_results_trafo["spl"] == 6)]

In [None]:
# Calibration-plot table from the 'bincal_splnll.csv' file, restricted to the
# rows with mod "cimrsbinary", method "trafo" and weights "equal".
andrea_calplot_dat_spl = pd.read_csv('/tf/notebooks/brdd/xAI_stroke_3d/data/bincal_splnll.csv')
_keep_spl = (
    (andrea_calplot_dat_spl["mod"] == "cimrsbinary")
    & (andrea_calplot_dat_spl["method"] == "trafo")
    & (andrea_calplot_dat_spl["weights"] == "equal")
)
andrea_calplot_cibinary_spl = andrea_calplot_dat_spl[_keep_spl]
andrea_calplot_cibinary_spl.head()

In [None]:
# Calibration-plot table from the 'bincal_avgnll.csv' file, restricted to the
# rows with mod "cimrsbinary", method "trafo" and weights "equal".
andrea_calplot_dat = pd.read_csv('/tf/notebooks/brdd/xAI_stroke_3d/data/bincal_avgnll.csv')
_keep_avg = (
    (andrea_calplot_dat["mod"] == "cimrsbinary")
    & (andrea_calplot_dat["method"] == "trafo")
    & (andrea_calplot_dat["weights"] == "equal")
)
andrea_calplot_cibinary_avg = andrea_calplot_dat[_keep_avg]
andrea_calplot_cibinary_avg
# cal_plot(andrea_calplot_cibinary_avg, "midpoint", "prop", "lwr", "upr")

In [None]:
# fake trafo and linear averaging of results on split 6
# Put the per-model prediction arrays side by side (one column per model).
# The guard keeps the cell re-runnable: once y_preds has been stacked it is an
# array, and concatenating it again along axis 1 would fail.
if isinstance(y_preds, list):
    y_preds = np.concatenate(y_preds, axis = 1)
# Linear average: mean of the predicted probabilities across the models.
y_pred_linear_avg = np.mean(y_preds, axis = 1)
# "Trafo" average: apply fm.inverse_sigmoid, average across the models, then
# map back with fm.sigmoid.
y_pred_trafo_avg = fm.sigmoid(np.mean(fm.inverse_sigmoid(y_preds), axis = 1))

In [None]:
# Calibration-plot data for the linearly averaged and the trafo-averaged predictions.
cal_plot_linear, cal_plot_trafo = (
    fm.cal_plot_data_prep(averaged, y_test)
    for averaged in (y_pred_linear_avg, y_pred_trafo_avg)
)

In [None]:
# One row per test patient: identifiers, outcome, the Python predictions (per
# model and averaged) and Andrea's per-ensemble predictions (1 - "V2").
# NOTE(review): Andrea's columns are combined with the arrays without any check
# that the rows of her files follow the order of test_idx — confirm.
_result_columns = {
    "p_idx": test_idx+1,
    "p_id": p_id[test_idx],
    "mrs": Y_mrs[test_idx],
    "unfavorable": y_test,
}
_result_columns.update(
    {f"pred_prob_{member + 1}": y_preds[:, member] for member in range(5)}
)
_result_columns["pred_prob_linear"] = y_pred_linear_avg
_result_columns["pred_prob_trafo"] = y_pred_trafo_avg
_andrea_frames = (andrea_results_ens1, andrea_results_ens2, andrea_results_ens3,
                  andrea_results_ens4, andrea_results_ens5)
_result_columns.update(
    {f"andrea_pred_prob_ens{member}": 1 - frame["V2"]
     for member, frame in enumerate(_andrea_frames, start=1)}
)
results = pd.DataFrame(_result_columns)
results.head()

In [None]:
# Trafo averaging of Andrea's five ensemble predictions: fm.inverse_sigmoid,
# row-wise mean, fm.sigmoid; followed by the calibration-plot data of the result.
_andrea_member_columns = [f"andrea_pred_prob_ens{member}" for member in range(1, 6)]
_andrea_transformed = fm.inverse_sigmoid(results[_andrea_member_columns])
results["andrea_pred_prob_trafo"] = fm.sigmoid(np.mean(_andrea_transformed, axis = 1))
andrea_calplot_spl6_new = fm.cal_plot_data_prep(results["andrea_pred_prob_trafo"], y_test)

In [None]:
# Display the calibration-plot data computed from Andrea's trafo-averaged predictions.
andrea_calplot_spl6_new

## Results of Ensemble

In [None]:
# Results Andrea
# Metrics of Andrea's trafo-averaged predictions against the binary outcome.
fm.calc_metrics(results["unfavorable"], results["andrea_pred_prob_trafo"])

In [None]:
 # Results Python 
# Metrics of the Python trafo-averaged predictions against the binary outcome.
fm.calc_metrics(results["unfavorable"], results["pred_prob_trafo"])

In [None]:
# Plot AUC of Python and Andrea's results
# Both ROC curves are drawn from the trafo-averaged predictions; Andrea's curve
# is computed last, so fpr/tpr/threshold/roc_auc hold her values afterwards.
for roc_label, prob_column in (
    ('AUC Python = %0.2f', "pred_prob_trafo"),
    ('AUC Andrea = %0.2f', "andrea_pred_prob_trafo"),
):
    fpr, tpr, threshold = metrics.roc_curve(results["unfavorable"], results[prob_column])
    roc_auc = metrics.auc(fpr, tpr)
    plt.plot(fpr, tpr, label = roc_label % roc_auc)

plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'b--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

In [None]:
# Thresholds returned by the most recent metrics.roc_curve call
# (in the ROC comparison above that is the call on Andrea's predictions).
threshold

## Calibration Plots

In [None]:
!pip install seaborn
import seaborn as sns

In [None]:
# calplot of each ensemble with linear (green) and trafo (orange) averaged for split 6
# Iterate over the stored calibration data directly instead of indexing with a
# hard-coded count, so the loop always matches the number of evaluated models.
for member_cal_data in cal_plot_datas:
    fm.cal_plot(member_cal_data,
                "predicted_probability_middle", "observed_proportion",
                "observed_proportion_lower", "observed_proportion_upper",
                alpha = .35, show = False)
fm.cal_plot(cal_plot_linear, "predicted_probability_middle", "observed_proportion",
            "observed_proportion_lower", "observed_proportion_upper", col = "green", show = False)
# # additionally in blue averaged over all calplots
# fm.cal_plot(sum(cal_plot_datas)/5, "predicted_probability_middle", "observed_proportion",
#                         "observed_proportion_lower", "observed_proportion_upper", col = "blue", show = False)
# Last call keeps the default `show`, unlike the calls above that pass show = False.
fm.cal_plot(cal_plot_trafo, "predicted_probability_middle", "observed_proportion",
            "observed_proportion_lower", "observed_proportion_upper", col = "orange")

In [None]:
# Number the rows 0..3 repeatedly (6 repetitions, i.e. 24 rows are expected) and
# average the calibration columns per bin number.
# assign() returns a new frame, which avoids pandas' SettingWithCopyWarning that
# an in-place column assignment on the boolean-filtered frame would raise.
andrea_calplot_cibinary_spl = andrea_calplot_cibinary_spl.assign(
    bin_num=np.array(list(range(4))*6)
)
andrea_calplot_cibinary_spl_avg = andrea_calplot_cibinary_spl.groupby("bin_num")[["prop", "lwr", "upr", "midpoint"]].mean()

In [None]:
# Plot as in the paper: one curve per split (already trafo-averaged within each
# split) plus the averaged curve.
# Green: the per-bin average computed in this notebook; according to the author
# it should coincide with the paper's averaged curve and therefore be hidden.
# Orange: trafo-averaged result of the own implementation on split 6.
for split_number in range(1, 7):
    per_split = andrea_calplot_cibinary_spl[andrea_calplot_cibinary_spl["spl"] == split_number]
    fm.cal_plot(per_split, "midpoint", "prop", "lwr", "upr", alpha = .35, show = False)
fm.cal_plot(andrea_calplot_cibinary_spl_avg, "midpoint", "prop", "lwr", "upr", col = "green", show = False)
fm.cal_plot(andrea_calplot_cibinary_avg, "midpoint", "prop", "lwr", "upr", show = False)

fm.cal_plot(cal_plot_trafo,
            "predicted_probability_middle", "observed_proportion",
            "observed_proportion_lower", "observed_proportion_upper",
            col = "orange")

In [None]:
# Calibration plot comparison for split 6
# blue:   Andrea's original numbers
# green:  recomputed from Andrea's per-ensemble results, trafo averaged
# orange: own implementation, trafo averaged
# -------------------------------
# green and blue should be the same
_andrea_cal_columns = ("midpoint", "prop", "lwr", "upr")
_python_cal_columns = ("predicted_probability_middle", "observed_proportion",
                       "observed_proportion_lower", "observed_proportion_upper")

_andrea_split6 = andrea_calplot_cibinary_spl[andrea_calplot_cibinary_spl["spl"] == 6]
fm.cal_plot(_andrea_split6, *_andrea_cal_columns, show = False)
fm.cal_plot(andrea_calplot_spl6_new, *_python_cal_columns, col = "green", show = False)
fm.cal_plot(cal_plot_trafo, *_python_cal_columns, col = "orange")


In [None]:
# Recomputed calibration data for split 6 (from Andrea's per-ensemble results).
andrea_calplot_spl6_new

In [None]:
# Andrea's original calibration rows for split 6, for comparison with the cell above.
andrea_calplot_cibinary_spl[andrea_calplot_cibinary_spl["spl"] == 6]

## Patient Comparison

Multiple scatter plots with different comparison methods

In [None]:
# Python predictions per test patient: trafo average (x) vs. linear average (y).
g = sns.scatterplot(x="pred_prob_trafo", y="pred_prob_linear", hue = "unfavorable", data=results)
plt.legend(loc='lower right')
g.set(xlim=(0, 1), ylim=(0, 1))
# Dashed red identity line as a reference for equal predictions.
g.plot([0,1], [0,1], "r--")

In [None]:
# Trafo-averaged predictions per test patient: Python (x) vs. Andrea (y).
g = sns.scatterplot(x="pred_prob_trafo", y="andrea_pred_prob_trafo", hue = "unfavorable", data=results)
plt.legend(loc='lower right')
g.set(xlim=(0, 1), ylim=(0, 1))
# Dashed red identity line as a reference for equal predictions.
g.plot([0,1], [0,1], "r--")

In [None]:
# Two Python ensemble members against each other: member 2 (x) vs. member 5 (y).
g = sns.scatterplot(x="pred_prob_2", y="pred_prob_5", hue = "unfavorable", data=results)
plt.legend(loc='lower right')
g.set(xlim=(0, 1), ylim=(0, 1))
# Dashed red identity line as a reference for equal predictions.
g.plot([0,1], [0,1], "r--")

In [None]:
# Two of Andrea's ensemble members against each other: ens2 (x) vs. ens5 (y).
g = sns.scatterplot(x="andrea_pred_prob_ens2", y="andrea_pred_prob_ens5", hue = "unfavorable", data=results)
plt.legend(loc='lower right')
g.set(xlim=(0, 1), ylim=(0, 1))
# Dashed red identity line as a reference for equal predictions.
g.plot([0,1], [0,1], "r--")

In [None]:
# First ensemble member of each implementation: Python (x) vs. Andrea (y).
g = sns.scatterplot(x="pred_prob_1", y="andrea_pred_prob_ens1", hue = "unfavorable", data=results)
plt.legend(loc='lower right')
g.set(xlim=(0, 1), ylim=(0, 1))
# Dashed red identity line as a reference for equal predictions.
g.plot([0,1], [0,1], "r--")