In [None]:
import pandas as pd
import numpy as np
from sklearn.calibration import calibration_curve
import matplotlib.lines as mlines
import matplotlib.transforms as mtransforms
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.metrics import auc, roc_curve, precision_recall_curve

In [None]:
# Read the four test-set prediction tables: ECG and ECG+PRS models,
# each stored once per sample and once per patient.
(
    ecg_per_sample,
    ecg_per_patient,
    ecg_prs_per_sample,
    ecg_prs_per_patient,
) = (
    pd.read_csv(csv_path, sep=",")
    for csv_path in (
        "Plots_performance_test_set/ecg_predictions_per_sample.csv",
        "Plots_performance_test_set/ecg_predictions_per_patient.csv",
        "Plots_performance_test_set/ecg_prs_predictions_per_sample.csv",
        "Plots_performance_test_set/ecg_prs_predictions_per_patient.csv",
    )
)

In [None]:
# Split every prediction table into its label column and its score column.
# NOTE(review): the per-patient tables are indexed with the same
# "*_per_sample" column names as the per-sample tables -- confirm that this
# matches the header of the per-patient CSV files.
ecg_per_sample_labels, ecg_per_sample_proba = (
    ecg_per_sample["label_per_sample"],
    ecg_per_sample["prediction_per_sample"],
)

ecg_per_patient_labels, ecg_per_patient_proba = (
    ecg_per_patient["label_per_sample"],
    ecg_per_patient["prediction_per_sample"],
)

ecg_prs_per_sample_labels, ecg_prs_per_sample_proba = (
    ecg_prs_per_sample["label_per_sample"],
    ecg_prs_per_sample["prediction_per_sample"],
)

ecg_prs_per_patient_labels, ecg_prs_per_patient_proba = (
    ecg_prs_per_patient["label_per_sample"],
    ecg_prs_per_patient["prediction_per_sample"],
)

In [None]:
def bin_total(y_true, y_prob, n_bins):
    bins = np.linspace(0., 1. + 1e-8, n_bins + 1)

    # In sklearn.calibration.calibration_curve,
    # the last value in the array is always 0.
    binids = np.digitize(y_prob, bins) - 1
    return np.bincount(binids, minlength=len(bins))

In [None]:
# bin data and normalise counts
def counts_to_percentages(probabilities):
    """Return the percentage of scores falling into each of ten 0.1-wide bins.

    Bin ``k`` (for k = 1..8) holds scores with ``k/10 <= score < (k+1)/10``.
    The first bin holds every score below 0.1 and the last bin holds every
    score at or above 0.9, so out-of-range values are absorbed by the two
    outer bins. NaN scores belong to no bin and are left out of the total.

    Parameters
    ----------
    probabilities : array-like of float
        Predicted scores (list, numpy array, pandas Series, ...).

    Returns
    -------
    numpy.ndarray
        Ten floats summing to 100. If there is nothing to count (empty or
        all-NaN input) ten zeros are returned instead of dividing by zero.
    """
    n_bins = 10
    scores = np.asarray(probabilities, dtype=float).ravel()
    # NaN compares False against every boundary, so it is never counted.
    scores = scores[~np.isnan(scores)]

    if scores.size == 0:
        # Avoid the 0/0 division that would otherwise produce an all-NaN result.
        return np.zeros(n_bins)

    # Literal boundaries (rather than np.linspace) so that values sitting
    # exactly on a boundary, e.g. 0.3, are compared against the same floats
    # as a plain ``val >= 0.3`` test would use.
    inner_edges = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

    # digitize returns 0 for score < 0.1 and 9 for score >= 0.9, with each
    # boundary value assigned to the bin that starts at it.
    bin_ids = np.digitize(scores, inner_edges)
    counts = np.bincount(bin_ids, minlength=n_bins)

    return counts / counts.sum() * 100
    

In [None]:
#plot all calibration plots and histograms together
def calibration_together (ecg_labels, ecg_probas, ecg_prs_labels, ecg_prs_probas, plot_name): 
    """Draw calibration curves and score histograms for both models and save them.

    The figure has two stacked panels:

    * top: calibration curve (10 bins) of the ECG model and of the ECG+PRS
      model, together with the ``y = x`` reference line;
    * bottom: percentage of predictions per 0.1-wide score bin for each model.

    Parameters
    ----------
    ecg_labels, ecg_probas : array-like
        True labels and predicted scores of the ECG model.
    ecg_prs_labels, ecg_prs_probas : array-like
        True labels and predicted scores of the ECG+PRS model.
    plot_name : str
        File name without extension; the figure is written to
        ``Plots_performance_test_set/<plot_name>.png``.
    """
    print("plot curves and save in one png file")

    # rc settings only apply to artists created afterwards, so they must be in
    # place before the figure, its axes and its legends are built. They are
    # still set globally, so later figures keep seeing them.
    plt.rc("axes", labelsize=25)
    plt.rc("legend", fontsize=20)
    plt.rc("xtick", labelsize = 20)
    plt.rc("ytick", labelsize = 20)

    # Both panels are saved together in one png file.
    fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(15, 20))

    # CALIBRATION CURVES
    # calibration_curve returns (fraction of positives, mean predicted score)
    # for every non-empty bin.
    prob_true, prob_pred = calibration_curve(ecg_labels, ecg_probas, n_bins=10)
    ax1.plot(prob_pred, prob_true, marker='o', linewidth=1.2, label='ECG model', color = "blue")
    ax1.set(xlabel= 'Predicted score', ylabel= 'True frequency in each bin')

    prob_true, prob_pred = calibration_curve(ecg_prs_labels, ecg_prs_probas, n_bins=10)
    ax1.plot(prob_pred, prob_true, marker='o', linewidth=1, label='ECG+PRS model', color = "orange")

    # Reference diagonal drawn in data coordinates so that it really is y = x
    # whatever limits autoscaling picks for the two axes.
    ax1.plot([0, 1], [0, 1], color='black', linestyle='--', linewidth=0.9, label= "Perfectly calibrated")
    ax1.legend(loc="upper left")

    # HISTOGRAMS
    # One dummy sample at the left edge of each bin; the real bar heights are
    # supplied through ``weights``.
    bin_left_edges = np.arange(0,1,0.1)

    # If values in [0, 1] are wanted instead of percentages, divide by 100
    # here and change the y-limit below to [0, 1].
    ecg_percentages = counts_to_percentages(ecg_probas)
    ax2.hist(bin_left_edges, range=(0, 1), bins=10, weights= ecg_percentages, label='ECG model',
                 histtype="step", lw=2.5, color = "blue")

    ecg_prs_percentages = counts_to_percentages(ecg_prs_probas)
    ax2.hist(bin_left_edges, range=(0, 1), bins=10, weights= ecg_prs_percentages, label='ECG+PRS model',
                 histtype="step", lw=1.5, color = "orange")

    ax2.set_xlabel("Mean predicted score")
    ax2.set_ylabel("Percentage of counts")
    ax2.legend(loc="upper center")
    ax2.set_ylim([0,100])

    fig.savefig("Plots_performance_test_set/" + plot_name+".png")


In [None]:
# Calibration figure for both models, per-sample predictions.
calibration_together(
    ecg_per_sample_labels,
    ecg_per_sample_proba,
    ecg_prs_per_sample_labels,
    ecg_prs_per_sample_proba,
    "Calibration curves for ECG and ECG-PRS mdoels per sample",
)

In [None]:
# NOTE(review): same arguments and same output file name as the previous
# cell, so this regenerates and overwrites that PNG. Looks redundant --
# confirm before removing.
calibration_together(
    ecg_per_sample_labels,
    ecg_per_sample_proba,
    ecg_prs_per_sample_labels,
    ecg_prs_per_sample_proba,
    "Calibration curves for ECG and ECG-PRS mdoels per sample",
)

In [None]:
# Calibration figure for both models, per-patient predictions.
calibration_together(
    ecg_per_patient_labels,
    ecg_per_patient_proba,
    ecg_prs_per_patient_labels,
    ecg_prs_per_patient_proba,
    "Calibration curves for ECG and ECG-PRS mdoels per patient",
)

# ROC

In [None]:
def ROC_curves(ecg_labels, ecg_probas, ecg_prs_labels, ecg_prs_probas, plot_name):
    """Plot the ROC curves of both models on one axis and save the figure.

    Each curve's legend entry carries its area under the curve, rounded to
    two decimals. A dashed diagonal marks the random classifier.

    Parameters
    ----------
    ecg_labels, ecg_probas : array-like
        True labels and predicted scores of the ECG model.
    ecg_prs_labels, ecg_prs_probas : array-like
        True labels and predicted scores of the ECG+PRS model.
    plot_name : str
        Appended to ``Plots_performance_test_set/`` to form the output path;
        no extension is added here.
    """
    # rc settings only apply to artists created afterwards, so set them
    # before the axes and legend exist. They remain set globally.
    plt.rc("axes", labelsize=25)
    plt.rc("legend", fontsize=20)

    fig, ax = plt.subplots(figsize=(10,10))

    # The thresholds returned by roc_curve are not needed for the plot.
    ecg_fpr, ecg_tpr, _ = roc_curve(ecg_labels, ecg_probas)
    ecg_auc_coef = round(auc(ecg_fpr, ecg_tpr),2)
    ax.plot(ecg_fpr, ecg_tpr, marker=".", label = " ECG model - AUC: " + str(ecg_auc_coef), color = "blue")

    ecg_prs_fpr, ecg_prs_tpr, _ = roc_curve(ecg_prs_labels, ecg_prs_probas)
    ecg_prs_auc_coef = round(auc(ecg_prs_fpr, ecg_prs_tpr),2)
    ax.plot(ecg_prs_fpr, ecg_prs_tpr, marker=".", label = " ECG+PRS model - AUC: " + str(ecg_prs_auc_coef), color = "orange")

    # Drawn in axes coordinates; this coincides with y = x because both axis
    # limits are fixed to [0, 1] just below.
    ax.plot([0,1], [0,1], transform = ax.transAxes, linestyle="--", label="Random Classifier")
    ax.set_ylim(bottom=0, top = 1)
    ax.set_xlim([0,1])
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.legend()
    fig.savefig("Plots_performance_test_set/" + plot_name)

In [None]:
# ROC figure for both models, per-sample predictions.
ROC_curves(
    ecg_per_sample_labels,
    ecg_per_sample_proba,
    ecg_prs_per_sample_labels,
    ecg_prs_per_sample_proba,
    "ROC curves for ECG and ECG-PRS mdoels per sample",
)

In [None]:
# NOTE(review): same arguments and same output file name as the previous
# cell, so this regenerates and overwrites that figure. Looks redundant --
# confirm before removing.
ROC_curves(
    ecg_per_sample_labels,
    ecg_per_sample_proba,
    ecg_prs_per_sample_labels,
    ecg_prs_per_sample_proba,
    "ROC curves for ECG and ECG-PRS mdoels per sample",
)

In [None]:
# ROC figure for both models, per-patient predictions.
ROC_curves(
    ecg_per_patient_labels,
    ecg_per_patient_proba,
    ecg_prs_per_patient_labels,
    ecg_prs_per_patient_proba,
    "ROC curves for ECG and ECG-PRS mdoels per patient",
)

In [None]:
def PR_curves(ecg_labels, ecg_probas, ecg_prs_labels, ecg_prs_probas, plot_name):
    """Plot the precision-recall curves of both models and save the figure.

    Each curve's legend entry carries the area under its precision-recall
    curve (computed with ``auc`` over recall/precision), rounded to two
    decimals.

    Parameters
    ----------
    ecg_labels, ecg_probas : array-like
        True labels and predicted scores of the ECG model.
    ecg_prs_labels, ecg_prs_probas : array-like
        True labels and predicted scores of the ECG+PRS model.
    plot_name : str
        Appended to ``Plots_performance_test_set/`` to form the output path;
        no extension is added here.
    """
    # rc settings only apply to artists created afterwards, so set them
    # before the axes and legend exist instead of relying on an earlier cell
    # having done so. They remain set globally.
    plt.rc("axes", labelsize=25)
    plt.rc("legend", fontsize=20)

    fig, ax = plt.subplots(figsize=(10,10))

    # The thresholds returned by precision_recall_curve are not needed here.
    ecg_precision, ecg_recall, _ = precision_recall_curve(ecg_labels, ecg_probas)
    ecg_auprc_coef = round(auc(ecg_recall, ecg_precision),2)
    ax.plot(ecg_recall, ecg_precision, marker=".", label = " ECG model - AUPRC: " + str(ecg_auprc_coef), color = "blue")

    ecg_prs_precision, ecg_prs_recall, _ = precision_recall_curve(ecg_prs_labels, ecg_prs_probas)
    ecg_prs_auprc_coef = round(auc(ecg_prs_recall, ecg_prs_precision),2)
    ax.plot(ecg_prs_recall, ecg_prs_precision, marker=".", label = " ECG+PRS model - AUPRC: " + str(ecg_prs_auprc_coef), color = "orange")

    ax.set_ylim(bottom=0, top = 1)
    ax.set_xlim([0,1])
    ax.set_xlabel("Recall (Positive label: BrP)")
    ax.set_ylabel("Precision (Positive label: BrP)")
    ax.legend()
    fig.savefig("Plots_performance_test_set/" + plot_name)

In [None]:
# Precision-recall figure for both models, per-sample predictions.
PR_curves(
    ecg_per_sample_labels,
    ecg_per_sample_proba,
    ecg_prs_per_sample_labels,
    ecg_prs_per_sample_proba,
    "PR curves for ECG and ECG-PRS mdoels per sample",
)

In [None]:
# Precision-recall figure for both models, per-patient predictions.
PR_curves(
    ecg_per_patient_labels,
    ecg_per_patient_proba,
    ecg_prs_per_patient_labels,
    ecg_prs_per_patient_proba,
    "PR curves for ECG and ECG-PRS mdoels per patient",
)

# Distribution plots

## ECG per sample

In [None]:
# Seaborn-wide look: enlarged fonts on a plain white background.
sns.set(font_scale=3)
sns.set_style("white")

# Rebinds ``ecg_per_sample`` to a two-column frame (label, score) built from
# the series extracted earlier.
ecg_per_sample = pd.DataFrame(
    {"label": ecg_per_sample_labels, "proba": ecg_per_sample_proba}
)

# Density histogram of the predicted score, one step outline per label value,
# using ten 0.1-wide bins.
g = sns.displot(
    ecg_per_sample,
    x="proba",
    hue="label",
    element="step",
    fill=True,
    stat="density",
    legend=False,
    palette=["tab:green", "tab:red"],
    bins=np.arange(0, 1.1, 0.1),
    height=10,
    aspect=1,
)

plt.xlabel('Predicted score')
# NOTE(review): these legend texts are matched to the drawn artists by order,
# not by label value -- confirm that "BrP" ends up on the intended class.
plt.legend(labels=["BrP","No BrP"])
plt.savefig("Plots_performance_test_set/ECG_ps_discr.png")

## ECG per patient

In [None]:
# Rebinds ``ecg_per_patient`` to a two-column frame (label, score) built from
# the series extracted earlier.
ecg_per_patient = pd.DataFrame(
    {"label": ecg_per_patient_labels, "proba": ecg_per_patient_proba}
)

# Density histogram of the predicted score, one step outline per label value,
# using ten 0.1-wide bins.
g = sns.displot(
    ecg_per_patient,
    x="proba",
    hue="label",
    element="step",
    fill=True,
    stat="density",
    legend=False,
    palette=["tab:green", "tab:red"],
    bins=np.arange(0, 1.1, 0.1),
    height=10,
    aspect=1,
)

plt.xlabel('Predicted score')
# NOTE(review): these legend texts are matched to the drawn artists by order,
# not by label value -- confirm that "BrP" ends up on the intended class.
plt.legend(labels=["BrP","No BrP"])
plt.savefig("Plots_performance_test_set/ECG_pp_discr.png")

## ECG PRS per sample

In [None]:
# Rebinds ``ecg_prs_per_sample`` to a two-column frame (label, score) built
# from the series extracted earlier.
ecg_prs_per_sample = pd.DataFrame(
    {"label": ecg_prs_per_sample_labels, "proba": ecg_prs_per_sample_proba}
)

# Density histogram of the predicted score, one step outline per label value,
# using ten 0.1-wide bins.
g = sns.displot(
    ecg_prs_per_sample,
    x="proba",
    hue="label",
    element="step",
    fill=True,
    stat="density",
    legend=False,
    palette=["tab:green", "tab:red"],
    bins=np.arange(0, 1.1, 0.1),
    height=10,
    aspect=1,
)

plt.xlabel('Predicted score')
# NOTE(review): these legend texts are matched to the drawn artists by order,
# not by label value -- confirm that "BrP" ends up on the intended class.
plt.legend(labels=["BrP","No BrP"])
plt.savefig("Plots_performance_test_set/ECG_PRS_ps_discr.png")

## ECG PRS per patient

In [None]:
# Rebinds ``ecg_prs_per_patient`` to a two-column frame (label, score) built
# from the series extracted earlier.
ecg_prs_per_patient = pd.DataFrame(
    {"label": ecg_prs_per_patient_labels, "proba": ecg_prs_per_patient_proba}
)

# Density histogram of the predicted score, one step outline per label value,
# using ten 0.1-wide bins.
g = sns.displot(
    ecg_prs_per_patient,
    x="proba",
    hue="label",
    element="step",
    fill=True,
    stat="density",
    legend=False,
    palette=["tab:green", "tab:red"],
    bins=np.arange(0, 1.1, 0.1),
    height=10,
    aspect=1,
)

plt.xlabel('Predicted score')
# NOTE(review): these legend texts are matched to the drawn artists by order,
# not by label value -- confirm that "BrP" ends up on the intended class.
plt.legend(labels=["BrP","No BrP"])
plt.savefig("Plots_performance_test_set/ECG_PRS_pp_discr.png")