# Functions

In [24]:
import os
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score, roc_auc_score, average_precision_score, roc_curve, multilabel_confusion_matrix, ConfusionMatrixDisplay, accuracy_score


def round_func(num):
    """Round ``num`` (scalar or array) to three decimal places with NumPy."""
    return np.round(num, decimals=3)

def get_metrics(true, pred, th):
    """Compute binary classification rates from labels and predictions.

    Args:
        true: 1-D array-like of binary ground-truth labels (0/1).
        pred: 1-D array-like of predictions (scores, 0/1 values or booleans).
            The input is never modified.
        th: Decision threshold. Entries ``>= th`` become 1 and all others 0.
            When ``None`` the predictions are used as given.

    Returns:
        Tuple ``(fpr, tpr, npv, f1, prec)``. A rate whose denominator is zero
        is returned as ``nan``.
    """
    pred_ = np.asarray(pred)
    if th is not None:
        # One comparison instead of two in-place passes: the sequential form
        # turned every entry into 0 whenever th > 1 (the freshly written 1s
        # were themselves < th).
        pred_ = (pred_ >= th).astype(int)

    # labels=[0, 1] pins the matrix to 2x2 so the 4-way unpack still works
    # when only one of the two labels occurs in the data.
    tn, fp, fn, tp = confusion_matrix(
        true, pred_, labels=[0, 1], normalize=None
    ).ravel()

    # 0/0 yields nan, as before; only the RuntimeWarning is silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        fpr = fp / (fp + tn)
        tpr = tp / (tp + fn)  # recall
        npv = tn / (tn + fn)
        prec = tp / (tp + fp)
        f1 = 2 * tpr * prec / (tpr + prec)
    return fpr, tpr, npv, f1, prec

def CI_metric_fcn(y_pred, y_true, indices, class_ind = None, type_ = None):
    """Evaluate one metric on the rows of ``y_true``/``y_pred`` picked by ``indices``.

    Both arrays are indexed as ``[:, class_ind]`` and arg-maxed along axis 1,
    so they are expected to be 2-D ``(n_samples, n_classes)``
    (presumably one-hot labels and per-class scores -- confirm with caller).

    Args:
        y_pred: Per-class prediction scores.
        y_true: Per-class ground-truth indicators.
        indices: Integer row indices (e.g. a bootstrap resample).
        class_ind: Column of the class of interest. Not used by
            ``"weighted_auc"`` and ``"accuracy"``.
        type_: One of ``"auc"``, ``"weighted_auc"``, ``"accuracy"``,
            ``"accuracy_class"``, ``"specificity"``, ``"recall"``,
            ``"precision"``, ``"npv"``.

    Returns:
        The requested metric value.

    Raises:
        ValueError: If ``type_`` is not one of the supported names.
    """
    thresholded_metrics = ("specificity", "recall", "precision", "npv")
    supported = ("auc", "weighted_auc", "accuracy", "accuracy_class") + thresholded_metrics
    # Validate up front: previously an unknown name fell through every branch
    # and failed on an unbound ``score`` at the return statement.
    if type_ not in supported:
        raise ValueError(
            f"Unknown metric type {type_!r}; expected one of {supported}"
        )

    if type_ == "auc":
        return roc_auc_score(y_true[:, class_ind][indices], y_pred[:, class_ind][indices])
    if type_ == "weighted_auc":
        return roc_auc_score(y_true[indices], y_pred[indices], multi_class='ovr', average='weighted')
    if type_ == "accuracy":
        return np.mean((np.argmax(y_true, 1) == np.argmax(y_pred, 1))[indices])

    # Everything below is one-vs-rest: "predicted as class_ind" vs. the rest.
    predicted_as_class = np.argmax(y_pred, 1)[indices] == class_ind
    if type_ == "accuracy_class":
        return np.mean(y_true[:, class_ind][indices] == predicted_as_class)

    fpr_th, tpr_th, npv_th, _f1_th, prec_th = get_metrics(
        y_true[indices, class_ind], predicted_as_class, th = 0.5
    )
    if type_ == "specificity":
        return 1 - fpr_th
    if type_ == "recall":
        return tpr_th
    if type_ == "precision":
        return prec_th
    return npv_th

    
def CI(y_pred, y_true, class_ind,metric,  n_bootstraps  = 100, ci_intervel = 0.95, n_classes_to_find = 4, return_bs_values = False):
    """Percentile-bootstrap confidence interval for one metric.

    Rows are resampled with replacement using a fixed seed (42), the metric
    is evaluated on every accepted resample via ``CI_metric_fcn``, and the
    interval bounds are read off the sorted scores.

    Args:
        y_pred: Per-class prediction scores, indexed by row.
        y_true: Per-class ground-truth indicators; arg-maxed along axis 1, so
            expected to be 2-D ``(n_samples, n_classes)``.
        class_ind: Class column for per-class metrics, or ``None``.
        metric: Metric name forwarded to ``CI_metric_fcn`` as ``type_``.
        n_bootstraps: Number of resamples drawn (rejected ones are not redrawn).
        ci_intervel: Confidence level in ``(0, 1]``.
        n_classes_to_find: A resample is rejected unless at least this many
            distinct classes occur among its labels.
        return_bs_values: Also return the list of bootstrap scores.

    Returns:
        ``(lower, upper)`` rounded to 3 decimals, plus the unsorted list of
        bootstrap scores when ``return_bs_values`` is true.

    Raises:
        ValueError: If ``ci_intervel`` is outside ``(0, 1]`` or every
            resample was rejected.
    """
    if not 0 < ci_intervel <= 1:
        raise ValueError(f"ci_intervel must be in (0, 1], got {ci_intervel}")

    rng = np.random.RandomState(42)  # fixed seed: control reproducibility
    n_samples = len(y_pred)
    # AUC / specificity are undefined unless the class column holds both
    # positives and negatives. The check must look at that column only: the
    # whole label matrix practically always contains both 0 and 1.
    needs_both_labels = metric in ("auc", "specificity") and class_ind is not None

    bootstrapped_scores = []
    for _ in range(n_bootstraps):
        # bootstrap by sampling with replacement on the prediction indices
        indices = rng.randint(0, n_samples, n_samples)
        sampled_true = y_true[indices]
        if needs_both_labels and len(np.unique(sampled_true[:, class_ind])) < 2:
            continue
        if len(np.unique(np.argmax(sampled_true, 1))) < n_classes_to_find:
            continue
        bootstrapped_scores.append(
            CI_metric_fcn(y_pred, y_true, indices, class_ind, type_ = metric)
        )

    if not bootstrapped_scores:
        raise ValueError(
            "No valid bootstrap sample: every resample was rejected "
            f"(n_classes_to_find={n_classes_to_find}, n_bootstraps={n_bootstraps})"
        )

    sorted_scores = np.sort(np.array(bootstrapped_scores))
    n_scores = len(sorted_scores)
    gap = (1 - ci_intervel) / 2
    # Clamp so that ci_intervel == 1.0 selects the last score instead of
    # indexing one past the end.
    confidence_lower = sorted_scores[min(int(gap * n_scores), n_scores - 1)]
    confidence_upper = sorted_scores[min(int((1 - gap) * n_scores), n_scores - 1)]

    if return_bs_values:
        return np.round(confidence_lower, 3), np.round(confidence_upper, 3), bootstrapped_scores
    return np.round(confidence_lower, 3), np.round(confidence_upper, 3)


def calculate_and_print_metrics(new_gts, new_pred):
    """Print overall and per-class classification metrics.

    Reads the module-level ``label2num`` mapping (class name -> column index)
    to label the per-class lines; classes whose index is outside the columns
    of ``new_gts`` are skipped.

    Args:
        new_gts: Ground-truth indicators; indexed as ``[:, class_ind]`` and
            arg-maxed along axis 1, so expected to be 2-D
            ``(n_samples, n_classes)``.
        new_pred: Per-class prediction scores with the same layout.

    Returns:
        None. All results are written to stdout.
    """
    true_labels = np.argmax(new_gts, 1)
    pred_labels = np.argmax(new_pred, 1)

    all_roc_auc = roc_auc_score(new_gts, new_pred, multi_class='ovr', average=None)
    all_ap = average_precision_score(new_gts, new_pred,  average=None)
    all_f1 = f1_score(true_labels, pred_labels,  average=None)
    all_cm = confusion_matrix(true_labels, pred_labels, normalize = None)
    avg_auc = round_func(roc_auc_score(new_gts, new_pred, multi_class='ovr', average='weighted'))

    print(f"\n========== Metrics===========")
    print("Average ROC AUC: ", all_roc_auc)
    print(f"Final Average ROC AUC: Weighted: {avg_auc} " )
    print("Average PR AUC: ", all_ap)
    print("Average F1: ", all_f1)
    print("Average CM: \n", all_cm)
    print(f"Accuracy = {np.mean(true_labels == pred_labels)}")

    # Use the argument's own width; the previous code consulted the global
    # ``gt`` here and only worked when that happened to be the same array.
    n_classes = new_gts.shape[1]
    for class_, class_ind in label2num.items():
        if class_ind >= n_classes:
            continue
        predicted_as_class = pred_labels == class_ind
        acc_class = np.mean(new_gts[:,class_ind] == predicted_as_class)
        fpr_th, tpr_th, npv_th, f1_th, prec_th = get_metrics(new_gts[:,class_ind], predicted_as_class, th = 0.5)
        print(f"Class: {class_} ({np.sum(new_gts[:,class_ind])}): Acc: {np.round(acc_class, 3)}, FPR: {np.round(fpr_th, 3)}, TPR: {np.round(tpr_th, 3)}, NPV: {np.round(npv_th, 3)}, F1: {np.round(f1_th, 3)}, PPV: {np.round(prec_th, 3)}")



    
def calculate_CIs_all_classes(new_gts, new_pred):
    """Print bootstrap confidence intervals for every metric and class.

    Uses 1000 bootstrap resamples per interval and the module-level
    ``label2num`` mapping (class name -> column index); classes whose index
    is outside the columns of ``new_gts`` are skipped.

    Args:
        new_gts: Ground-truth indicators, one column per class.
        new_pred: Per-class prediction scores with the same layout.

    Returns:
        None. All intervals are written to stdout.
    """
    n_bs = 1000
    n_classes = new_gts.shape[1]
    # Keyword arguments shared by every CI() call below.
    shared = dict(n_bootstraps=n_bs, return_bs_values=True, n_classes_to_find=n_classes)
    present = [(name, idx) for name, idx in label2num.items() if not idx >= n_classes]

    overall_metrics = (
        ("weighted_auc", "All class weighted AUC"),
        ("accuracy", "Accuracy"),
    )
    per_class_metrics = (
        ("accuracy_class", "Acc"),
        ("specificity", "Specificity"),
        ("recall", "Recall"),
        ("precision", "Precision"),
        ("npv", "NPV"),
    )

    print(f"\nConfidence Intervels: ")
    for name, idx in present:
        lo, hi, _ = CI(new_pred, new_gts, idx, metric='auc', **shared)
        print(f"{name} AUC: [{lo} - {hi}]")

    for metric_key, title in overall_metrics:
        lo, hi, _ = CI(new_pred, new_gts, class_ind=None, metric=metric_key, **shared)
        print(f"{title}: [{lo} - {hi}]")

    for name, idx in present:
        print(f"\n{name}: ")
        for metric_key, title in per_class_metrics:
            lo, hi, _ = CI(new_pred, new_gts, class_ind=idx, metric=metric_key, **shared)
            print(f"{title}: [{lo} - {hi}]")
    


In [25]:

# Class name -> column index into the ground-truth / prediction arrays.
label2num = dict(normal=0, dcm=1, hcm=2, ihd=3)

# Archive of stored model outputs, looked up below by "model_<n>" keys.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load
# archives from a trusted source.
all_predictions = np.load("all_model_predictions.npz", allow_pickle=True)

# Model number -> display name printed at the top of each evaluation cell.
model_num2name = dict(
    enumerate(
        (
            "3class-VAE(88)",
            "3class-VAE+LS(96)",
            "3class-RF(96)",
            "3class-XGB(96)",
            "4class-VAE(88)",
            "4class-VAE+LS(96)",
            "4class-VAE+LS w. co-training(96)",
            "4class-RF(96)",
            "4class-XGB(96)",
        ),
        start=1,
    )
)



# Model 1

In [26]:
model_num = 1
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 1: 3class-VAE(88)

Average ROC AUC:  [0.96162281 0.94630872 0.94445554]
Final Average ROC AUC: Weighted: 0.956 
Average PR AUC:  [0.96608718 0.87931307 0.87529146]
Average F1:  [0.93449782 0.8        0.8358209 ]
Average CM: 
 [[107   4   3]
 [  4  24   1]
 [  4   3  28]]
Accuracy = 0.8932584269662921
Class: normal (114.0): Acc: 0.916, FPR: 0.125, TPR: 0.939, NPV: 0.889, F1: 0.934, PPV: 0.93
Class: dcm (29.0): Acc: 0.933, FPR: 0.047, TPR: 0.828, NPV: 0.966, F1: 0.8, PPV: 0.774
Class: hcm (35.0): Acc: 0.938, FPR: 0.028, TPR: 0.8, NPV: 0.952, F1: 0.836, PPV: 0.875

Confidence Intervels: 
normal AUC: [0.924 - 0.989]
dcm AUC: [0.876 - 0.992]
hcm AUC: [0.894 - 0.983]
All class weighted AUC: [0.921 - 0.983]
Accuracy: [0.848 - 0.938]

normal: 
Acc: [0.876 - 0.955]
Specificity: [0.795 - 0.953]
Recall: [0.892 - 0.975]
Precision: [0.885 - 0.974]
NPV: [0.806 - 0.956]

dcm: 
Acc: [0.893 - 0.966]
Specificity: [0.916 - 0.981]
Recall: [0.682 - 0.958]
Precision: [0.611 - 0.917]
NPV: [0.935 - 0.99

# Model 2

In [27]:
model_num = 2
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 2: 3class-VAE+LS(96)

Average ROC AUC:  [0.96902412 0.96366582 0.94685315]
Final Average ROC AUC: Weighted: 0.964 
Average PR AUC:  [0.97879108 0.88162062 0.8905448 ]
Average F1:  [0.9380531  0.84745763 0.78873239]
Average CM: 
 [[106   2   6]
 [  2  25   2]
 [  4   3  28]]
Accuracy = 0.8932584269662921
Class: normal (114.0): Acc: 0.921, FPR: 0.094, TPR: 0.93, NPV: 0.879, F1: 0.938, PPV: 0.946
Class: dcm (29.0): Acc: 0.949, FPR: 0.034, TPR: 0.862, NPV: 0.973, F1: 0.847, PPV: 0.833
Class: hcm (35.0): Acc: 0.916, FPR: 0.056, TPR: 0.8, NPV: 0.951, F1: 0.789, PPV: 0.778

Confidence Intervels: 
normal AUC: [0.94 - 0.991]
dcm AUC: [0.924 - 0.992]
hcm AUC: [0.902 - 0.983]
All class weighted AUC: [0.936 - 0.985]
Accuracy: [0.848 - 0.933]

normal: 
Acc: [0.882 - 0.955]
Specificity: [0.833 - 0.969]
Recall: [0.882 - 0.971]
Precision: [0.906 - 0.982]
NPV: [0.794 - 0.949]

dcm: 
Acc: [0.916 - 0.978]
Specificity: [0.933 - 0.993]
Recall: [0.727 - 0.97]
Precision: [0.69 - 0.963]
NPV: [0.946 - 0.

# Model 3

In [28]:
model_num = 3
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 3: 3class-RF(96)

Average ROC AUC:  [0.96655702 0.97234436 0.95714286]
Final Average ROC AUC: Weighted: 0.966 
Average PR AUC:  [0.978855   0.90831707 0.88604122]
Average F1:  [0.95689655 0.86206897 0.81818182]
Average CM: 
 [[111   0   3]
 [  3  25   1]
 [  4   4  27]]
Accuracy = 0.9157303370786517
Class: normal (114): Acc: 0.944, FPR: 0.109, TPR: 0.974, NPV: 0.95, F1: 0.957, PPV: 0.941
Class: dcm (29): Acc: 0.955, FPR: 0.027, TPR: 0.862, NPV: 0.973, F1: 0.862, PPV: 0.862
Class: hcm (35): Acc: 0.933, FPR: 0.028, TPR: 0.771, NPV: 0.946, F1: 0.818, PPV: 0.871

Confidence Intervels: 
normal AUC: [0.937 - 0.99]
dcm AUC: [0.943 - 0.995]
hcm AUC: [0.923 - 0.987]
All class weighted AUC: [0.941 - 0.987]
Accuracy: [0.876 - 0.955]

normal: 
Acc: [0.91 - 0.978]
Specificity: [0.815 - 0.959]
Recall: [0.942 - 1.0]
Precision: [0.898 - 0.981]
NPV: [0.893 - 1.0]

dcm: 
Acc: [0.927 - 0.983]
Specificity: [0.943 - 0.994]
Recall: [0.733 - 0.971]
Precision: [0.722 - 0.971]
NPV: [0.946 - 0.994]

hcm: 

# Model 4

In [30]:
model_num = 4
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 4: 3class-XGB(96)

Average ROC AUC:  [0.96847588 0.96760009 0.96283716]
Final Average ROC AUC: Weighted: 0.967 
Average PR AUC:  [0.97568908 0.92470776 0.90123008]
Average F1:  [0.9527897  0.83636364 0.85294118]
Average CM: 
 [[111   0   3]
 [  5  23   1]
 [  3   3  29]]
Accuracy = 0.9157303370786517
Class: normal (114): Acc: 0.938, FPR: 0.125, TPR: 0.974, NPV: 0.949, F1: 0.953, PPV: 0.933
Class: dcm (29): Acc: 0.949, FPR: 0.02, TPR: 0.793, NPV: 0.961, F1: 0.836, PPV: 0.885
Class: hcm (35): Acc: 0.944, FPR: 0.028, TPR: 0.829, NPV: 0.959, F1: 0.853, PPV: 0.879

Confidence Intervels: 
normal AUC: [0.935 - 0.993]
dcm AUC: [0.914 - 0.997]
hcm AUC: [0.931 - 0.988]
All class weighted AUC: [0.936 - 0.99]
Accuracy: [0.876 - 0.955]

normal: 
Acc: [0.899 - 0.972]
Specificity: [0.788 - 0.952]
Recall: [0.942 - 1.0]
Precision: [0.887 - 0.975]
NPV: [0.891 - 1.0]

dcm: 
Acc: [0.916 - 0.978]
Specificity: [0.954 - 1.0]
Recall: [0.645 - 0.929]
Precision: [0.75 - 1.0]
NPV: [0.929 - 0.987]

hcm: 
Ac

# Model 5

In [31]:
model_num = 5
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 5: 4class-VAE(88)

Average ROC AUC:  [0.95048476 0.87778263 0.91539889 0.85412411]
Final Average ROC AUC: Weighted: 0.906 
Average PR AUC:  [0.91757803 0.51972195 0.80640231 0.72924792]
Average F1:  [0.87179487 0.55384615 0.80555556 0.68322981]
Average CM: 
 [[102   3   3   6]
 [  2  18   1   8]
 [  2   0  29   4]
 [ 14  15   4  55]]
Accuracy = 0.7669172932330827
Class: normal (114.0): Acc: 0.887, FPR: 0.118, TPR: 0.895, NPV: 0.918, F1: 0.872, PPV: 0.85
Class: dcm (29.0): Acc: 0.891, FPR: 0.076, TPR: 0.621, NPV: 0.952, F1: 0.554, PPV: 0.5
Class: hcm (35.0): Acc: 0.947, FPR: 0.035, TPR: 0.829, NPV: 0.974, F1: 0.806, PPV: 0.784
Class: ihd (88.0): Acc: 0.808, FPR: 0.101, TPR: 0.625, NPV: 0.829, F1: 0.683, PPV: 0.753

Confidence Intervels: 
normal AUC: [0.926 - 0.971]
dcm AUC: [0.801 - 0.94]
hcm AUC: [0.852 - 0.972]
ihd AUC: [0.81 - 0.896]
All class weighted AUC: [0.878 - 0.934]
Accuracy: [0.714 - 0.816]

normal: 
Acc: [0.85 - 0.925]
Specificity: [0.828 - 0.93]
Recall: [0.837 - 0.947

# Model 6

In [32]:
model_num = 6
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 6: 4class-VAE+LS(96)

Average ROC AUC:  [0.95227378 0.88142005 0.90773036 0.85565628]
Final Average ROC AUC: Weighted: 0.907 
Average PR AUC:  [0.92607609 0.51090128 0.8217983  0.73506642]
Average F1:  [0.89177489 0.5625     0.79452055 0.69512195]
Average CM: 
 [[103   2   3   6]
 [  1  18   1   9]
 [  2   0  29   4]
 [ 11  15   5  57]]
Accuracy = 0.7781954887218046
Class: normal (114.0): Acc: 0.906, FPR: 0.092, TPR: 0.904, NPV: 0.926, F1: 0.892, PPV: 0.88
Class: dcm (29.0): Acc: 0.895, FPR: 0.072, TPR: 0.621, NPV: 0.952, F1: 0.562, PPV: 0.514
Class: hcm (35.0): Acc: 0.944, FPR: 0.039, TPR: 0.829, NPV: 0.974, F1: 0.795, PPV: 0.763
Class: ihd (88.0): Acc: 0.812, FPR: 0.107, TPR: 0.648, NPV: 0.837, F1: 0.695, PPV: 0.75

Confidence Intervels: 
normal AUC: [0.928 - 0.974]
dcm AUC: [0.809 - 0.94]
hcm AUC: [0.844 - 0.966]
ihd AUC: [0.808 - 0.899]
All class weighted AUC: [0.878 - 0.934]
Accuracy: [0.726 - 0.827]

normal: 
Acc: [0.868 - 0.94]
Specificity: [0.857 - 0.951]
Recall: [0.846 -

# Model 7

In [33]:
model_num = 7
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 7: 4class-VAE+LS w. co-training(96)

Average ROC AUC:  [0.91701293 0.85726757 0.92801484 0.75114913]
Final Average ROC AUC: Weighted: 0.857 
Average PR AUC:  [0.89726329 0.46110229 0.82896522 0.63105423]
Average F1:  [0.66666667 0.44036697 0.69135802 0.48148148]
Average CM: 
 [[60 18  9 27]
 [ 1 24  1  3]
 [ 1  1 28  5]
 [ 4 37  8 39]]
Accuracy = 0.5676691729323309
Class: normal (114.0): Acc: 0.774, FPR: 0.039, TPR: 0.526, NPV: 0.73, F1: 0.667, PPV: 0.909
Class: dcm (29.0): Acc: 0.771, FPR: 0.236, TPR: 0.828, NPV: 0.973, F1: 0.44, PPV: 0.3
Class: hcm (35.0): Acc: 0.906, FPR: 0.078, TPR: 0.8, NPV: 0.968, F1: 0.691, PPV: 0.609
Class: ihd (88.0): Acc: 0.684, FPR: 0.197, TPR: 0.443, NPV: 0.745, F1: 0.481, PPV: 0.527

Confidence Intervels: 
normal AUC: [0.883 - 0.949]
dcm AUC: [0.779 - 0.922]
hcm AUC: [0.87 - 0.978]
ihd AUC: [0.69 - 0.808]
All class weighted AUC: [0.825 - 0.89]
Accuracy: [0.511 - 0.628]

normal: 
Acc: [0.726 - 0.831]
Specificity: [0.928 - 0.987]
Recall: [0.434 - 0.619

# Model 8

In [34]:
model_num = 8
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 8: 4class-RF(96)

Average ROC AUC:  [0.93778855 0.88789466 0.9314162  0.82312947]
Final Average ROC AUC: Weighted: 0.894 
Average PR AUC:  [0.89779003 0.47923199 0.74508316 0.72004258]
Average F1:  [0.87603306 0.4        0.74285714 0.67428571]
Average CM: 
 [[106   0   3   5]
 [  2   9   0  18]
 [  4   0  26   5]
 [ 16   7   6  59]]
Accuracy = 0.7518796992481203
Class: normal (114): Acc: 0.887, FPR: 0.145, TPR: 0.93, NPV: 0.942, F1: 0.876, PPV: 0.828
Class: dcm (29): Acc: 0.898, FPR: 0.03, TPR: 0.31, NPV: 0.92, F1: 0.4, PPV: 0.562
Class: hcm (35): Acc: 0.932, FPR: 0.039, TPR: 0.743, NPV: 0.961, F1: 0.743, PPV: 0.743
Class: ihd (88): Acc: 0.786, FPR: 0.157, TPR: 0.67, NPV: 0.838, F1: 0.674, PPV: 0.678

Confidence Intervels: 
normal AUC: [0.908 - 0.963]
dcm AUC: [0.832 - 0.932]
hcm AUC: [0.873 - 0.975]
ihd AUC: [0.768 - 0.877]
All class weighted AUC: [0.864 - 0.923]
Accuracy: [0.699 - 0.801]

normal: 
Acc: [0.85 - 0.925]
Specificity: [0.793 - 0.911]
Recall: [0.881 - 0.973]
Precisio

# Model 9

In [35]:
model_num = 9
print(f"Model {model_num}: {model_num2name[model_num]}")

# Unpack this model's stored entry once, then pull labels and predictions.
model_outputs = all_predictions[f"model_{model_num}"].item()
gt, pred = model_outputs["gt"], model_outputs["pred"]

calculate_and_print_metrics(gt, pred)
calculate_CIs_all_classes(gt, pred)

Model 9: 4class-XGB(96)

Average ROC AUC:  [0.95025392 0.88345701 0.9144094  0.84620787]
Final Average ROC AUC: Weighted: 0.904 
Average PR AUC:  [0.9059978  0.51657649 0.78036425 0.73216575]
Average F1:  [0.88607595 0.48       0.7826087  0.70454545]
Average CM: 
 [[105   0   2   7]
 [  2  12   0  15]
 [  4   0  27   4]
 [ 12   9   5  62]]
Accuracy = 0.7744360902255639
Class: normal (114): Acc: 0.898, FPR: 0.118, TPR: 0.921, NPV: 0.937, F1: 0.886, PPV: 0.854
Class: dcm (29): Acc: 0.902, FPR: 0.038, TPR: 0.414, NPV: 0.931, F1: 0.48, PPV: 0.571
Class: hcm (35): Acc: 0.944, FPR: 0.03, TPR: 0.771, NPV: 0.966, F1: 0.783, PPV: 0.794
Class: ihd (88): Acc: 0.805, FPR: 0.146, TPR: 0.705, NPV: 0.854, F1: 0.705, PPV: 0.705

Confidence Intervels: 
normal AUC: [0.925 - 0.973]
dcm AUC: [0.815 - 0.937]
hcm AUC: [0.832 - 0.976]
ihd AUC: [0.794 - 0.893]
All class weighted AUC: [0.873 - 0.934]
Accuracy: [0.726 - 0.823]

normal: 
Acc: [0.861 - 0.932]
Specificity: [0.826 - 0.931]
Recall: [0.869 - 0.966]
P