# Results
Notebook to plot ROC curves and confusion matrices for single models and ensemble models, for the different tasks under study (anomaly detection, origin classification and risk classification). The plots can be generated for the entire test sets or stratified by gender.

The notebook uses the output files named `predictions_{task}_{dataset}_{strategy}.xlsx` (for example `predictions_anomaly_detection_test_internal_train.xlsx`) that contain predictions for single models and ensemble models, which are generated by the script `plot.py`.

In [None]:
import os
from os.path import join
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import pandas as pd
import numpy as np
from roc_utils import plot_roc, plot_roc_bootstrap, compute_roc, plot_mean_roc, compute_mean_roc
from tableone import TableOne

from evaluation_core import plot_confusion_matrix, add_gender, compute_metrics

# Human-readable dataset names used in figure titles and legends.
titles_dict = dict(
    train="Train",
    test_internal="Internal Test",
    test_external="External Test",
)

# Plain white seaborn theme for every figure produced below.
sns.set_theme(style="white")

# Colours of matplotlib's "tab10" colormap; the ROC plots index it by model number.
color_palette = matplotlib.colormaps["tab10"].colors

In [None]:
# --- Evaluation settings -------------------------------------------------
random_seed = 42  # seed forwarded to the bootstrapped ROC computation
n_boot = 10000  # number of bootstrap iterations for ROC confidence intervals
threshold = 0.5  # default cut-off applied to the ensemble score
thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]  # Other thresholds explored
threshold_title = "Cut-Off"  # wording used for the threshold in figure titles

# --- Input / output settings ---------------------------------------------
dpi = 300  # resolution of the saved figures
input_path = "raw_data_plots_tables"
output_folder = "images/paper/"
os.makedirs(output_folder, exist_ok=True)  # make sure the output directory exists

In [None]:
# Tasks under study; the names are used verbatim in the prediction file names.
tasks = ["anomaly_detection", "origin_classification", "risk_classification"]

# Test datasets on which every task is evaluated.
test_datasets = ["test_internal", "test_external"]

In [None]:
# Set to True to display figures inline; with False each figure is closed instead.
show = False


def plt_show():
    """Show the current matplotlib figure if ``show`` is true, otherwise close it.

    The module-level ``show`` flag is read at call time, so changing it in a
    later cell takes effect without redefining this function.
    """
    finish = plt.show if show else plt.close
    finish()

## Table 1
Summary of different classification metrics for the ensemble models for different test datasets.

In [None]:
def table1(g="", tasks=("anomaly_detection", "origin_classification", "risk_classification"),
           datasets=("test_internal", "test_external"), strategy="train"):
    """Build the summary table of ensemble metrics for each task / dataset pair.

    For every combination the workbook
    ``predictions_{task}_{dataset}_{strategy}.xlsx`` is read from ``input_path``
    (the path is passed through ``add_gender`` together with ``g``) and
    ``compute_metrics`` is evaluated on its ``label`` and ``ensemble`` columns.

    Args:
        g: Gender selector forwarded to ``add_gender`` (the notebook uses
            ``""``, ``"m"`` and ``"f"``).
        tasks: Task names used in the file names and in the column header.
        datasets: Dataset names used in the file names and in the column header.
        strategy: Training-strategy suffix of the prediction files. Defaults to
            ``"train"``, the value that was previously hard-coded.

    Returns:
        pandas.DataFrame with the first seven entries returned by
        ``compute_metrics`` as rows and a (task, dataset) MultiIndex as
        columns, rounded to three decimals.
    """
    labels = []
    columns = []
    for task in tasks:
        for test_dataset in datasets:
            path = add_gender(join(input_path, f"predictions_{task}_{test_dataset}_{strategy}.xlsx"), g)
            df = pd.read_excel(path)
            columns.append(pd.DataFrame.from_dict(compute_metrics(df["label"], df["ensemble"]), orient="index"))
            labels.append((task.replace("_", " ").capitalize(), test_dataset.replace("_", " ").capitalize()))

    # Keep only the first seven metrics; one column per (task, dataset) pair.
    metrics = pd.concat(columns, axis=1).iloc[:7]
    metrics.columns = pd.MultiIndex.from_tuples(labels)
    return metrics.round(3)

In [None]:
metrics = table1()
# Write next to the other outputs, honouring the configured output folder.
metrics.to_excel(join(output_folder, "Table1.xlsx"))
metrics

## Figure 3
ROC curves including the 5 different folds and the ensemble of 5 models across different tasks for various test datasets.

In [None]:
def plot_roc_single_ensemble(tasks, set_names, colors, strategy, gender, fig_base_name):
    """Plot single-model and ensemble ROC curves for every task / dataset pair.

    For each combination the workbook
    ``predictions_{task}_{set_name}_{strategy}.xlsx`` is read from
    ``input_path`` (path passed through ``add_gender``) and two figures are
    saved in ``output_folder``:

    * ``{fig_base_name}_{task}_{set_name}_singleModels.png``: one ROC curve
      per column ``model_0`` ... ``model_4``;
    * ``{fig_base_name}_{task}_{set_name}_ensemble.png``: bootstrapped ROC
      curve of the ``ensemble`` column (``n_boot`` iterations, ``random_seed``).

    Args:
        tasks: Iterable of task names.
        set_names: Dataset keys; each must be a key of ``titles_dict``.
        colors: Colours of the ensemble curves, paired with ``set_names`` by
            ``zip`` (surplus entries on either side are ignored).
        strategy: Training-strategy suffix of the prediction files.
        gender: Gender selector forwarded to ``add_gender`` for both the input
            path and the figure titles.
        fig_base_name: Prefix of the saved figure file names.
    """
    for task in tasks:
        # Computed outside the f-strings: reusing the enclosing quote type inside
        # a replacement field is a SyntaxError before Python 3.12.
        task_title = task.replace("_", " ").capitalize()
        for set_name, color in zip(set_names, colors):
            set_title = titles_dict[set_name]
            df = pd.read_excel(add_gender(join(input_path, f"predictions_{task}_{set_name}_{strategy}.xlsx"), gender))
            figure_stem = join(output_folder, f"{fig_base_name}_{task}_{set_name}")

            # Plot ROC curves for single models
            for i in range(5):
                roc = compute_roc(X=df[f"model_{i}"], y=df["label"], pos_label=1)
                plot_roc(roc, label=f"Model {i + 1}", color=color_palette[i])
            plt.title(add_gender(f"{task_title} - {set_title} - 5-Folds", gender, True))
            plt.savefig(f"{figure_stem}_singleModels.png", dpi=dpi)
            plt_show()

            # Plot ROC curve for the ensemble model
            plot_roc_bootstrap(X=df["ensemble"], y=df["label"], pos_label=1, label=f"{set_title}",
                               color=color, n_bootstrap=n_boot, show_boots=False, random_state=random_seed)
            plt.title(add_gender(f"{task_title} - {set_title} - Ensemble", gender, True))
            plt.suptitle("")     # Clear the subtitle
            plt.savefig(f"{figure_stem}_ensemble.png", dpi=dpi)
            plt_show()

In [None]:
strategy = "train"
gender = ""
plot_roc_single_ensemble(tasks=tasks, set_names=test_datasets, colors=["red", "blue"], strategy=strategy, gender=gender,
                         fig_base_name="Figure3")

## Figure 4
Confusion matrices of the ensemble models for all tasks and testing datasets.

In [None]:
def plot_confusion_matrices(tasks, strategy, set_names, gender, thresh, base_name):
    """Save one ensemble confusion matrix per task / dataset pair.

    Each workbook ``predictions_{task}_{set_name}_{strategy}.xlsx`` is read
    from ``input_path`` (path passed through ``add_gender``). The ``ensemble``
    column is binarised with ``> thresh`` and compared with the integer-cast
    ``label`` column by ``plot_confusion_matrix``, which receives the output
    path ``{output_folder}{base_name}_{task}_{set_name}.png``.

    Args:
        tasks: Iterable of task names.
        strategy: Training-strategy suffix of the prediction files.
        set_names: Dataset keys; each must be a key of ``titles_dict``.
        gender: Gender selector forwarded to ``add_gender``.
        thresh: Cut-off; scores strictly greater than it count as positive.
        base_name: Prefix of the saved figure file names.
    """
    for task in tasks:
        # Computed outside the f-string: reusing the enclosing quote type inside
        # a replacement field is a SyntaxError before Python 3.12.
        task_title = task.replace("_", " ").capitalize()
        for set_name in set_names:
            df = pd.read_excel(add_gender(join(input_path, f"predictions_{task}_{set_name}_{strategy}.xlsx"), gender))
            title = add_gender(f"{task_title} - {titles_dict[set_name]}", gender, True)
            plot_confusion_matrix(predictions=df["ensemble"] > thresh, labels=df["label"].astype(int),
                                  output_path=join(output_folder, f"{base_name}_{task}_{set_name}.png"),
                                  title=title, show=show, dpi=dpi)
            plt_show()

In [None]:
strategy = "train"
plot_confusion_matrices(tasks=tasks, strategy=strategy, set_names=["test_internal", "test_external"], gender="",
                        thresh=threshold, base_name="Figure4")

## Figure 6
t-SNE maps of the latent features extracted from the last layer of one of the models trained for anomaly detection.

In [None]:
def plot_tsne(label, gender, fig_base_name, show_legend=True):
    """Scatter-plot the t-SNE embedding, coloured and styled by ``label``.

    Reads ``tsne_test_external-train-test_internal.xlsx`` from ``input_path``
    (path passed through ``add_gender``), drops rows whose ``label`` value is
    missing, sorts the rest by ``label`` in descending order and plots columns
    ``tsne_1`` / ``tsne_2``. The figure is saved in ``output_folder``.

    Args:
        label: Column used for both hue and marker style. Its second
            ``"_"``-separated token becomes part of the file name, so it must
            contain at least one underscore (e.g. ``"labels_anomaly"``).
        gender: Gender selector forwarded to ``add_gender`` for the input path
            and the output file name.
        fig_base_name: Prefix of the saved figure file name.
        show_legend: When false the legend is removed, a smaller figure and
            marker size are used, and ``_noLegend`` is appended to the file name.
    """
    combination = ('test_external', 'train', 'test_internal')
    n_styles = 2 * len(combination)
    # Two alternating colours and one marker shape per pair of hue levels
    # (presumably dataset x class; confirm against the label columns).
    colors = [r"#50ad76", r"#ff7979"] * 3
    markers = ["X", "X", "o", "o", "^", "^"]

    path_tsne = add_gender(join(input_path, "tsne_" + "-".join(combination) + ".xlsx"), gender)
    df_tsne = pd.read_excel(path_tsne)
    df_tsne = df_tsne[df_tsne[label].notna()]
    df_tsne = df_tsne.sort_values(label, ascending=False)

    # Larger canvas and markers when the legend is drawn.
    fig_size, s = (21, 70) if show_legend else (16, 50)

    plt.figure(figsize=(fig_size, fig_size))
    sns.scatterplot(x='tsne_1', y='tsne_2',
                    hue=label,
                    style=label,
                    data=df_tsne,
                    s=s,
                    palette=colors[:n_styles],
                    markers=markers[:n_styles]
                   )
    # Leave a one-unit margin around the point cloud.
    plt.xlim((np.min(df_tsne["tsne_1"]) - 1, np.max(df_tsne["tsne_1"]) + 1))
    plt.ylim((np.min(df_tsne["tsne_2"]) - 1, np.max(df_tsne["tsne_2"]) + 1))
    legend = plt.legend(bbox_to_anchor=(1.13, 1.00), loc="upper right", borderaxespad=0.0, fontsize=20, markerscale=2)
    if not show_legend:
        legend.remove()
    plt.axis("off")

    # Computed outside the f-string: reusing the enclosing quote type inside a
    # replacement field is a SyntaxError before Python 3.12.
    label_suffix = label.split("_")[1]
    filename = f"{fig_base_name}_{label_suffix}"
    if not show_legend:
        filename = f"{filename}_noLegend"
    filename = add_gender(filename, gender)
    plt.savefig(join(output_folder, f"{filename}.png"), bbox_inches='tight', dpi=dpi)
    plt_show()

In [None]:
show_legend = False
gender = ""
fig_base_name = "Figure6"
plot_tsne(label='labels_anomaly', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

In [None]:
plot_tsne(label='labels_origin', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

In [None]:
plot_tsne(label='labels_risk', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

## Supplementary Table 2
Demographics and acquisition information (images as statistical unit).

In [None]:
# Demographics and acquisition parameters, one row per image.
dataset = pd.read_excel(join(input_path, "raw_demographics_combined_images.xlsx"))
columns = ["Age", "Sex", "BMI", "Manufacturer", "Manufacturer Model Name", "Peak voltage (kVp)", "Exposure (mAs)", "x", "y", "z",]
categorical = ["Sex", "Manufacturer", "Manufacturer Model Name", "Peak voltage (kVp)"]
# NOTE(review): "Exposure Time (s)" and "Tube Current (mA)" are listed here but not in
# `columns`, so they are not summarised. Confirm whether that is intended.
non_normal = ["Age", "BMI", "Exposure Time (s)", "Tube Current (mA)", "Exposure (mAs)", "x", "y", "z"]
table_all = TableOne(dataset, columns=columns, categorical=categorical, groupby="Dataset", nonnormal=non_normal, pval=False, missing=False, overall=False)
# Keep only the second level of the column header and fix the dataset order.
table_all.tableone.columns = [col[1] for col in table_all.tableone.columns]
table_all.tableone = table_all.tableone.loc[:, ["Train", "Test Internal", "Test External"]]
table_all.tableone.to_excel(join(output_folder, "SupplementaryTable2.xlsx"))
# "fancy_grid" is the tabulate format name; "fancygrid" is not recognised.
print(table_all.tabulate(tablefmt="fancy_grid"))

## Supplementary Table 3
Demographics information (patients as statistical unit).

In [None]:
# Demographics, one row per patient.
dataset = pd.read_excel(join(input_path, "raw_demographics_combined_patients.xlsx"))
columns = ["Age", "Sex", "BMI"]
categorical = ["Sex"]
non_normal = ["Age", "BMI"]
table_pat = TableOne(dataset, columns=columns, categorical=categorical, nonnormal=non_normal, groupby="Dataset", pval=False, missing=False, overall=False)
# Keep only the second level of the column header and fix the dataset order.
table_pat.tableone.columns = [col[1] for col in table_pat.tableone.columns]
table_pat.tableone = table_pat.tableone.loc[:, ["Train", "Test Internal", "Test External"]]
table_pat.tableone.to_excel(join(output_folder, "SupplementaryTable3.xlsx"))
# "fancy_grid" is the tabulate format name; "fancygrid" is not recognised.
print(table_pat.tabulate(tablefmt="fancy_grid"))

## Supplementary Table 4
Summary of different classification metrics for the ensemble models for different test datasets (as in Table 1), only for male patients.

In [None]:
metrics = table1(g="m")
# Write next to the other outputs, honouring the configured output folder.
metrics.to_excel(join(output_folder, "SupplementaryTable4.xlsx"))
metrics

## Supplementary Table 5
Summary of different classification metrics for the ensemble models for different test datasets (as in Table 1), only for female patients.

In [None]:
metrics = table1(g="f")
# Write next to the other outputs, honouring the configured output folder.
metrics.to_excel(join(output_folder, "SupplementaryTable5.xlsx"))
metrics

## Supplementary Table 6
Performance metrics of individual models, mean metrics across all models, and ensemble metrics obtained by combining the predictions of individual models for the **anomaly detection** task, evaluated on internal and external testing datasets.

In [None]:
def metrics_single_models_mean_ensemble(gender, task, test_datasets=("test_internal", "test_external"), strategy="train"):
    """Tabulate per-fold, mean and ensemble metrics of one task.

    For every dataset the workbook
    ``predictions_{task}_{test_dataset}_{strategy}.xlsx`` is read from
    ``input_path`` (path passed through ``add_gender``). ``compute_metrics`` is
    evaluated on columns ``model_0`` ... ``model_4`` and on ``ensemble``; only
    its first seven entries are kept.

    Args:
        gender: Gender selector forwarded to ``add_gender``.
        task: Task name used in the file names.
        test_datasets: Dataset names used in the file names and row labels.
        strategy: Training-strategy suffix of the prediction files.

    Returns:
        pandas.DataFrame indexed by (dataset, metric) with columns
        ``Fold 1`` ... ``Fold 5``, ``Mean`` (row-wise mean of the fold
        columns) and ``Ensemble``, rounded to three decimals.
    """
    def metric_column(truth, scores):
        # compute_metrics output as a frame with the metric names as index.
        return pd.DataFrame.from_dict(compute_metrics(truth, scores), orient="index")

    row_labels, per_dataset, ensemble_values = [], [], []
    for test_dataset in test_datasets:
        path = add_gender(join(input_path, f"predictions_{task}_{test_dataset}_{strategy}.xlsx"), gender)
        df = pd.read_excel(path)

        # One column per single model, restricted to the first seven metrics.
        fold_frames = [metric_column(df["label"], df[f"model_{i}"]) for i in range(5)]
        folds = pd.concat(fold_frames, axis=1).iloc[:7, :]
        per_dataset.append(folds)

        dataset_name = test_dataset.replace("_", " ").capitalize()
        row_labels += [(dataset_name, name.replace("_", " ").capitalize()) for name in folds.index.values]

        # Ensemble metrics for the same dataset, as a flat array.
        ensemble_values.append(metric_column(df["label"], df["ensemble"]).iloc[:7].values[:, 0])

    table = pd.concat(per_dataset, axis=0)
    table.columns = [f"Fold {i+1}" for i in range(5)]
    table.index = pd.MultiIndex.from_tuples(row_labels)
    # The mean is taken before the ensemble column is added, so it covers the folds only.
    table["Mean"] = table.mean(axis=1)
    table["Ensemble"] = np.array(ensemble_values).ravel()
    return table.round(3)

In [None]:
metrics = metrics_single_models_mean_ensemble(gender="", task="anomaly_detection")
# Write next to the other outputs, honouring the configured output folder.
metrics.to_excel(join(output_folder, "SupplementaryTable6.xlsx"))
metrics

## Supplementary Table 7
Same as Supplementary Table 6, but only for male patients.

In [None]:
metrics = metrics_single_models_mean_ensemble(gender="m", task="anomaly_detection")
# Write next to the other outputs, honouring the configured output folder.
metrics.to_excel(join(output_folder, "SupplementaryTable7.xlsx"))
metrics

## Supplementary Table 8
Same as Supplementary Table 6, but only for female patients.

In [None]:
metrics = metrics_single_models_mean_ensemble(gender="f", task="anomaly_detection")
# Write next to the other outputs, honouring the configured output folder.
metrics.to_excel(join(output_folder, "SupplementaryTable8.xlsx"))
metrics

## Supplementary Table 9
Performance metrics of individual models, mean metrics across all models, and ensemble metrics obtained by combining the predictions of individual models for the **origin classification** task, evaluated on internal and external testing datasets.

In [None]:
metrics_all = metrics_single_models_mean_ensemble(gender="", task="origin_classification")
# Write next to the other outputs, honouring the configured output folder.
metrics_all.to_excel(join(output_folder, "SupplementaryTable9.xlsx"))
metrics_all

## Supplementary Table 10
Same as Supplementary Table 9, but only for male patients.

In [None]:
metrics_all = metrics_single_models_mean_ensemble(gender="m", task="origin_classification")
# Write next to the other outputs, honouring the configured output folder.
metrics_all.to_excel(join(output_folder, "SupplementaryTable10.xlsx"))
metrics_all

## Supplementary Table 11
Same as Supplementary Table 9, but only for female patients.

In [None]:
metrics_all = metrics_single_models_mean_ensemble(gender="f", task="origin_classification")
# Write next to the other outputs, honouring the configured output folder.
metrics_all.to_excel(join(output_folder, "SupplementaryTable11.xlsx"))
metrics_all

## Supplementary Table 12
Performance metrics of individual models, mean metrics across all models, and ensemble metrics obtained by combining the predictions of individual models for the **risk classification** task, evaluated on internal and external testing datasets.

In [None]:
metrics_all = metrics_single_models_mean_ensemble(gender="", task="risk_classification")
# Write next to the other outputs, honouring the configured output folder.
metrics_all.to_excel(join(output_folder, "SupplementaryTable12.xlsx"))
metrics_all

## Supplementary Table 13
Same as Supplementary Table 12, but only for male patients.

In [None]:
metrics_all = metrics_single_models_mean_ensemble(gender="m", task="risk_classification")
# Write next to the other outputs, honouring the configured output folder.
metrics_all.to_excel(join(output_folder, "SupplementaryTable13.xlsx"))
metrics_all

## Supplementary Table 14
Same as Supplementary Table 12, but only for female patients.

In [None]:
metrics_all = metrics_single_models_mean_ensemble(gender="f", task="risk_classification")
# Write next to the other outputs, honouring the configured output folder.
metrics_all.to_excel(join(output_folder, "SupplementaryTable14.xlsx"))
metrics_all

## Supplementary Figure 5
Single models and ensemble ROC curves for all tasks and test datasets (as in Figure 3), only for male patients.

In [None]:
strategy = "train"
gender = "m"
plot_roc_single_ensemble(tasks=tasks, set_names=test_datasets, colors=["red", "blue"], strategy=strategy, gender=gender,
                         fig_base_name="SupplementaryFigure5")

## Supplementary Figure 6
Single models and ensemble ROC curves for all tasks and test datasets (as in Figure 3), only for female patients.

In [None]:
strategy = "train"
gender = "f"
plot_roc_single_ensemble(tasks=tasks, set_names=test_datasets, colors=["red", "blue"], strategy=strategy, gender=gender,
                         fig_base_name="SupplementaryFigure6")

## Supplementary Figure 7
Mean ROC curves across models for all tasks and test datasets.

In [None]:
def plot_roc_mean(tasks, set_names, colors, gender, fig_base_name, strategy="train"):
    """Plot the mean ROC curve across the single models for each task / dataset.

    Each workbook ``predictions_{task}_{set_name}_{strategy}.xlsx`` is read
    from ``input_path`` (path passed through ``add_gender``), a ROC curve is
    computed for columns ``model_0`` ... ``model_4`` and their mean is drawn by
    ``plot_mean_roc``. The legend is replaced by a single entry reporting the
    mean AUC. Figures are saved as
    ``{output_folder}{fig_base_name}_{task}_{set_name}.png``.

    Args:
        tasks: Iterable of task names.
        set_names: Dataset keys; each must be a key of ``titles_dict``.
        colors: Curve colours, paired with ``set_names`` by ``zip``.
        gender: Gender selector forwarded to ``add_gender``.
        fig_base_name: Prefix of the saved figure file names.
        strategy: Training-strategy suffix of the prediction files. This used
            to be read from a notebook-level global, so the result depended on
            which cell had run last; it is now an explicit argument that
            defaults to ``"train"``, the value set before every existing call.
    """
    for task in tasks:
        # Computed outside the f-string: reusing the enclosing quote type inside
        # a replacement field is a SyntaxError before Python 3.12.
        task_title = task.replace("_", " ").capitalize()
        for set_name, color in zip(set_names, colors):
            df = pd.read_excel(add_gender(join(input_path, f"predictions_{task}_{set_name}_{strategy}.xlsx"), gender))
            roc_curves = [compute_roc(X=df[f"model_{i}"], y=df["label"], pos_label=1) for i in range(5)]

            # Plot the mean ROC curve across single models
            auc_mean = compute_mean_roc(roc_curves)["auc_mean"]
            fig, ax = plt.subplots()
            label = f"Mean ROC curve (AUC {np.round(auc_mean, 3)})"
            plot_mean_roc(roc_curves, show_ci=False, show_ti=False, show_all=True, color=color, ax=ax)
            # Replace the automatic legend with a single entry carrying the mean AUC.
            handles, _ = ax.get_legend_handles_labels()
            ax.legend(handles, [label], loc="lower right")
            plt.title(add_gender(f"{task_title} - {titles_dict[set_name]} - Mean ROC", gender, True))
            plt.savefig(join(output_folder, f"{fig_base_name}_{task}_{set_name}.png"), dpi=dpi)
            plt_show()

In [None]:
gender = ""
strategy = "train"
plot_roc_mean(tasks=tasks, set_names=test_datasets, colors=["red", "blue"], fig_base_name="SupplementaryFigure7", gender=gender)

## Supplementary Figure 8
Same as Supplementary Figure 7, but only for male patients.

In [None]:
gender = "m"
plot_roc_mean(tasks=tasks, set_names=test_datasets, colors=["red", "blue"], fig_base_name="SupplementaryFigure8", gender=gender)

## Supplementary Figure 9
Same as Supplementary Figure 7, but only for female patients.

In [None]:
gender = "f"
plot_roc_mean(tasks=tasks, set_names=test_datasets, colors=["red", "blue"], fig_base_name="SupplementaryFigure9", gender=gender)


## Supplementary Figure 10
Confusion matrices of the ensemble models for all tasks and testing datasets (as in Figure 4), only for male patients.

In [None]:
gender = "m"
threshold = 0.5
# Set explicitly so the cell does not depend on `strategy` left over from an earlier section.
strategy = "train"
plot_confusion_matrices(tasks=tasks, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                        thresh=threshold, base_name="SupplementaryFigure10")

## Supplementary Figure 11
Confusion matrices of the ensemble models for all tasks and testing datasets (as in Figure 4), only for female patients.

In [None]:
gender = "f"
# Set explicitly so the cell does not depend on values left over from earlier cells.
threshold = 0.5
strategy = "train"
plot_confusion_matrices(tasks=tasks, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                        thresh=threshold, base_name="SupplementaryFigure11")

## Supplementary Figure 12
Confusion matrices for anomaly detection task at different classification thresholds, for internal and external testing datasets.

In [None]:
def plot_confusion_matrices_thresholds(task, strategy, set_names, gender, thresholds, base_name):
    """Save ensemble confusion matrices of one task at several cut-offs.

    For each dataset the workbook
    ``predictions_{task}_{set_name}_{strategy}.xlsx`` is read once from
    ``input_path`` (path passed through ``add_gender``). For each cut-off the
    ``ensemble`` column is binarised with ``>`` and handed to
    ``plot_confusion_matrix`` with the output path
    ``{output_folder}{base_name}_{set_name}_{cutoff}.png``.

    Args:
        task: Task name used in the file name and the title.
        strategy: Training-strategy suffix of the prediction files.
        set_names: Dataset keys; each must be a key of ``titles_dict``.
        gender: Gender selector forwarded to ``add_gender``.
        thresholds: Iterable of cut-offs to evaluate.
        base_name: Prefix of the saved figure file names.
    """
    # Computed outside the f-string: reusing the enclosing quote type inside a
    # replacement field is a SyntaxError before Python 3.12.
    task_title = task.replace("_", " ").capitalize()
    for set_name in set_names:
        df = pd.read_excel(add_gender(join(input_path, f"predictions_{task}_{set_name}_{strategy}.xlsx"), gender))
        # `cutoff` rather than `threshold`, to avoid confusion with the notebook-level default.
        for cutoff in thresholds:
            title = add_gender(f"{task_title} - {titles_dict[set_name]} - {threshold_title}: {cutoff}", gender, True)
            plot_confusion_matrix(predictions=df["ensemble"] > cutoff, labels=df["label"].astype(int),
                                  output_path=join(output_folder, f"{base_name}_{set_name}_{cutoff}.png"),
                                  title=title, show=show, dpi=dpi)
            plt_show()

In [None]:
task = "anomaly_detection"
strategy = "train"
gender = ""
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure12")

## Supplementary Figure 13
Same as Supplementary Figure 12 but only for male patients.

In [None]:
gender = "m"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure13")

## Supplementary Figure 14
Same as Supplementary Figure 12 but only for female patients.

In [None]:
gender = "f"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure14")

## Supplementary Figure 15
Confusion matrices for origin classification task at different classification thresholds, for internal and external testing datasets.

In [None]:
task = "origin_classification"
strategy = "train"
gender = ""
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure15")

## Supplementary Figure 16
Same as Supplementary Figure 15 but only for male patients.

In [None]:
gender = "m"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure16")

## Supplementary Figure 17
Same as Supplementary Figure 15 but only for female patients.

In [None]:
gender = "f"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure17")

## Supplementary Figure 18
Confusion matrices for **risk classification** task at different classification thresholds, for internal and external testing datasets.

In [None]:
task = "risk_classification"
strategy = "train"
gender = ""
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure18")

## Supplementary Figure 19
Same as Supplementary Figure 18 but only for male patients.

In [None]:
gender = "m"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure19")

## Supplementary Figure 20
Same as Supplementary Figure 18 but only for female patients.

In [None]:
gender = "f"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_internal", "test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure20")

## Supplementary Figure 21
t-SNE maps of the latent features (as in Figure 6), only for male patients.

In [None]:
show = False
show_legend = False
fig_base_name = "SupplementaryFigure21"
gender = "m"
plot_tsne(label='labels_anomaly', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

In [None]:
plot_tsne(label='labels_origin', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

In [None]:
plot_tsne(label='labels_risk', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

## Supplementary Figure 22
t-SNE maps of the latent features (as in Figure 6), only for female patients.

In [None]:
fig_base_name = "SupplementaryFigure22"
gender = "f"
plot_tsne(label='labels_anomaly', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

In [None]:
plot_tsne(label='labels_origin', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

In [None]:
plot_tsne(label='labels_risk', gender=gender, fig_base_name=fig_base_name, show_legend=show_legend)

## Supplementary Figure 23
Single models, mean and ensemble ROC curves for anomaly detection task and external testing dataset, with models trained on the entire internal dataset (**strategy 2**).

In [None]:
def plot_all_roc(task, strategy, set_name, gender, fig_base_name, color="blue"):
    """Save single-model, ensemble and mean ROC figures for one task / dataset.

    Reads ``predictions_{task}_{set_name}_{strategy}.xlsx`` from ``input_path``
    (path passed through ``add_gender``) and writes three figures to
    ``output_folder``: ``{fig_base_name}_singleModels.png``,
    ``{fig_base_name}_ensemble.png`` and ``{fig_base_name}_meanROC.png``.

    Args:
        task: Task name used in the file name.
        strategy: Training-strategy suffix of the prediction file.
        set_name: Dataset key; must be a key of ``titles_dict``.
        gender: Gender selector forwarded to ``add_gender`` for the input path
            and the figure titles.
        fig_base_name: Prefix of the saved figure file names.
        color: Colour of the ensemble and mean curves.
    """
    df = pd.read_excel(add_gender(join(input_path, f"predictions_{task}_{set_name}_{strategy}.xlsx"), gender))
    set_title = titles_dict[set_name]
    figure_stem = join(output_folder, fig_base_name)

    # Plot ROC curves for single models
    roc_curves = []
    for i in range(5):
        roc = compute_roc(X=df[f"model_{i}"], y=df["label"], pos_label=1)
        roc_curves.append(roc)
        plot_roc(roc, label=f"Model {i + 1}", color=color_palette[i])
    plt.title(add_gender(f"{set_title} - 5-Folds", gender, True))
    plt.savefig(f"{figure_stem}_singleModels.png", dpi=dpi)
    plt_show()

    # Plot ROC curve for the ensemble model
    plot_roc_bootstrap(X=df["ensemble"], y=df["label"], pos_label=1, label=f"{set_title}",
                       color=color, n_bootstrap=n_boot, show_boots=False, random_state=random_seed)
    plt.title(add_gender(f"{set_title} - Ensemble", gender, True))
    plt.suptitle("")     # Clear the subtitle
    plt.savefig(f"{figure_stem}_ensemble.png", dpi=dpi)
    plt_show()

    # Plot the mean ROC curve across single models
    plot_mean_roc(roc_curves, show_ci=False, show_ti=False, show_all=True, color=color)
    plt.title(add_gender(f"{set_title} - Mean ROC", gender, True))
    plt.savefig(f"{figure_stem}_meanROC.png", dpi=dpi)
    plt_show()

In [None]:
task = "anomaly_detection"
strategy = "strategy2"
gender = ""
set_name = "test_external"
fig_base_name = "SupplementaryFigure23"

plot_all_roc(task=task, strategy=strategy, set_name=set_name, gender=gender, fig_base_name=fig_base_name)

## Supplementary Figure 24
Same as Supplementary Figure 23, but only for male patients.

In [None]:
gender = "m"
fig_base_name = "SupplementaryFigure24"
plot_all_roc(task=task, strategy=strategy, set_name=set_name, gender=gender, fig_base_name=fig_base_name)

## Supplementary Figure 25
Same as Supplementary Figure 23, but only for female patients.

In [None]:
gender = "f"
fig_base_name = "SupplementaryFigure25"
plot_all_roc(task=task, strategy=strategy, set_name=set_name, gender=gender, fig_base_name=fig_base_name)

## Supplementary Figure 26
Confusion matrix at different thresholds for anomaly detection task and external testing dataset, with models trained on the entire internal dataset (strategy 2).

In [None]:
task = "anomaly_detection"
strategy = "strategy2"
gender = ""
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure26")

## Supplementary Figure 27
Same as Supplementary Figure 26, but only for male patients.

In [None]:
gender = "m"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure27")

## Supplementary Figure 28
Same as Supplementary Figure 26, but only for female patients.

In [None]:
gender = "f"
plot_confusion_matrices_thresholds(task=task, strategy=strategy, set_names=["test_external"], gender=gender,
                                   thresholds=thresholds, base_name="SupplementaryFigure28")