In [None]:
from operator import itemgetter

import numpy as np
import pandas as pd
import torch
from rich.progress import track
import matplotlib.pyplot as plt

from notebooks.helpers import import_artifacts_from_runID
from utils.tools import calculate_metric_and_confidence_interval
from utils.plot_utils import plot_roc_curves

In [None]:
# Fetch the artifacts for this run and unpack the pieces used below.
run_id = "181b71e6accd49d5b58c277c963c9308"
artifacts = import_artifacts_from_runID(run_id)
model, data_loader, device, config = itemgetter("model", "data_loader", "device", "config")(
	artifacts
)

# torch.no_grad() only turns off gradient tracking; it does not switch
# dropout / batch-norm layers to their evaluation behaviour. The helper that
# loads the model is not visible from this notebook, so eval mode is set
# explicitly here (calling eval() on a model already in eval mode is harmless).
model.eval()

# Per-batch accumulators; concatenated into flat arrays after the loop.
prediction_batches = []
target_batches = []
label_batches = []
imaging_batches = []
with torch.no_grad():
	for data, target, metadata in track(data_loader, description="Loading data..."):
		prediction = model(data.to(device))

		prediction_batches.append(prediction.cpu().numpy())
		# .cpu() is a no-op for CPU tensors and keeps this working if the
		# loader ever yields targets on another device.
		target_batches.append(target.cpu().numpy())
		label_batches.append(metadata["label"])
		imaging_batches.append(metadata["imaging"])

# One flat entry per sample, in data_loader order.
predictions_test = np.concatenate(prediction_batches).flatten()
targets_test = np.concatenate(target_batches).flatten()
labels_test = np.concatenate(label_batches).flatten()
imagings_test = np.concatenate(imaging_batches).flatten()

test_df = pd.DataFrame.from_dict(
	{
		"imaging": imagings_test,
		"label": labels_test,
		"target": targets_test,
		# Rounded model output as an integer class.
		# NOTE(review): assumes the model emits probabilities in [0, 1] - confirm.
		"prediction": predictions_test.round().astype("int"),
		"prediction_proba": predictions_test,
	}
)
# Sanity check: number of rows per value of the "imaging" column.
print(test_df.astype("object").groupby("imaging").count())

In [None]:
from sklearn.metrics import roc_auc_score

# Split the test rows by the value of their "imaging" column.
imaging4_df = test_df.loc[test_df["imaging"] == "ana-krompir-4-22"]
imaging5_df = test_df.loc[test_df["imaging"] == "ana-krompir-5-22"]

# For each subset, print the metric followed by the first two entries of the
# interval returned alongside it.
for subset_df in (imaging4_df, imaging5_df):
	mean, ci = calculate_metric_and_confidence_interval(subset_df, roc_auc_score)
	print(f"{mean:.3f} ({ci[0]:.3f}, {ci[1]:.3f})")

In [None]:

# Hand the plotting helper independent copies of both subsets so the frames
# built in the previous cell cannot be modified by it.
data_df = [subset.copy() for subset in (imaging4_df, imaging5_df)]
plot_roc_curves(data_df)