In [None]:
%load_ext autoreload
%autoreload 2

import os
from operator import itemgetter

import mlflow
import numpy as np
import pandas as pd
import torch
from rich.progress import track
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
from IPython.display import clear_output

from notebooks.helpers import import_artifacts_from_runID
from utils.utils import ensure_dir, write_txt
from utils.tools import calculate_metric_and_confidence_interval
from utils.plot_utils import plot_roc_curves
from configs import configs

In [None]:
def load_test_df(run_id):
	"""Run the model stored for ``run_id`` over its data loader and tabulate the outputs.

	Args:
		run_id: Identifier handed to ``import_artifacts_from_runID``. The returned
			artifacts must expose the entries ``"model"``, ``"data_loader"``,
			``"device"`` and ``"config"``.

	Returns:
		A ``(test_df, config)`` tuple. ``test_df`` has one row per flattened model
		output with the columns ``imaging``, ``label``, ``target``, ``prediction``
		(the model output rounded to an integer) and ``prediction_proba`` (the raw
		model output). ``config`` is the config object taken from the artifacts.
	"""
	artifacts = import_artifacts_from_runID(run_id)
	model, data_loader, device, config = itemgetter("model", "data_loader", "device", "config")(
		artifacts
	)

	# Inference must not run with dropout / batch-norm in training mode. This is a
	# no-op when the artifacts loader already switched the model to eval mode.
	model.eval()

	predictions_test = []
	targets_test = []
	labels_test = []
	imagings_test = []
	with torch.no_grad():
		for data, target, metadata in track(data_loader, description="Loading data..."):
			prediction = model(data.to(device))

			predictions_test.append(prediction.cpu().numpy())
			targets_test.append(target.numpy())
			labels_test.append(metadata["label"])
			imagings_test.append(metadata["imaging"])

	# Remove the progress output once the whole loader has been consumed.
	clear_output(wait=True)

	# Stitch the per-batch pieces together into flat, row-aligned arrays.
	# NOTE(review): assumes the model emits a single value per sample, otherwise the
	# flattened predictions would not line up with the metadata columns - confirm.
	predictions_test = np.concatenate(predictions_test).flatten()
	targets_test = np.concatenate(targets_test).flatten()
	labels_test = np.concatenate(labels_test).flatten()
	imagings_test = np.concatenate(imagings_test).flatten()

	test_df = pd.DataFrame.from_dict(
		{
			"imaging": imagings_test,
			"label": labels_test,
			"target": targets_test,
			# Hard class decision: nearest integer of the raw output (numpy rounds
			# exact halves to the nearest even value, so 0.5 becomes 0).
			"prediction": predictions_test.round().astype("int"),
			"prediction_proba": predictions_test,
		}
	)
	return test_df, config

In [10]:
def _format_metrics_row(row_name, df, name_width=20):
	"""Format one report line: left-aligned row name followed by the four metric cells."""
	# (metric, extra positional args for the CI helper, trailing padding).
	# Only ROC AUC is computed with the "prediction_proba" argument; the padding
	# widths reproduce the existing report layout exactly.
	metric_specs = (
		(roc_auc_score, ("prediction_proba",), "    "),
		(f1_score, (), "    "),
		(precision_score, (), "   "),
		(recall_score, (), "   "),
	)
	cells = [f"{row_name:<{name_width}}"]
	for metric, extra_args, padding in metric_specs:
		# The helper returns the point estimate and an indexable (lower, upper) pair.
		mean, ci = calculate_metric_and_confidence_interval(df, metric, *extra_args)
		cells.append(f"{mean:.3f} ({ci[0]:.3f}, {ci[1]:.3f}){padding}")
	return "".join(cells)


def create_per_imaging_report(test_df, add_counts=False):
	"""Build a plain-text metrics table with one row per imaging group plus a total row.

	Each row lists ROC AUC, F1, precision and recall, in that order, formatted as
	``mean (ci_low, ci_high)`` with three decimals.

	Args:
		test_df: Frame with an ``imaging`` column to group by, plus whatever columns
			``calculate_metric_and_confidence_interval`` reads.
		add_counts: When true, append the per-imaging non-null counts of every
			column after the metrics table.

	Returns:
		The report as a single string; the metrics table is followed by a blank line.
	"""
	rows = [_format_metrics_row(name, df) for name, df in test_df.groupby("imaging")]
	rows.append(_format_metrics_row("Total", test_df))
	# Each row ends with a newline; one extra newline separates the table from what follows.
	msg = "\n".join(rows) + "\n\n"
	if add_counts:
		msg += test_df.astype("object").groupby("imaging").count().to_string()
	return msg

In [None]:
# Every non-hidden entry of the model registry directory is taken as an
# experiment id; hidden entries such as `.trash` are skipped.
experiments_ids = [
	entry for entry in os.listdir(configs.MODEL_REGISTRY) if not entry.startswith(".")
]
# Map each experiment's name to the runs MLflow returns for that experiment.
data_all = dict(
	(mlflow.get_experiment(exp_id).name, mlflow.search_runs(exp_id))
	for exp_id in experiments_ids
)
# Ids of the runs recorded under the "train_CNN" experiment.
run_ids_test = data_all["train_CNN"]["run_id"].tolist()
# One (test_df, config) pair per run.
test_data = list(map(load_test_df, run_ids_test))

In [None]:
# Output locations for the per-run ROC figures and the combined text report.
save_roc_curves_dir = ensure_dir(configs.BASE_DIR / "saved/roc_curves")
save_metrics_report_dir = ensure_dir(configs.BASE_DIR / "saved/metrics_report")
report = ""

for test_df, config in test_data:
	# Run name: the sampler name followed by the digits found in the imaging names.
	loader_args = config["data_loader"]["args"]
	imagings_str = "".join(filter(str.isdigit, "".join(loader_args["imagings_used"])))
	sampler_str = loader_args["data_sampler"]
	name = sampler_str + imagings_str

	# Hand the per-imaging sub-frames to the ROC plotter, then store the current
	# figure as a PDF named after the run.
	dfs = [group_df for _, group_df in test_df.groupby("imaging")]
	plot_roc_curves(dfs, title=name)
	save_path = save_roc_curves_dir / f"{name}.pdf"
	plt.savefig(save_path, format="pdf", bbox_inches="tight")

	# Report section for this run: a heading line, then the metrics table.
	report += f"{name}\n" + create_per_imaging_report(test_df, add_counts=False)

write_txt(report, save_metrics_report_dir / "metrics_report.txt")