In [None]:
import os
import sys

# Directory added to the import path; presumably the project checkout that
# provides the `src` package imported in a later cell — confirm.
# Defaults to the original hard-coded location; set the UBPD_ROOT environment
# variable to run the notebook from a different checkout or machine.
PROJECT_ROOT = os.environ.get("UBPD_ROOT", "/home/dylan-kmutt/ubpd")

# Guarded so re-running this cell does not pile duplicate entries onto sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [None]:
# NOTE(review): prints a fixed string and nothing in the visible cells depends
# on it — looks like a leftover kernel smoke test; consider deleting this cell.
print("hello")

In [None]:
import os


def get_model_paths(model_id, checkpoints_dir="../../checkpoints"):
    """Find, print and return the checkpoint paths whose name contains ``model_id``.

    Args:
        model_id: Identifier to look for. It is converted with ``str`` and
            matched as a plain substring of each entry name, so a short id
            also matches longer ids that contain it (e.g. ``9099`` matches
            ``909994``).
        checkpoints_dir: Directory to scan. Defaults to the location this
            notebook has always used, so existing one-argument calls behave
            exactly as before.

    Returns:
        list[str]: ``"{checkpoints_dir}/{entry}"`` for every matching entry,
        sorted lexicographically. Empty when nothing matches.

    Raises:
        FileNotFoundError: If ``checkpoints_dir`` does not exist (raised by
            ``os.listdir``).
    """
    needle = str(model_id)
    model_paths = sorted(
        f"{checkpoints_dir}/{entry}"
        for entry in os.listdir(checkpoints_dir)
        if needle in entry
    )

    print("Models found: ")
    for model_path in model_paths:
        print(f"-- {model_path}")
    return model_paths

In [None]:
from src.evaluate import Evaluator
from src.helper import load_model, aggregate_fold_metrics
from src.preprocessing import PairedTransform
from src.dataset import UBPDataset

In [None]:
def get_pids(seed, ultrasound):
    """Return the hard-coded ID list for a given seed and ultrasound group.

    The IDs are named ``p_ids`` throughout the notebook (presumably patient
    IDs — confirm against the dataset class that consumes them).

    Args:
        seed: One of ``42``, ``94`` or ``33``.
        ultrasound: ``"u1"``, ``"u2"`` or ``"combined"``. ``"combined"`` is
            the ``"u1"`` list followed by the ``"u2"`` list for the same seed.

    Returns:
        list[int]: A fresh list on every call, so callers may mutate it.

    Raises:
        ValueError: If ``seed`` or ``ultrasound`` is not one of the known
            values. Previously this surfaced as an ``UnboundLocalError`` on
            the ``return`` line.
    """
    # seed -> group -> ids. "combined" is derived below instead of being
    # copied by hand, so the three lists can never drift apart.
    ids_by_seed = {
        42: {
            "u1": [16, 19, 12, 36, 41],
            "u2": [61, 77, 79, 50, 75, 94, 68, 96],
        },
        94: {
            "u1": [11, 35, 32, 9, 8],
            "u2": [50, 62, 98, 4, 81, 96, 70, 68],
        },
        33: {
            "u1": [38, 19, 10, 42, 8],
            "u2": [131, 53, 6, 47, 78, 49, 76, 64],
        },
    }

    if seed not in ids_by_seed:
        raise ValueError(
            f"Unknown seed {seed!r}; expected one of {sorted(ids_by_seed)}"
        )
    groups = ids_by_seed[seed]

    if ultrasound == "combined":
        return groups["u1"] + groups["u2"]
    if ultrasound not in groups:
        raise ValueError(
            f"Unknown ultrasound group {ultrasound!r}; "
            f"expected one of {sorted(groups) + ['combined']}"
        )
    # Copy so the lookup table is never exposed to caller-side mutation.
    return list(groups[ultrasound])

In [None]:
import numpy as np


def evaluate(variants):
    """Evaluate every checkpoint found for ``variants["model_id"]`` on one test set.

    Builds a single test dataset from the IDs returned by
    ``get_pids(variants["seed"], variants["test_pids"])``, then, for each path
    returned by ``get_model_paths``, loads the model, runs
    ``Evaluator.evaluate_dice_score(show_plot=True)`` and
    ``Evaluator.visualize_ranked()``, and finally aggregates the per-checkpoint
    results with ``aggregate_fold_metrics`` and prints a summary.

    Args:
        variants: Configuration dict. Keys read here: ``"model_id"``,
            ``"include_classes"``, ``"seed"``, ``"test_pids"``,
            ``"image_size"``, ``"model_name"`` and ``"device"``.

    Side effects:
        Writes ``variants["loss"]`` (``"bce"`` for one included class, ``"ce"``
        otherwise) and ``variants["num_classes"]`` back into the caller's dict,
        and prints progress/summary text.

    Returns:
        dict with keys
        ``"evaluator"`` (the Evaluator of the last checkpoint processed, or
        ``None`` when no checkpoint was found),
        ``"test_dataset"``,
        ``"fold_overall_means"`` (one float per checkpoint, NaN if missing),
        ``"fold_results"`` (raw ``evaluate_dice_score`` outputs) and
        ``"dices"`` (``evaluator.test_scores`` per checkpoint).

    Raises:
        KeyError: If a required key is missing from ``variants``.
    """
    # CV
    # NOTE(review): the emoji/± characters in the messages below look
    # mis-encoded in this view; they are left untouched — confirm against the
    # real file encoding before changing them.
    print(f"üöÄBegininng evaluating model id {variants['model_id']}\n")
    fold_overall_means = []
    fold_results = []  # store per-fold evaluator outputs (dicts)
    dices = []
    # Stays None when no checkpoint matches, so the return statement at the
    # end never references an unbound name.
    evaluator = None

    # Indexed (not .get) so a missing key fails right here with a KeyError
    # instead of a TypeError when iterating over None.
    include_classes = variants["include_classes"]
    class_names = {
        1: "dongmai (artery)",
        2: "jingmai (vein)",
        3: "jirouzuzhi (muscle)",
        4: "shenjing (nerve)",
    }
    print("Included classes:")
    for cid in include_classes:
        print(f"  {cid}: {class_names.get(cid, 'unknown')}")

    n_inc = len(include_classes)
    if n_inc == 1:
        variants["loss"] = "bce"
        num_classes_for_model = 1  # single foreground channel
    else:
        variants["loss"] = "ce"
        num_classes_for_model = n_inc + 1  # background + selected classes only
    variants["num_classes"] = num_classes_for_model

    p_ids = get_pids(variants["seed"], variants["test_pids"])

    paired_test_tf = PairedTransform(size=variants["image_size"], aug=False)
    test_dataset = UBPDataset(
        p_ids=p_ids,
        include_classes=include_classes,
        image_dir="../../data/dataset/images",
        json_dir="../../data/dataset/labels/json_train",
        joint_transform=paired_test_tf,
        keep_original_indices=True,
    )

    print("\nTest landmarks stat: ")
    test_dataset.print_stats()

    model_paths = get_model_paths(variants["model_id"])
    for model_path in model_paths:
        print(f"\nEvaluating: {model_path} ------------------")

        model = load_model(
            model_path,
            model_name=variants["model_name"],
            in_channels=1,
            num_classes=num_classes_for_model,
            device=variants["device"],
        )

        # One Evaluator per checkpoint, all sharing the same test dataset.
        evaluator = Evaluator(
            model,
            test_dataset,
            num_classes=num_classes_for_model,
            ignore_empty_classes=False,
        )
        res = evaluator.evaluate_dice_score(show_plot=True)
        evaluator.visualize_ranked()
        dices.append(evaluator.test_scores)

        # Best effort: a result without an "overall"/"mean" entry (or one that
        # is not dict-like) is recorded as NaN rather than aborting the run.
        try:
            fold_mean = res.get("overall", {}).get("mean", float("nan"))
        except Exception:
            fold_mean = float("nan")
        fold_overall_means.append(
            float(fold_mean) if fold_mean is not None else float("nan")
        )
        fold_results.append(res)

    overall_mean, overall_std, per_class_stats = aggregate_fold_metrics(fold_results)
    if not np.isnan(overall_mean):
        print(
            f"\nüéØ Average overall Dice across folds: {overall_mean:.4f} ¬± {overall_std:.4f}  (n={len(fold_results)})"
        )
    else:
        print("\n‚ö†Ô∏è No per-fold overall Dice scores collected.")

    # Print per-landmark (per-class) averages
    if per_class_stats:
        print("\nüìå Per-landmark average Dice across folds:")
        for cid in sorted(per_class_stats.keys()):
            stats = per_class_stats[cid]
            name = class_names.get(cid, f"class_{cid}")
            print(
                f"  {cid}: {name:<20s} mean¬±std: {stats['mean']:.4f} ¬± {stats['std']:.4f}  (folds={stats['n_folds']})"
            )
    else:
        print("\n‚ö†Ô∏è No per-class stats available to aggregate.")

    print("\n‚úÖ Experiment complete across folds.")
    return {
        "evaluator": evaluator,
        "test_dataset": test_dataset,
        "fold_overall_means": fold_overall_means,
        "fold_results": fold_results,
        "dices": dices,
    }

In [None]:
import torch

# Run configuration consumed by evaluate().
variants = dict(
    # Class ids to keep; evaluate() names 1-4 artery, vein, muscle, nerve.
    include_classes=[1, 2, 3, 4],
    # Architecture name handed to load_model().
    model_name="unet",
    # Checkpoints whose file name contains this id are evaluated.
    model_id=909994,
    # Test group passed to get_pids(): "u1", "u2" or "combined".
    test_pids="u1",
    # Size given to the paired test transform.
    image_size=512,
    # Use the GPU when one is visible to torch, otherwise the CPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Selects which hard-coded ID table get_pids() returns.
    seed=94,
)

In [None]:
# Run the evaluation for the `variants` configuration; the returned dict is
# kept in `result` so later cells can inspect it without re-running the models.
result = evaluate(variants)

In [None]:
# `result["evaluator"]` is the Evaluator built for the last checkpoint that
# evaluate() looped over (checkpoint paths are processed in sorted order).
ev = result["evaluator"]
# presumably renders the test sample at index 1 — TODO confirm against
# Evaluator.visualize_single
ev.visualize_single(1)

In [None]:
# ev.visualize_single(-20)