In [None]:
from src import evaluation, util

In [None]:
# In-distribution (ID) dataset: its name is interpolated into the raw-data path
# and reused as the label for the evaluation DataFrame and the plot sections.
dataset_name_id = "uniform_var10_plog100_seed1"
# Out-of-distribution (OOD) dataset for the ID-vs-OOD comparison.
# Set to None to skip the OOD load and the comparison tab.
dataset_name_ood = "uniform_var20_plog100_seed1"  # or None if not used
# Run-directory name of the trained model; the checkpoint path is derived from it.
model = "FNO_uniform_var10_plog100_seed1_20251113_202554"

In [None]:
# Derive on-disk locations from the model / dataset names chosen above.
# All paths are relative, so they resolve against the kernel's working directory.

# Best-model state dict stored under the model's run directory.
checkpoint_path = f"model_training/data/processed/model/{model}/best_model_state_dict.pt"

# ID dataset layout: raw/<dataset_name>/<dataset_name>.pt
dataset_pt_path_id = f"model_training/data/raw/{dataset_name_id}/{dataset_name_id}.pt"

# The OOD dataset is optional: a None name yields a None path, which the
# loading cell uses as the signal to skip the OOD evaluation.
if dataset_name_ood is None:
    dataset_pt_path_ood = None
else:
    dataset_pt_path_ood = f"model_training/data/raw/{dataset_name_ood}/{dataset_name_ood}.pt"

In [None]:
# Build the evaluation DataFrames: always for the ID dataset, and for the OOD
# dataset only when an OOD path was configured.

# Loader settings shared by the ID and OOD runs (same checkpoint, one sample per batch).
# NOTE(review): ood_fraction=1.0 is passed for the ID dataset as well — confirm that is intended.
shared_context_kwargs = dict(
    checkpoint_path=checkpoint_path,
    batch_size=1,
    ood_fraction=1.0,
)

# --- ID: inference context, then the per-dataset evaluation DataFrame ---
model_id, loader_id, processor_id, device = evaluation.evaluation_inference.load_inference_context(
    dataset_path=dataset_pt_path_id, **shared_context_kwargs
)
df_eval_id = evaluation.evaluation_dataframe.create_evaluation_df(
    model=model_id, loader=loader_id, processor=processor_id, device=device, dataset_name=dataset_name_id
)

# --- OOD (optional) ---
# Reset first so a skipped or failed OOD load never leaves a stale frame from an earlier run.
df_eval_ood = None

if dataset_pt_path_ood is not None:
    # The device returned here is discarded; the one from the ID context is reused below.
    model_ood, loader_ood, processor_ood, _ = evaluation.evaluation_inference.load_inference_context(
        dataset_path=dataset_pt_path_ood, **shared_context_kwargs
    )
    df_eval_ood = evaluation.evaluation_dataframe.create_evaluation_df(
        model=model_ood, loader=loader_ood, processor=processor_ood, device=device, dataset_name=dataset_name_ood
    )

In [None]:
# `toggle(label, plot_fn)` builds one entry for a dropdown section; it is created
# once from the ID evaluation frame and dataset name and reused by every tab below.
toggle = util.util_nb.make_toggle_shortcut(df_eval_id, dataset_name_id)

# Global Error Analysis
# Each entry pairs a toggle label with a zero-argument callable, so the plot
# function itself runs only when that callable is invoked, not when this cell runs.
tab1_entries = [
    ("1-1. Global error metrics", lambda: evaluation.evaluation_plots.plot_global_error_metrics(df_eval_id)),
    ("1-2. Global error distribution (|U_error|)", lambda: evaluation.evaluation_plots.plot_error_distribution(df_eval_id, field="U")),
    ("1-3. GT vs Prediction (U, global)", lambda: evaluation.evaluation_plots.plot_global_gt_vs_pred(df_eval_id, field="U")),
    ("1-4. Mean error maps", lambda: evaluation.evaluation_plots.plot_mean_error_maps(df_eval_id)),
    ("1-5. Std error maps", lambda: evaluation.evaluation_plots.plot_std_error_maps(df_eval_id)),
]
tab1_plots = [toggle(label, render) for label, render in tab1_entries]

tab1_section = util.util_nb.make_dropdown_section(tab1_plots, f"{dataset_name_id} - Global Error Analysis")

# ID vs OOD Comparison
# Stays None when no OOD evaluation frame was built, which keeps the tab out of the panel.
tab2_section = None

if df_eval_ood is not None:
    # Label / zero-argument plot callable pairs; every plot compares the ID frame with the OOD frame.
    tab2_entries = [
        ("2-1. ID vs OOD metrics", lambda: evaluation.evaluation_plots.plot_id_vs_ood_metrics(df_eval_id, df_eval_ood)),
        (
            "2-2. ID vs OOD error distributions (|U_error|)",
            lambda: evaluation.evaluation_plots.plot_id_vs_ood_error_distributions(df_eval_id, df_eval_ood),
        ),
        ("2-3. OOD - ID mean error map", lambda: evaluation.evaluation_plots.plot_id_vs_ood_mean_error_difference(df_eval_id, df_eval_ood)),
    ]
    tab2_plots = [toggle(label, render) for label, render in tab2_entries]

    tab2_section = util.util_nb.make_dropdown_section(tab2_plots, f"{dataset_name_id} vs {dataset_name_ood} - ID/OOD Comparison")

# Permeability Sensitivity (global)
# Label / zero-argument plot callable pairs, all computed from the ID evaluation frame.
tab3_entries = [
    ("3-1. Error vs permeability magnitude", lambda: evaluation.evaluation_plots.plot_error_vs_kappa_magnitude(df_eval_id)),
    ("3-2. Error vs anisotropy ratio", lambda: evaluation.evaluation_plots.plot_error_vs_anisotropy_ratio(df_eval_id)),
    ("3-3. Error vs mean permeability", lambda: evaluation.evaluation_plots.plot_error_vs_mean_kappa(df_eval_id)),
    ("3-4. Error vs permeability gradient", lambda: evaluation.evaluation_plots.plot_error_vs_kappa_gradient(df_eval_id)),
]
tab3_plots = [toggle(label, render) for label, render in tab3_entries]

tab3_section = util.util_nb.make_dropdown_section(tab3_plots, f"{dataset_name_id} - Permeability Sensitivity")

# Sample Viewer (Prediction/GT/Error + Tensor Overlay)
# Label / zero-argument plot callable pairs, both driven by the ID evaluation frame.
tab4_entries = [
    ("4-1. Sample Viewer — Prediction / GT / Error", lambda: evaluation.evaluation_plots.plot_sample_prediction_overview(df_eval_id)),
    (
        "4-2. Sample Viewer — kappa tensor (3x3) with overlays",
        lambda: evaluation.evaluation_plots.plot_sample_kappa_tensor_with_overlay(df_eval_id),
    ),
]
tab4_plots = [toggle(label, render) for label, render in tab4_entries]

tab4_section = util.util_nb.make_dropdown_section(tab4_plots, f"{dataset_name_id} - Sample Viewer")

# Assemble the tabbed evaluation panel and show it.
#
# Tab titles are numbered only after the optional ID/OOD tab has been inserted.
# With hard-coded numbers, inserting that tab produced "1., 2., 2., 3." — a
# duplicated "2." that also disagreed with the "3-x" / "4-x" plot labels.
sections = [tab1_section, tab3_section, tab4_section]
tab_names = [
    "Global Error Analysis",
    "Permeability Sensitivity",
    "Sample Viewer",
]

# Insert ID/OOD tab if available (second position, right after the global analysis)
if tab2_section is not None:
    sections.insert(1, tab2_section)
    tab_names.insert(1, "ID vs OOD")

# Number the tabs by their final position so the prefixes are always 1..N with no repeats.
tab_titles = [f"{position}. {name}" for position, name in enumerate(tab_names, start=1)]

evaluation_panel = util.util_nb.make_lazy_panel_with_tabs(
    sections,
    tab_titles=tab_titles,
    open_btn_text=f"{dataset_name_id} - Open Evaluation",
    close_btn_text="Close",
)

# `display` is provided by IPython in a notebook kernel; no import is needed here.
display(evaluation_panel)