# 04 - Results Analysis

This notebook consolidates final evaluation and ablation outcomes:

- ablation comparisons
- per-class performance
- confusion matrix and ROC plots
- calibration and reliability
- failure/fairness analysis

In [None]:
from __future__ import annotations

from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Apply seaborn's "whitegrid" theme once, up front, so it is in effect before
# any figure in the cells below is drawn.
sns.set_theme(style="whitegrid")
def find_project_root(start: Path, marker: str = "src") -> Path:
    """Locate the project root by searching upward from ``start``.

    The root is the nearest directory, beginning with ``start`` itself and
    then walking through its ancestors, that contains an entry named
    ``marker``.

    Args:
        start: Directory to begin the search from (typically the kernel's
            working directory).
        marker: Name of the file or directory that identifies the root.

    Returns:
        The resolved root directory. When no ancestor contains ``marker`` the
        parent of ``start`` is returned, matching the previous single-level
        fallback; at the filesystem root that parent is the root itself, so
        no ``IndexError`` can occur.
    """
    start = start.resolve()
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    # Nothing above us carries the marker: keep the historical behaviour of
    # assuming the notebook sits one level below the project root.
    return start.parent


PROJECT_ROOT = find_project_root(Path.cwd())
OUTPUT_DIR = PROJECT_ROOT / "outputs"
ABLATION_CSV = OUTPUT_DIR / "ablation" / "ablation_results.csv"
print("Looking for ablation csv:", ABLATION_CSV)

In [None]:
# Ablation comparison table.
# Load the results when the CSV is present and parseable; otherwise fall back
# to an empty frame so later cells can test `ablation_df.empty` and skip.
if ABLATION_CSV.is_file():
    try:
        ablation_df = pd.read_csv(ABLATION_CSV)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header row, so read_csv raises instead of
        # returning an empty frame; treat it like "no results yet".
        print(f"{ABLATION_CSV.name} exists but is empty.")
        ablation_df = pd.DataFrame()
    else:
        display(ablation_df)
else:
    print("No ablation_results.csv found yet.")
    ablation_df = pd.DataFrame()

In [None]:
# Bar chart of balanced accuracy per ablation experiment.
# The plot needs both the metric column and the `experiment` column used for
# the x axis, so both are checked before handing the frame to seaborn.
if ablation_df.empty or "balanced_accuracy" not in ablation_df.columns:
    print("Balanced accuracy column not available yet.")
elif "experiment" not in ablation_df.columns:
    # Without this guard seaborn would raise on the missing x variable.
    print("Cannot plot balanced accuracy: 'experiment' column is missing.")
else:
    fig, ax = plt.subplots(figsize=(10, 4))
    sns.barplot(data=ablation_df, x="experiment", y="balanced_accuracy", ax=ax)
    # Tilt and right-align the tick labels so long experiment names stay legible.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    ax.set_title("Balanced Accuracy by Ablation")
    fig.tight_layout()
    plt.show()

In [None]:
# Presence check for the confusion matrix, ROC, reliability, and fairness
# artifacts: report which of the expected files exist under OUTPUT_DIR.
ARTIFACT_FILENAMES = {
    "confusion_matrix": "confusion_matrix.png",
    "roc_curves": "roc_curves.png",
    "reliability": "reliability.png",
    "fairness_report": "fairness_report.csv",
}
artifact_paths = {
    label: OUTPUT_DIR / filename for label, filename in ARTIFACT_FILENAMES.items()
}

for name, path in artifact_paths.items():
    status = "FOUND" if path.exists() else "MISSING"
    print(f"{name}: {status} -> {path}")