In [2]:
from __future__ import annotations

import datetime as dt
import json
import math
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


# -------------------- helpers --------------------

def safe_get(d: Any, path: List[str], default: Any = None) -> Any:
    """Follow a chain of keys through nested dicts, tolerating missing levels.

    Args:
        d: Object to start from. It is only indexed while it is a ``dict``.
        path: Keys to follow, outermost first. An empty path returns ``d``
            itself.
        default: Value returned when a key is absent, or when a non-dict is
            reached before the path is exhausted.

    Returns:
        The value stored at the end of ``path`` (which may itself be ``None``),
        or ``default`` if the path cannot be followed.
    """
    node = d
    for key in path:
        if isinstance(node, dict) and key in node:
            node = node[key]
        else:
            # Either we hit a non-dict mid-path or the key is missing.
            return default
    return node


def load_json(path: Path) -> Tuple[Optional[Any], Optional[str]]:
    """Read and parse a JSON file without ever raising to the caller.

    The file is decoded as ``utf-8-sig`` so that a leading UTF-8 byte-order
    mark (often written by Windows editors) is skipped instead of making the
    JSON parser reject the document. Files without a BOM decode exactly as
    plain UTF-8.

    Args:
        path: File to read.

    Returns:
        ``(data, None)`` on success, or ``(None, "<ExceptionName>: <message>")``
        when the file cannot be opened, decoded or parsed.
    """
    try:
        with path.open("r", encoding="utf-8-sig") as f:
            data = json.load(f)
    except Exception as e:
        # Deliberately broad: the caller collects one error string per file
        # and keeps going, so no single bad file may abort the whole scan.
        return None, f"{type(e).__name__}: {e}"
    return data, None


def discover_summary_files(root: Path) -> List[Path]:
    """Return every ``summary.json`` file below ``root``, at any depth, sorted.

    The file name is matched case-insensitively (``Summary.json``,
    ``SUMMARY.JSON``, ...). Directories that happen to carry that name are
    excluded.

    Args:
        root: Directory to search recursively.

    Returns:
        Sorted list of matching file paths; empty if nothing matches.
    """
    return sorted(
        p
        for p in root.rglob("*")
        # Compare the name first: it is a pure string check, so the
        # filesystem is only queried (is_file) for the few entries that match
        # instead of for every entry in the tree.
        if p.name.lower() == "summary.json" and p.is_file()
    )


def normalize_run(raw: Dict[str, Any], source_path: Path, root: Path) -> Dict[str, Any]:
    """Build a consistent view of one run summary while keeping the original.

    Fields are read from ``raw`` with ``safe_get``, so any key that is absent
    simply becomes ``None`` in the result. The top-level keys consulted are
    dataset, root, model, peft, classes, lora, params, storage, training,
    test_uncalibrated, test_temp_scaled and inference_benchmark.

    Args:
        raw: Parsed content of one summary file.
        source_path: Location of the file ``raw`` was loaded from.
        root: Directory the search started from; used to derive a relative
            path for the file.

    Returns:
        A dict with three keys:
        ``source`` (relative and absolute path of the file),
        ``facts`` (the normalized fields) and
        ``raw`` (the untouched input dict).
    """
    abs_path = source_path.resolve()
    try:
        rel = abs_path.relative_to(root.resolve())
    except ValueError:
        # The file resolves outside ``root`` (e.g. it is reached through a
        # symlink). Fall back to the absolute path rather than aborting the
        # whole aggregation because of one file.
        rel = abs_path

    classes = safe_get(raw, ["classes"])
    try:
        num_classes = len(classes or [])
    except TypeError:
        # ``classes`` holds something without a length (e.g. a number).
        num_classes = 0

    storage_keys = (
        "best_model_path",
        "adapter_only_enabled",
        "adapter_only_mb",
        "adapter_only_bytes",
    )
    training_keys = (
        "epochs_ran",
        "best_val_loss",
        "best_val_macro_f1",
        "total_train_time_sec",
        "mean_epoch_time_sec",
        "peak_train_vram_gb",
    )

    facts = {
        # identity
        "dataset": safe_get(raw, ["dataset"]),
        "root": safe_get(raw, ["root"]),
        "model": safe_get(raw, ["model"]),
        "peft": safe_get(raw, ["peft"]),
        "classes": classes,
        "num_classes": num_classes,

        # lora/params are passed through; storage/training are reduced to a
        # fixed set of keys so every run has the same layout
        "lora": safe_get(raw, ["lora"]),
        "params": safe_get(raw, ["params"]),
        "storage": {key: safe_get(raw, ["storage", key]) for key in storage_keys},
        "training": {key: safe_get(raw, ["training", key]) for key in training_keys},

        # tests (same naming as in the summary files)
        "test_uncalibrated": safe_get(raw, ["test_uncalibrated"]),
        "test_temp_scaled": safe_get(raw, ["test_temp_scaled"]),

        # inference
        "inference_benchmark": safe_get(raw, ["inference_benchmark"]),
        # NOTE(review): the "b64_" names presume throughput_batchN was
        # measured with a batch size of 64 -- confirm against the benchmark.
        "inference_quick": {
            "b1_ms_per_img": safe_get(raw, ["inference_benchmark", "latency_batch1", "ms_per_img"]),
            "b1_imgs_per_s": safe_get(raw, ["inference_benchmark", "latency_batch1", "imgs_per_s"]),
            "b64_ms_per_img": safe_get(raw, ["inference_benchmark", "throughput_batchN", "ms_per_img"]),
            "b64_imgs_per_s": safe_get(raw, ["inference_benchmark", "throughput_batchN", "imgs_per_s"]),
        },
    }

    return {
        "source": {
            # Forward slashes keep the relative path identical across OSes.
            "relative_path": str(rel).replace("\\", "/"),
            "absolute_path": str(abs_path),
        },
        "facts": facts,  # normalized for LLMs
        "raw": raw,      # original JSON preserved
    }


def _finite_float(value: Any) -> Optional[float]:
    """Convert a metric to a finite float, or return None if that is impossible."""
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    # NaN/inf would make the sort order meaningless, so treat them as missing.
    return number if math.isfinite(number) else None


def group_by_dataset(runs: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Group normalized runs by dataset and rank each group best-first.

    Runs are keyed by ``facts.dataset``; runs without one are collected under
    ``"UNKNOWN_DATASET"``. A dataset value that is not a string is converted
    with ``str`` so the keys match the declared return type and are always
    hashable.

    Within a group, runs are ordered by ``facts.test_uncalibrated.macro_f1``
    descending, with ``acc`` as the tie-breaker. If ``macro_f1`` is missing or
    unusable (non-numeric, NaN, infinite), ``acc`` takes its place; if both
    are unusable the run scores 0.0. Runs with identical keys keep their
    input order.

    Args:
        runs: Run dicts that carry a ``facts`` mapping.

    Returns:
        Mapping from dataset name to its ranked list of runs. The lists hold
        the same dict objects that were passed in.
    """
    by_ds: Dict[str, List[Dict[str, Any]]] = {}
    for r in runs:
        ds = safe_get(r, ["facts", "dataset"]) or "UNKNOWN_DATASET"
        if not isinstance(ds, str):
            ds = str(ds)
        by_ds.setdefault(ds, []).append(r)

    def sort_key(run: Dict[str, Any]) -> Tuple[float, float]:
        mf1 = _finite_float(safe_get(run, ["facts", "test_uncalibrated", "macro_f1"]))
        acc = _finite_float(safe_get(run, ["facts", "test_uncalibrated", "acc"]))
        fallback = acc if acc is not None else 0.0
        primary = mf1 if mf1 is not None else fallback
        return primary, fallback

    for group in by_ds.values():
        group.sort(key=sort_key, reverse=True)

    return by_ds


# -------------------- main (no args, Jupyter-friendly) --------------------

# Search the current working directory and write the combined file next to
# the discovered data. The output name does not end in "summary.json", so a
# re-run does not pick up its own output.
ROOT = Path.cwd()
OUT_PATH = ROOT / f"{ROOT.name}_combined_summaries.json"

summary_files = discover_summary_files(ROOT)

runs: List[Dict[str, Any]] = []
errors: List[Dict[str, Any]] = []

# Load every summary; a file that fails is recorded in `errors` and skipped
# so the remaining files are still aggregated.
for fp in summary_files:
    obj, err = load_json(fp)
    if err is not None:
        errors.append({"file": str(fp.resolve()), "error": err})
        continue
    if not isinstance(obj, dict):
        errors.append({"file": str(fp.resolve()), "error": f"Expected dict, got {type(obj).__name__}."})
        continue
    runs.append(normalize_run(obj, fp, ROOT))

# datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC "now"
# and drop the tzinfo so isoformat() still yields "YYYY-MM-DDTHH:MM:SS",
# to which the literal "Z" suffix is appended as before.
created_utc = dt.datetime.now(dt.timezone.utc).replace(microsecond=0, tzinfo=None).isoformat() + "Z"

combined = {
    "meta": {
        "created_utc": created_utc,
        "search_root": str(ROOT.resolve()),
        "output_file": str(OUT_PATH.resolve()),
        "summary_files_found": len(summary_files),
        "runs_loaded": len(runs),
        "runs_failed": len(errors),
        "note": "Each run includes (source, facts, raw). facts is normalized for AI; raw preserves original Summary.json.",
    },
    "runs": runs,  # flat list is easiest for LLMs
    "by_dataset": group_by_dataset(runs),
    "errors": errors,
}

with OUT_PATH.open("w", encoding="utf-8") as f:
    json.dump(combined, f, indent=2, ensure_ascii=False)

print(f"[OK] Current folder: {ROOT}")
print(f"[OK] Found Summary.json files: {len(summary_files)}")
print(f"[OK] Loaded runs: {len(runs)} | Failed: {len(errors)}")
print(f"[OK] Saved: {OUT_PATH}")
if errors:
    print("[WARN] Some files failed to load. See combined['errors'] in the output JSON.")


[OK] Current folder: D:\AIUB\DSP\Code\Models\DeiT-Small + AdaptFormer (MLP-parallel adapters)
[OK] Found Summary.json files: 5
[OK] Loaded runs: 5 | Failed: 0
[OK] Saved: D:\AIUB\DSP\Code\Models\DeiT-Small + AdaptFormer (MLP-parallel adapters)\DeiT-Small + AdaptFormer (MLP-parallel adapters)_combined_summaries.json
