In [None]:
import sys
from pathlib import Path

# Locate the repo root: the nearest directory, starting at the current working
# directory and walking upwards, that contains a "src" folder.
p = Path.cwd().resolve()
candidate = next((d for d in (p, *p.parents) if (d / "src").is_dir()), None)
if candidate is None:
    raise RuntimeError("Could not find repo root (no 'src' folder in parents).")

# Prepend the root to sys.path (only once) so the `from src...` imports used
# further down the notebook can resolve.
repo_root_str = str(candidate)
if repo_root_str not in sys.path:
    sys.path.insert(0, repo_root_str)
print("Found repo root:", candidate)


In [None]:
from pathlib import Path
from src.config import PATHS, CFG
from src.eval.run_experiment import evaluate_experiment, ExperimentPaths

TRAIN_OUT = Path(PATHS.TRAIN_OUT)


def _artifacts(x_file, bundle_file, feature_mode):
    """Build one experiment entry: feature-matrix path, model-bundle path, feature mode.

    Both file names are resolved relative to TRAIN_OUT.
    """
    return {
        "X": TRAIN_OUT / x_file,
        "bundle": TRAIN_OUT / bundle_file,
        "feature_mode": feature_mode,
    }


# The four experiments, keyed by name, in the order (1)..(4).
EXPERIMENTS = {
    # (1) USE prompt-only
    "use_prompt_only": _artifacts(
        "X_use_prompt_only.npy",
        "use_prompt_only__cls_plus_regressors.joblib",
        "prompt_only",
    ),
    # (2) OpenAI prompt-only
    "openai_prompt_only": _artifacts(
        "X_openai_prompt_only.npy",
        "openai_prompt_only__cls_plus_regressors.joblib",
        "prompt_only",
    ),
    # (3) USE prompt + map
    "use_prompt_plus_map": _artifacts(
        "X_use_prompt_plus_map.npy",
        "use_prompt_plus_map__cls_plus_regressors.joblib",
        "prompt_plus_map",
    ),
    # (4) OpenAI prompt + map
    "openai_prompt_plus_map": _artifacts(
        "X_openai_prompt_plus_map.npy",
        "openai_prompt_plus_map__cls_plus_regressors.joblib",
        "prompt_plus_map",
    ),
}

# Quick artifact check: list every experiment file that is not on disk yet.
# This only reports; it does not raise, so later cells can still be run.
# (Author's note: files are expected to be missing until the main pipeline is revised.)
missing = []
for exp_name, spec in EXPERIMENTS.items():
    x_path, bundle_path = spec["X"], spec["bundle"]
    if not x_path.exists():
        missing.append(f"{exp_name}: missing X file -> {x_path}")
    if not bundle_path.exists():
        missing.append(f"{exp_name}: missing bundle -> {bundle_path}")

if not missing:
    print("✅ All experiment artifacts found.")
else:
    print("Artifacts missing (expected before running evaluation):")
    for entry in missing:
        print(" -", entry)


In [None]:
# Experiment (1): USE prompt-only.
# Reuse an existing RESULTS dict when this cell is re-run, so results already
# collected for other experiments are not discarded.
RESULTS = globals().get("RESULTS", {})

k = "use_prompt_only"
cfg = EXPERIMENTS[k]

# Input artifacts for this run: the shared pairs table plus this experiment's
# feature matrix and model bundle.
exp_paths = ExperimentPaths(
    pairs_path=PATHS.TRAIN_PAIRS_PARQUET,
    X_path=cfg["X"],
    bundle_path=cfg["bundle"],
)

# Settings taken from the project config.
# NOTE(review): the same CFG.PROMPT_DIM is passed for the USE and OpenAI runs;
# confirm it matches this embedding's width.
eval_kwargs = dict(
    feature_mode=cfg["feature_mode"],
    map_dim=int(CFG.MAP_DIM),
    prompt_dim=int(CFG.PROMPT_DIM),
    group_col="map_id",
    seed=int(CFG.SEED),
    val_ratio=float(CFG.VAL_RATIO),
    test_ratio=float(CFG.TEST_RATIO),
)

RESULTS[k] = evaluate_experiment(name=k, paths=exp_paths, **eval_kwargs)

# Short TEST-split summary for a quick sanity check.
print("✅ Done:", k)
test_clf = RESULTS[k]["classifier"]["TEST"]
print("Classifier TEST:", test_clf["acc"], test_clf["f1_macro"])
print("Oracle TEST:", RESULTS[k]["regressor_oracle"]["TEST_oracle"])
print("Pipeline TEST:", RESULTS[k]["regressor_pipeline"]["TEST_pipeline"])


In [None]:
# Experiment (2): OpenAI prompt-only.
k = "openai_prompt_only"
cfg = EXPERIMENTS[k]

# Input artifacts for this run: the shared pairs table plus this experiment's
# feature matrix and model bundle.
exp_paths = ExperimentPaths(
    pairs_path=PATHS.TRAIN_PAIRS_PARQUET,
    X_path=cfg["X"],
    bundle_path=cfg["bundle"],
)

# Settings taken from the project config.
# NOTE(review): the same CFG.PROMPT_DIM is passed for the USE and OpenAI runs;
# confirm it matches this embedding's width.
eval_kwargs = dict(
    feature_mode=cfg["feature_mode"],
    map_dim=int(CFG.MAP_DIM),
    prompt_dim=int(CFG.PROMPT_DIM),
    group_col="map_id",
    seed=int(CFG.SEED),
    val_ratio=float(CFG.VAL_RATIO),
    test_ratio=float(CFG.TEST_RATIO),
)

RESULTS[k] = evaluate_experiment(name=k, paths=exp_paths, **eval_kwargs)

# Short TEST-split summary for a quick sanity check.
print("✅ Done:", k)
test_clf = RESULTS[k]["classifier"]["TEST"]
print("Classifier TEST:", test_clf["acc"], test_clf["f1_macro"])
print("Oracle TEST:", RESULTS[k]["regressor_oracle"]["TEST_oracle"])
print("Pipeline TEST:", RESULTS[k]["regressor_pipeline"]["TEST_pipeline"])


In [None]:
# Experiment (3): USE prompt + map.
k = "use_prompt_plus_map"
cfg = EXPERIMENTS[k]

# Input artifacts for this run: the shared pairs table plus this experiment's
# feature matrix and model bundle.
exp_paths = ExperimentPaths(
    pairs_path=PATHS.TRAIN_PAIRS_PARQUET,
    X_path=cfg["X"],
    bundle_path=cfg["bundle"],
)

# Settings taken from the project config.
# NOTE(review): the same CFG.PROMPT_DIM is passed for the USE and OpenAI runs;
# confirm it matches this embedding's width.
eval_kwargs = dict(
    feature_mode=cfg["feature_mode"],
    map_dim=int(CFG.MAP_DIM),
    prompt_dim=int(CFG.PROMPT_DIM),
    group_col="map_id",
    seed=int(CFG.SEED),
    val_ratio=float(CFG.VAL_RATIO),
    test_ratio=float(CFG.TEST_RATIO),
)

RESULTS[k] = evaluate_experiment(name=k, paths=exp_paths, **eval_kwargs)

# Short TEST-split summary for a quick sanity check.
print("✅ Done:", k)
test_clf = RESULTS[k]["classifier"]["TEST"]
print("Classifier TEST:", test_clf["acc"], test_clf["f1_macro"])
print("Oracle TEST:", RESULTS[k]["regressor_oracle"]["TEST_oracle"])
print("Pipeline TEST:", RESULTS[k]["regressor_pipeline"]["TEST_pipeline"])


In [None]:
# Experiment (4): OpenAI prompt + map.
k = "openai_prompt_plus_map"
cfg = EXPERIMENTS[k]

# Input artifacts for this run: the shared pairs table plus this experiment's
# feature matrix and model bundle.
exp_paths = ExperimentPaths(
    pairs_path=PATHS.TRAIN_PAIRS_PARQUET,
    X_path=cfg["X"],
    bundle_path=cfg["bundle"],
)

# Settings taken from the project config.
# NOTE(review): the same CFG.PROMPT_DIM is passed for the USE and OpenAI runs;
# confirm it matches this embedding's width.
eval_kwargs = dict(
    feature_mode=cfg["feature_mode"],
    map_dim=int(CFG.MAP_DIM),
    prompt_dim=int(CFG.PROMPT_DIM),
    group_col="map_id",
    seed=int(CFG.SEED),
    val_ratio=float(CFG.VAL_RATIO),
    test_ratio=float(CFG.TEST_RATIO),
)

RESULTS[k] = evaluate_experiment(name=k, paths=exp_paths, **eval_kwargs)

# Short TEST-split summary for a quick sanity check.
print("✅ Done:", k)
test_clf = RESULTS[k]["classifier"]["TEST"]
print("Classifier TEST:", test_clf["acc"], test_clf["f1_macro"])
print("Oracle TEST:", RESULTS[k]["regressor_oracle"]["TEST_oracle"])
print("Pipeline TEST:", RESULTS[k]["regressor_pipeline"]["TEST_pipeline"])


In [None]:
import pandas as pd

def row_for(model_key: str, split: str):
    """Flatten one experiment's metrics for one split into a single table row.

    Args:
        model_key: Key of the experiment in the module-level ``RESULTS`` dict.
        split: Split name. It indexes the classifier metrics directly and,
            suffixed with ``_oracle`` / ``_pipeline``, the regressor metrics
            (this notebook calls it with "VAL" and "TEST").

    Returns:
        A dict with the keys ``model``, ``split``, ``clf_acc``,
        ``clf_f1_macro``, ``reg_oracle_MAE``, ``reg_oracle_RMSE``,
        ``pipe_MAE`` and ``pipe_RMSE``, in that order.

    Raises:
        KeyError: If the experiment, the split, or a metric is absent.
    """
    result = RESULTS[model_key]
    row = {"model": model_key, "split": split}

    # Classifier metrics are stored under the bare split name.
    clf_metrics = result["classifier"][split]
    row["clf_acc"] = clf_metrics["acc"]
    row["clf_f1_macro"] = clf_metrics["f1_macro"]

    # Regressor metrics use "<split>_oracle" and "<split>_pipeline" keys.
    oracle_metrics = result["regressor_oracle"][f"{split}_oracle"]
    row["reg_oracle_MAE"] = oracle_metrics["MAE"]
    row["reg_oracle_RMSE"] = oracle_metrics["RMSE"]

    pipeline_metrics = result["regressor_pipeline"][f"{split}_pipeline"]
    row["pipe_MAE"] = pipeline_metrics["MAE"]
    row["pipe_RMSE"] = pipeline_metrics["RMSE"]
    return row

# Experiments (1) vs (2), prompt-only: one row per (split, model), VAL rows first.
keys = ["use_prompt_only", "openai_prompt_only"]
rows_12 = [
    row_for(model_key, split)
    for split in ["VAL", "TEST"]
    for model_key in keys
]
df_cmp_12 = pd.DataFrame(rows_12).reset_index(drop=True)
display(df_cmp_12)


In [None]:
# Experiments (3) vs (4), prompt + map: one row per (split, model), VAL rows first.
keys = ["use_prompt_plus_map", "openai_prompt_plus_map"]
rows_34 = [
    row_for(model_key, split)
    for split in ["VAL", "TEST"]
    for model_key in keys
]
df_cmp_34 = pd.DataFrame(rows_34).reset_index(drop=True)
display(df_cmp_34)


In [None]:
# All four experiments side by side: one row per (split, model), VAL rows first.
keys = [
    "use_prompt_only",
    "openai_prompt_only",
    "use_prompt_plus_map",
    "openai_prompt_plus_map",
]
rows_all = [
    row_for(model_key, split)
    for split in ["VAL", "TEST"]
    for model_key in keys
]
df_cmp_all = pd.DataFrame(rows_all).reset_index(drop=True)
display(df_cmp_all)


In [None]:
from pathlib import Path
from src.config import PATHS

# Write the three comparison tables as CSV files (without the index column)
# into the training output directory, creating it first if needed.
out_dir = Path(PATHS.TRAIN_OUT)
out_dir.mkdir(parents=True, exist_ok=True)

csv_exports = [
    ("compare_1_vs_2_prompt_only.csv", df_cmp_12),
    ("compare_3_vs_4_prompt_plus_map.csv", df_cmp_34),
    ("compare_1_2_3_4_all.csv", df_cmp_all),
]
for file_name, table in csv_exports:
    table.to_csv(out_dir / file_name, index=False)

print("✅ Saved CSV tables to:", out_dir)
