In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 05_benchmarks.ipynb
# ─────────────────────────────────────────────────────────────────────────────

%run setup.py
import numpy as np, pandas as pd, torch
from pathlib import Path
from src.pretrained_models import default_experts, MoEClassifier

# ─────────────────────────────────────────────────────────────────────────────
# 1) Load the indices of the experts that the MoE gate selected
#    (these were saved under models/gates/moe_selected_idxs.npy)
# ─────────────────────────────────────────────────────────────────────────────
# Indices of the experts kept by the MoE gate, converted to a plain Python list.
# NOTE(review): this path has no "../" prefix while the expert assets below do;
# confirm both resolve against the working directory left by setup.py.
subset = np.load("models/gates/moe_selected_idxs.npy").tolist()

# ─────────────────────────────────────────────────────────────────────────────
# 2) Build the full default expert pool, then keep only the selected experts,
#    preserving the order given by 'subset'.
#    'lr_path' must point to the QuoraDistilExpert LR pickle under models/pretrained/
# ─────────────────────────────────────────────────────────────────────────────
expert_pool = default_experts(
    lr_path="../models/pretrained/quoradistil_lr.pkl",
    emb_path="../data/processed/question_embeddings.npy",
)
experts = [expert_pool[idx] for idx in subset]
del expert_pool  # only the selected experts are needed from here on

# ─────────────────────────────────────────────────────────────────────────────
# 3) Reconstruct the MoEClassifier, load the saved gate weights, then eval on TEST
#    (the final gate state was saved under models/gates/final_moe_gate.pt)
# ─────────────────────────────────────────────────────────────────────────────
# epochs=0: presumably skips any gate training at construction (TODO confirm
# against MoEClassifier); the trained gate weights are restored from disk below.
moe = MoEClassifier(experts, epochs=0, lr=1)

# map_location="cpu" remaps the checkpoint's tensors onto the CPU while loading.
gate_state = torch.load("models/gates/final_moe_gate.pt", map_location="cpu")
moe.gate.load_state_dict(gate_state)

# ─────────────────────────────────────────────────────────────────────────────
# 4) Load TEST split, run MoE predict, and print log‐loss / accuracy / F1
# ─────────────────────────────────────────────────────────────────────────────
from sklearn.metrics import log_loss, accuracy_score, f1_score

# Rows missing either question text are dropped before the pairs are built.
test_df = pd.read_csv("../data/splits/test.csv")
test_df = test_df.dropna(subset=["question1", "question2"])
pairs = list(zip(test_df["question1"], test_df["question2"]))
y_true = test_df["is_duplicate"].values.astype(int)

# The scores returned by predict_prob feed log-loss directly and are
# thresholded at 0.5 to obtain the hard labels used for accuracy and F1.
p = moe.predict_prob(pairs)
y_pred = p > 0.5

ll = log_loss(y_true, p)
acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
print(f"TEST - LL {ll:.4f}  -  ACC {acc:.4f}  -  F1 {f1:.4f}")