In [None]:
import numpy as np
import json
from pathlib import Path

def confusion_counts(y_true, probs, thr):
    """Return binary confusion-matrix counts and derived rates at a threshold.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels. Values are cast to int; 1 is counted as positive
        and 0 as negative (any other value falls into none of the four cells).
    probs : array-like
        Scores, one per label. A sample is predicted positive when
        ``probs >= thr``.
    thr : float
        Decision threshold.

    Returns
    -------
    dict
        ``tn``, ``fp``, ``fn``, ``tp`` as ints, plus ``recall``, ``fpr``,
        ``precision`` and ``thr`` as floats.

    Raises
    ------
    ValueError
        If ``y_true`` and ``probs`` do not hold the same number of elements.
    """
    # Accept lists/Series as well as ndarrays, and flatten both inputs:
    # comparing (N,) labels with (N, 1) scores would otherwise broadcast to an
    # (N, N) grid and silently inflate every count.
    y_true = np.asarray(y_true).astype(int).ravel()
    probs = np.asarray(probs).ravel()
    if y_true.size != probs.size:
        raise ValueError(
            f"y_true has {y_true.size} elements but probs has {probs.size}; "
            "they must match one-to-one."
        )

    y_pred = (probs >= thr).astype(int)

    is_neg, is_pos = (y_true == 0), (y_true == 1)
    pred_neg, pred_pos = (y_pred == 0), (y_pred == 1)

    tn = int((is_neg & pred_neg).sum())
    fp = int((is_neg & pred_pos).sum())
    fn = int((is_pos & pred_neg).sum())
    tp = int((is_pos & pred_pos).sum())

    # The 1e-12 term keeps the divisions defined when a denominator count is
    # zero; the numerator is then zero too, so the rate comes out as 0.0.
    recall = tp / (tp + fn + 1e-12)
    fpr = fp / (fp + tn + 1e-12)
    precision = tp / (tp + fp + 1e-12)

    return {"tn": tn, "fp": fp, "fn": fn, "tp": tp,
            "recall": float(recall), "fpr": float(fpr), "precision": float(precision), "thr": float(thr)}

# Load the selected ensemble parameters. Further down, `w` weights the XGB
# probabilities in the blend (CNN gets 1 - w) and `T` is the threshold applied
# to the blended probabilities.
# Read with an explicit encoding, matching the other JSON reads/writes in this
# notebook, so the result does not depend on the platform's locale default.
w = json.loads(Path("../artifacts/ensemble/selected_w.json").read_text(encoding="utf-8"))["w"]
T = json.loads(Path("../artifacts/ensemble/selected_T.json").read_text(encoding="utf-8"))["T"]

# Artifact directories for the two base models.
cnn_dir = Path("../artifacts/cnn")
xgb_dir = Path("../artifacts/xgb")

# Flatten to 1-D: an array saved as a column vector (N, 1) would otherwise
# broadcast against (N,) arrays later instead of combining element-wise.
cnn_test_probs = np.load(cnn_dir / "test_probs.npy").reshape(-1)
y_test = np.load(cnn_dir / "test_y.npy").reshape(-1)  # same y as xgb test_y if you saved consistently

if cnn_test_probs.shape != y_test.shape:
    raise ValueError(
        f"CNN test_probs has {cnn_test_probs.size} elements but test_y has {y_test.size}; "
        "expected one probability per label."
    )


import pandas as pd
import xgboost as xgb

# --- Recompute XGB test probs from Optuna model, aligned to CNN ordering ---
FEAT_PATH = Path("../data/processed/vnat/xgb_features.parquet")
df_feat = pd.read_parquet(FEAT_PATH)

feature_cols = json.loads(Path("../artifacts/features/feature_columns.json").read_text(encoding="utf-8"))

# Flow ids stored alongside the CNN artifacts; they are used to pick and order
# the feature rows (presumably they follow the CNN test-set order -- the label
# comparison below is what actually verifies the alignment).
test_flow_ids_path = cnn_dir / "test_flow_ids.npy"
if not test_flow_ids_path.exists():
    raise FileNotFoundError(
        f"{test_flow_ids_path} not found: the CNN test flow ids are required to "
        "align the XGB feature rows with the CNN test arrays."
    )
test_flow_ids = np.load(test_flow_ids_path).astype(int)

# .loc with a list of labels returns rows in the requested order and raises
# KeyError if any flow id is absent from the feature table.
test_df = df_feat.set_index("flow_id").loc[test_flow_ids].reset_index()

# A duplicated flow_id in the feature table would yield extra rows here.
if len(test_df) != len(test_flow_ids):
    raise ValueError(
        f"Selected {len(test_df)} feature rows for {len(test_flow_ids)} test flow ids; "
        "flow_id is not unique in xgb_features.parquet."
    )

# sanity check alignment -- done before loading the model so a misaligned
# feature table fails fast instead of after prediction.
y_test_from_feats = test_df["label"].to_numpy(dtype=np.int64)
if not np.array_equal(y_test, y_test_from_feats):
    raise ValueError("y_test mismatch between CNN artifacts and xgb_features.parquet (alignment problem).")

X_test = test_df[feature_cols].to_numpy(dtype=np.float32)

booster = xgb.Booster()
booster.load_model(str(xgb_dir / "xgb_model_optuna.json"))

dtest = xgb.DMatrix(X_test, feature_names=feature_cols)
xgb_test_probs = booster.predict(dtest)

# Ensemble: weighted blend of the two models' probabilities
# (w weights XGB, 1 - w weights CNN).
# Guard the shapes first: NumPy would broadcast e.g. (N,) against (N, 1) into
# an (N, N) array rather than blending sample by sample.
if np.shape(xgb_test_probs) != np.shape(cnn_test_probs):
    raise ValueError(
        f"Probability arrays differ in shape: XGB {np.shape(xgb_test_probs)} vs "
        f"CNN {np.shape(cnn_test_probs)}; they must match for an element-wise blend."
    )
p_final = w * xgb_test_probs + (1 - w) * cnn_test_probs

# Base models are reported at a fixed 0.5 threshold; the ensemble at the selected T.
print("Selected w:", w, "Selected T:", T)
print("CNN test:", confusion_counts(y_test, cnn_test_probs, 0.5))
print("XGB test:", confusion_counts(y_test, xgb_test_probs, 0.5))
print("ENS test:", confusion_counts(y_test, p_final, T))

FileNotFoundError: [Errno 2] No such file or directory: '..\\artifacts\\cnn\\test_flow_ids.npy'

In [None]:
# Assemble the evaluation report: the selected ensemble parameters, the metrics
# of each base model at a 0.5 threshold, and the ensemble metrics at T.
out = {}
out["selected"] = {"w": float(w), "T": float(T)}
out["cnn_test_0_5"] = confusion_counts(y_test, cnn_test_probs, 0.5)
out["xgb_test_0_5"] = confusion_counts(y_test, xgb_test_probs, 0.5)
out["ens_test_T"] = confusion_counts(y_test, p_final, T)

# Class balance of the test labels (label 1 is reported as VPN, 0 as non-VPN).
out["notes"] = {
    "test_vpn_count": int(np.count_nonzero(y_test == 1)),
    "test_nonvpn_count": int(np.count_nonzero(y_test == 0)),
}

# Write the report as indented UTF-8 JSON, creating the output folder if needed.
eval_dir = Path("../artifacts/eval")
eval_dir.mkdir(parents=True, exist_ok=True)
report_text = json.dumps(out, indent=2)
Path("../artifacts/eval/vnat_internal.json").write_text(report_text, encoding="utf-8")
print("Saved ../artifacts/eval/vnat_internal.json")

Saved ../artifacts/eval/vnat_internal.json
