In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold
from sklearn.base import clone

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif, VarianceThreshold

from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import ExtraTreesClassifier

from sklearn.metrics import confusion_matrix, balanced_accuracy_score


In [2]:
CSV_PATH = "training_data_75.csv"
SEED = 42
TEST_SIZE_FILES = 0.25

LOW_MAX_THR = 2  # low = 1-2, high = 4-64

N_SPLITS_GATE = 5
N_SPLITS_LOW  = 5
N_SPLITS_HIGH = 4


In [3]:
df = pd.read_csv(CSV_PATH)

y_thr = df["min_threshold"].astype(int).values
y_gate = (y_thr > LOW_MAX_THR).astype(int)
groups = df["file"].astype(str).values

drop_cols = [
    "min_threshold", "file", "family",
    "forward_runtime", "max_fidelity_achieved",
    "forward_shots", "forward_peak_rss_mb", "n_thresholds_tested",
]
drop_cols = [c for c in drop_cols if c in df.columns]

X_df = df.drop(columns=drop_cols).copy()

print("Rows:", len(df), "| Unique files:", df["file"].nunique())
print("Num features:", X_df.shape[1])
print("Thr counts:", dict(zip(*np.unique(y_thr, return_counts=True))))
print("Gate counts:", dict(zip(*np.unique(y_gate, return_counts=True))))


Rows: 137 | Unique files: 36
Num features: 64
Thr counts: {np.int64(1): np.int64(54), np.int64(2): np.int64(46), np.int64(4): np.int64(6), np.int64(8): np.int64(17), np.int64(16): np.int64(12), np.int64(64): np.int64(2)}
Gate counts: {np.int64(0): np.int64(100), np.int64(1): np.int64(37)}


In [4]:
def stratified_file_split_by_gate(df, test_size=0.25, seed=42, low_max_thr=2):
    rng = np.random.RandomState(seed)

    file_info = df.groupby("file", as_index=False).agg(
        thr=("min_threshold", "first"),
        n_rows=("min_threshold", "size")
    )
    file_info["gate"] = (file_info["thr"].astype(int) > low_max_thr).astype(int)

    low_files  = file_info.loc[file_info["gate"] == 0, "file"].astype(str).tolist()
    high_files = file_info.loc[file_info["gate"] == 1, "file"].astype(str).tolist()

    rng.shuffle(low_files)
    rng.shuffle(high_files)

    n_low_test  = max(1, int(round(len(low_files) * test_size)))  if len(low_files)  > 1 else 1
    n_high_test = max(1, int(round(len(high_files) * test_size))) if len(high_files) > 1 else 1

    test_files  = set(low_files[:n_low_test] + high_files[:n_high_test])
    train_files = set(file_info["file"].astype(str)) - test_files

    train_idx = df.index[df["file"].astype(str).isin(train_files)].to_numpy()
    test_idx  = df.index[df["file"].astype(str).isin(test_files)].to_numpy()

    return train_idx, test_idx, train_files, test_files

train_idx, test_idx, train_files, test_files = stratified_file_split_by_gate(
    df, test_size=TEST_SIZE_FILES, seed=SEED, low_max_thr=LOW_MAX_THR
)

print("Train rows:", len(train_idx), "| Test rows:", len(test_idx))
print("Train files:", len(train_files), "| Test files:", len(test_files))
print("Overlap files:", len(train_files.intersection(test_files)))
print("Gate test counts:", dict(zip(*np.unique(y_gate[test_idx], return_counts=True))))
print("Thr  test counts:", dict(zip(*np.unique(y_thr[test_idx], return_counts=True))))


Train rows: 103 | Test rows: 34
Train files: 27 | Test files: 9
Overlap files: 0
Gate test counts: {np.int64(0): np.int64(24), np.int64(1): np.int64(10)}
Thr  test counts: {np.int64(1): np.int64(14), np.int64(2): np.int64(10), np.int64(8): np.int64(4), np.int64(16): np.int64(4), np.int64(64): np.int64(2)}


In [5]:
split = {
    "X_train": X_df.iloc[train_idx].reset_index(drop=True),
    "X_test":  X_df.iloc[test_idx].reset_index(drop=True),
    "y_train": y_thr[train_idx].astype(int),
    "y_test":  y_thr[test_idx].astype(int),
    "gate_train": y_gate[train_idx].astype(int),
    "gate_test":  y_gate[test_idx].astype(int),
    "groups_train": df.loc[train_idx, "file"].astype(str).values,
    "groups_test":  df.loc[test_idx, "file"].astype(str).values,
}

print("Num features (train):", split["X_train"].shape[1])
print("Unique train files:", len(np.unique(split["groups_train"])))
print("Unique test  files:", len(np.unique(split["groups_test"])))


Num features (train): 64
Unique train files: 27
Unique test  files: 9


In [6]:
X_train = split["X_train"]
X_test  = split["X_test"]

y_thr_train  = split["y_train"]
y_thr_test   = split["y_test"]
y_gate_train = split["gate_train"]
y_gate_test  = split["gate_test"]

groups_train = split["groups_train"]
groups_test  = split["groups_test"]

# gate subset
Xg_tr, yg_tr, gg_tr = X_train, y_gate_train, groups_train
Xg_te, yg_te, gg_te = X_test,  y_gate_test,  groups_test

# low subset
low_tr_mask = (y_thr_train <= LOW_MAX_THR)
low_te_mask = (y_thr_test  <= LOW_MAX_THR)

Xl_tr = X_train.loc[low_tr_mask].reset_index(drop=True)
yl_tr = y_thr_train[low_tr_mask]
gl_tr = groups_train[low_tr_mask]

Xl_te = X_test.loc[low_te_mask].reset_index(drop=True)
yl_te = y_thr_test[low_te_mask]
gl_te = groups_test[low_te_mask]

# high subset
high_tr_mask = (y_thr_train > LOW_MAX_THR)
high_te_mask = (y_thr_test  > LOW_MAX_THR)

Xh_tr = X_train.loc[high_tr_mask].reset_index(drop=True)
yh_tr = y_thr_train[high_tr_mask]
gh_tr = groups_train[high_tr_mask]

Xh_te = X_test.loc[high_te_mask].reset_index(drop=True)
yh_te = y_thr_test[high_te_mask]
gh_te = groups_test[high_te_mask]

subsets = {
    "gate": (Xg_tr, yg_tr, gg_tr, Xg_te, yg_te, gg_te),
    "low":  (Xl_tr, yl_tr, gl_tr, Xl_te, yl_te, gl_te),
    "high": (Xh_tr, yh_tr, gh_tr, Xh_te, yh_te, gh_te),
}

print("Gate train:", Xg_tr.shape, "| Gate test:", Xg_te.shape)
print("Low  train:", Xl_tr.shape, "| low classes:", sorted(np.unique(yl_tr)))
print("High train:", Xh_tr.shape, "| high classes:", sorted(np.unique(yh_tr)))


Gate train: (103, 64) | Gate test: (34, 64)
Low  train: (76, 64) | low classes: [np.int64(1), np.int64(2)]
High train: (27, 64) | high classes: [np.int64(4), np.int64(8), np.int64(16)]


In [7]:
def reward_scalar(y_true, y_pred):
    y_true = int(y_true); y_pred = int(y_pred)
    return 0.0 if y_pred < y_true else float(y_true) / float(y_pred)

def mean_reward(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=int)
    y_pred = np.asarray(y_pred, dtype=int)
    return float(np.mean([reward_scalar(t, p) for t, p in zip(y_true, y_pred)]))

def reward_matrix(classes):
    classes = np.asarray(classes, dtype=int)
    R = np.zeros((len(classes), len(classes)), dtype=float)
    for i, t in enumerate(classes):
        for j, p in enumerate(classes):
            R[i, j] = reward_scalar(t, p)
    return R

def predict_max_expected_reward(proba_aligned, classes):
    classes = np.asarray(classes, dtype=int)
    R = reward_matrix(classes)
    exp_reward = proba_aligned @ R
    best_j = np.argmax(exp_reward, axis=1)
    return classes[best_j]

def align_proba_to_classes(estimator, proba, classes_all):
    classes_all = np.asarray(classes_all, dtype=int)
    est_classes = np.asarray(estimator.classes_, dtype=int)

    out = np.zeros((proba.shape[0], len(classes_all)), dtype=float)
    col_map = {c: i for i, c in enumerate(est_classes)}

    for j, c in enumerate(classes_all):
        if c in col_map:
            out[:, j] = proba[:, col_map[c]]

    row_sums = out.sum(axis=1, keepdims=True)
    mask = row_sums.squeeze() > 0
    out[mask] = out[mask] / row_sums[mask]
    return out


In [8]:
def build_preprocessor(X):
    cat_cols = [c for c in X.columns if X[c].dtype == "object" or str(X[c].dtype).startswith("category")]
    num_cols = [c for c in X.columns if c not in cat_cols]

    num_pipe = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])

    cat_pipe = Pipeline([
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ])

    return ColumnTransformer([
        ("num", num_pipe, num_cols),
        ("cat", cat_pipe, cat_cols),
    ])


In [9]:
def build_selector(score_func="mi", k_best=None):
    if k_best is None:
        return None
    if score_func == "mi":
        return SelectKBest(mutual_info_classif, k=k_best)
    if score_func == "f":
        return SelectKBest(f_classif, k=k_best)
    raise ValueError("score_func must be 'mi' or 'f'")

def model_lr_binary(X_ref, C=1.0, k_best=10, score_func="mi"):
    pre = build_preprocessor(X_ref)
    sel = build_selector(score_func=score_func, k_best=k_best)

    lr = LogisticRegression(
        C=C, solver="liblinear", max_iter=8000, random_state=SEED
    )

    steps = [("pre", pre), ("vt", VarianceThreshold())]
    if sel is not None:
        steps.append(("sel", sel))
    steps.append(("clf", lr))
    return Pipeline(steps)

def model_et(X_ref, n_estimators=900, max_depth=None, min_samples_leaf=1, k_best=15, score_func="f"):
    pre = build_preprocessor(X_ref)
    sel = build_selector(score_func=score_func, k_best=k_best)

    et = ExtraTreesClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        random_state=SEED
    )

    steps = [("pre", pre), ("vt", VarianceThreshold())]
    if sel is not None:
        steps.append(("sel", sel))
    steps.append(("clf", et))
    return Pipeline(steps)

def model_lr_ovr_multiclass(X_ref, C=0.01, k_best=None, score_func="mi"):
    pre = build_preprocessor(X_ref)
    sel = build_selector(score_func=score_func, k_best=k_best)

    base = LogisticRegression(
        C=C, solver="liblinear", max_iter=12000, random_state=SEED
    )
    ovr = OneVsRestClassifier(base)

    steps = [("pre", pre), ("vt", VarianceThreshold())]
    if sel is not None:
        steps.append(("sel", sel))
    steps.append(("clf", ovr))
    return Pipeline(steps)


In [10]:
def make_folds(X, y, groups, n_splits):
    gkf = GroupKFold(n_splits=n_splits)
    return [(tr, te) for tr, te in gkf.split(X, y, groups=groups)]

def eval_gate_cv(estimator, X, y_gate, groups, folds):
    baccs = []
    fn_rates = []
    fp_rates = []

    for tr_idx, te_idx in folds:
        est = clone(estimator)
        est.fit(X.iloc[tr_idx], y_gate[tr_idx])

        y_pred = est.predict(X.iloc[te_idx])
        y_true = y_gate[te_idx]

        true_high = (y_true == 1).sum()
        true_low  = (y_true == 0).sum()
        fn = ((y_true == 1) & (y_pred == 0)).sum()
        fp = ((y_true == 0) & (y_pred == 1)).sum()

        fn_rate = fn / true_high if true_high else 0.0
        fp_rate = fp / true_low  if true_low else 0.0

        baccs.append(balanced_accuracy_score(y_true, y_pred))
        fn_rates.append(fn_rate)
        fp_rates.append(fp_rate)

    return float(np.mean(baccs)), float(np.std(baccs)), float(np.max(fn_rates)), float(np.mean(fp_rates))

def eval_threshold_cv_reward(estimator, X, y_thr, groups, folds, classes_ladder):
    scores = []
    classes_ladder = np.asarray(classes_ladder, dtype=int)

    for tr_idx, te_idx in folds:
        est = clone(estimator)
        est.fit(X.iloc[tr_idx], y_thr[tr_idx])

        proba = est.predict_proba(X.iloc[te_idx])
        proba_aligned = align_proba_to_classes(est, proba, classes_ladder)

        y_pred = predict_max_expected_reward(proba_aligned, classes_ladder)
        scores.append(mean_reward(y_thr[te_idx], y_pred))

    return float(np.mean(scores)), float(np.std(scores))


In [11]:
# --- CV folds ---
gate_folds = make_folds(Xg_tr, yg_tr, gg_tr, N_SPLITS_GATE)
low_folds  = make_folds(Xl_tr, yl_tr, gl_tr, N_SPLITS_LOW)
high_folds = make_folds(Xh_tr, yh_tr, gh_tr, N_SPLITS_HIGH)

# --- Manual tiny grids ---
gate_grid = []
for C in [0.1, 1.0, 3.0]:
    for k in [10, 20]:
        name = f"gate_lr_C{C}_k{k}_mi"
        est = model_lr_binary(Xg_tr, C=C, k_best=k, score_func="mi")
        mean_bacc, std_bacc, fn_max, fp_mean = eval_gate_cv(est, Xg_tr, yg_tr, gg_tr, gate_folds)
        gate_grid.append((name, mean_bacc, std_bacc, fn_max, fp_mean))

gate_grid = sorted(gate_grid, key=lambda x: (x[3], -x[1], x[4]))
print("Top gate (name, mean_bacc, std, fn_max, fp_mean):")
for r in gate_grid[:10]:
    print(r)
best_gate_name = gate_grid[0][0]

low_classes = np.array([1,2], dtype=int)
low_grid = []
for n in [400, 900]:
    for d in [None, 8]:
        for k in [10, 15, 30]:
            name = f"low_et_n{n}_d{d}_k{k}_f"
            est = model_et(Xl_tr, n_estimators=n, max_depth=d, k_best=k, score_func="f")
            mean_r, std_r = eval_threshold_cv_reward(est, Xl_tr, yl_tr, gl_tr, low_folds, low_classes)
            low_grid.append((name, mean_r, std_r))

low_grid = sorted(low_grid, key=lambda x: (-x[1], x[2]))
print("\nTop low (name, mean_reward, std):")
for r in low_grid[:10]:
    print(r)
best_low_name = low_grid[0][0]

high_classes = np.array([4,8,16,64], dtype=int)
high_grid = []
for C in [0.001, 0.01, 0.1, 1.0]:
    for k in [None, 10, 20]:
        for sf in ["mi", "f"]:
            name = f"high_lr_ovr_C{C}_k{k}_{sf}"
            est = model_lr_ovr_multiclass(Xh_tr, C=C, k_best=k, score_func=sf)
            mean_r, std_r = eval_threshold_cv_reward(est, Xh_tr, yh_tr, gh_tr, high_folds, high_classes)
            high_grid.append((name, mean_r, std_r))

high_grid = sorted(high_grid, key=lambda x: (-x[1], x[2]))
print("\nTop high (name, mean_reward, std):")
for r in high_grid[:10]:
    print(r)
best_high_name = high_grid[0][0]

print("\nSelected winners:")
print("Gate:", best_gate_name)
print("Low :", best_low_name)
print("High:", best_high_name)




Top gate (name, mean_bacc, std, fn_max, fp_mean):
('gate_lr_C3.0_k20_mi', 0.7150000000000001, 0.19209372712298547, 1.0, 0.09)
('gate_lr_C1.0_k20_mi', 0.6900000000000001, 0.19078784028338913, 1.0, 0.13999999999999999)
('gate_lr_C0.1_k20_mi', 0.6483333333333333, 0.28869437896232836, 1.0, 0.22333333333333333)
('gate_lr_C3.0_k10_mi', 0.5349999999999999, 0.14106735979665885, 1.0, 0.09)
('gate_lr_C1.0_k10_mi', 0.4766666666666667, 0.18844392033470092, 1.0, 0.20666666666666664)
('gate_lr_C0.1_k10_mi', 0.44333333333333325, 0.2225109235770485, 1.0, 0.2733333333333333)

Top low (name, mean_reward, std):
('low_et_n400_dNone_k30_f', 0.8916666666666666, 0.0565194165260439)
('low_et_n400_d8_k30_f', 0.8916666666666666, 0.0565194165260439)
('low_et_n900_dNone_k30_f', 0.8916666666666666, 0.0565194165260439)
('low_et_n900_d8_k30_f', 0.8916666666666666, 0.0565194165260439)
('low_et_n900_dNone_k15_f', 0.8916666666666666, 0.097182531580755)
('low_et_n900_d8_k15_f', 0.8916666666666666, 0.097182531580755)
('l

In [13]:
import re

def parse_gate(name):
    # example: gate_lr_C1.0_k10_mi
    m = re.match(r"^gate_lr_C(?P<C>[-+0-9.eE]+)_k(?P<k>\d+)_(?P<sf>mi|f)$", name)
    if not m:
        raise ValueError(f"Bad gate name format: {name}")
    C = float(m.group("C"))
    k = int(m.group("k"))
    return C, k

def parse_low(name):
    # example: low_et_n900_dNone_k15_f  OR low_et_n400_d8_k10_f
    m = re.match(r"^low_et_n(?P<n>\d+)_d(?P<d>None|\d+)_k(?P<k>\d+)_(?P<sf>mi|f)$", name)
    if not m:
        raise ValueError(f"Bad low name format: {name}")
    n = int(m.group("n"))
    d_raw = m.group("d")
    d = None if d_raw == "None" else int(d_raw)
    k = int(m.group("k"))
    return n, d, k

def parse_high(name):
    # example: high_lr_ovr_C0.01_kNone_mi  OR high_lr_ovr_C1.0_k20_f
    m = re.match(r"^high_lr_ovr_C(?P<C>[-+0-9.eE]+)_k(?P<k>None|\d+)_(?P<sf>mi|f)$", name)
    if not m:
        raise ValueError(f"Bad high name format: {name}")
    C = float(m.group("C"))
    k_raw = m.group("k")
    k = None if k_raw == "None" else int(k_raw)
    sf = m.group("sf")
    return C, k, sf

# ---- parse winners ----
Cg, Kg = parse_gate(best_gate_name)
Nl, Dl, Kl = parse_low(best_low_name)
Ch, Kh, Sh = parse_high(best_high_name)

# ---- build winners ----
gate_best = model_lr_binary(Xg_tr, C=Cg, k_best=Kg, score_func="mi")
low_best  = model_et(Xl_tr, n_estimators=Nl, max_depth=Dl, k_best=Kl, score_func="f")
high_best = model_lr_ovr_multiclass(Xh_tr, C=Ch, k_best=Kh, score_func=Sh)

# ---- fit ----
gate_best.fit(Xg_tr, yg_tr)
low_best.fit(Xl_tr, yl_tr)
high_best.fit(Xh_tr, yh_tr)

print("Parsed winners:")
print("Gate:", best_gate_name, "| C=", Cg, "k=", Kg)
print("Low :", best_low_name,  "| n=", Nl, "d=", Dl, "k=", Kl)
print("High:", best_high_name, "| C=", Ch, "k=", Kh, "sf=", Sh)
print("Models fitted.")


Parsed winners:
Gate: gate_lr_C3.0_k20_mi | C= 3.0 k= 20
Low : low_et_n400_dNone_k30_f | n= 400 d= None k= 30
High: high_lr_ovr_C0.1_k10_mi | C= 0.1 k= 10 sf= mi
Models fitted.


In [14]:
def pipeline_predict_thresholds(gate_model, low_model, high_model, X_test, y_gate_true, y_thr_true):
    gate_pred = gate_model.predict(X_test).astype(int)

    y_pred = np.zeros(len(X_test), dtype=int)
    route = np.array(["LOW"] * len(X_test), dtype=object)
    route[gate_pred == 1] = "HIGH"

    low_idx = np.where(gate_pred == 0)[0]
    if len(low_idx) > 0:
        proba = low_model.predict_proba(X_test.iloc[low_idx])
        proba = align_proba_to_classes(low_model, proba, np.array([1,2], dtype=int))
        y_pred[low_idx] = predict_max_expected_reward(proba, np.array([1,2], dtype=int))

    high_idx = np.where(gate_pred == 1)[0]
    if len(high_idx) > 0:
        proba = high_model.predict_proba(X_test.iloc[high_idx])
        proba = align_proba_to_classes(high_model, proba, np.array([4,8,16,64], dtype=int))
        y_pred[high_idx] = predict_max_expected_reward(proba, np.array([4,8,16,64], dtype=int))

    dbg = pd.DataFrame({
        "gate_true": y_gate_true.astype(int),
        "gate_pred": gate_pred.astype(int),
        "route": route,
        "thr_true": y_thr_true.astype(int),
        "thr_pred": y_pred.astype(int),
    })
    return y_pred, dbg


In [15]:
y_pred_test, dbg = pipeline_predict_thresholds(
    gate_best, low_best, high_best,
    X_test, y_gate_test, y_thr_test
)

dbg["reward"] = [reward_scalar(t, p) for t, p in zip(dbg["thr_true"], dbg["thr_pred"])]

cm = confusion_matrix(dbg["gate_true"], dbg["gate_pred"], labels=[0,1])

print("\n=== HOLDOUT RESULTS (seed =", SEED, ") ===")
print("Test rows:", len(dbg), "| Unique test files:", len(np.unique(groups_test)))
print("Mean reward:", float(np.mean(dbg["reward"].values)))
print("Zero-rate:", float(np.mean(dbg["reward"].values == 0.0)))
print("Mean TRUE low:", float(dbg.loc[dbg["gate_true"]==0, "reward"].mean()) if (dbg["gate_true"]==0).any() else None)
print("Mean TRUE high:", float(dbg.loc[dbg["gate_true"]==1, "reward"].mean()) if (dbg["gate_true"]==1).any() else None)
print("\nGate confusion matrix (rows true [low,high] x cols pred [low,high]):\n", cm)

print("\nWorst 10 rows:")
print(dbg.sort_values("reward").head(10).to_string(index=False))



=== HOLDOUT RESULTS (seed = 42 ) ===
Test rows: 34 | Unique test files: 9
Mean reward: 0.7058823529411765
Zero-rate: 0.23529411764705882
Mean TRUE low: 0.9166666666666666
Mean TRUE high: 0.2

Gate confusion matrix (rows true [low,high] x cols pred [low,high]):
 [[24  0]
 [ 4  6]]

Worst 10 rows:
 gate_true  gate_pred route  thr_true  thr_pred  reward
         0          0   LOW         2         1     0.0
         0          0   LOW         2         1     0.0
         1          1  HIGH        64        16     0.0
         1          1  HIGH        64        16     0.0
         1          0   LOW        16         1     0.0
         1          0   LOW        16         1     0.0
         1          0   LOW        16         1     0.0
         1          0   LOW        16         1     0.0
         1          1  HIGH         8        16     0.5
         1          1  HIGH         8        16     0.5


In [16]:
def run_once(seed):
    train_idx, test_idx, _, _ = stratified_file_split_by_gate(
        df, test_size=TEST_SIZE_FILES, seed=seed, low_max_thr=LOW_MAX_THR
    )

    X_train = X_df.iloc[train_idx].reset_index(drop=True)
    X_test  = X_df.iloc[test_idx].reset_index(drop=True)

    y_thr_train = y_thr[train_idx].astype(int)
    y_thr_test  = y_thr[test_idx].astype(int)
    y_gate_train = (y_thr_train > LOW_MAX_THR).astype(int)
    y_gate_test  = (y_thr_test  > LOW_MAX_THR).astype(int)

    groups_train = df.loc[train_idx, "file"].astype(str).values
    groups_test  = df.loc[test_idx, "file"].astype(str).values

    # subsets
    low_tr_mask = (y_thr_train <= LOW_MAX_THR)
    high_tr_mask = (y_thr_train > LOW_MAX_THR)

    Xg_tr, yg_tr, gg_tr = X_train, y_gate_train, groups_train
    Xl_tr, yl_tr, gl_tr = X_train.loc[low_tr_mask].reset_index(drop=True), y_thr_train[low_tr_mask], groups_train[low_tr_mask]
    Xh_tr, yh_tr, gh_tr = X_train.loc[high_tr_mask].reset_index(drop=True), y_thr_train[high_tr_mask], groups_train[high_tr_mask]

    # rebuild winners using parsed params (same as CELL 11)
    gate_m = model_lr_binary(Xg_tr, C=Cg, k_best=Kg, score_func="mi")
    low_m  = model_et(Xl_tr, n_estimators=Nl, max_depth=Dl, k_best=Kl, score_func="f")
    high_m = model_lr_ovr_multiclass(Xh_tr, C=Ch, k_best=Kh, score_func=Sh)

    gate_m.fit(Xg_tr, yg_tr)
    low_m.fit(Xl_tr, yl_tr)
    high_m.fit(Xh_tr, yh_tr)

    y_pred, dbg = pipeline_predict_thresholds(gate_m, low_m, high_m, X_test, y_gate_test, y_thr_test)
    dbg["reward"] = [reward_scalar(t, p) for t, p in zip(dbg["thr_true"], dbg["thr_pred"])]

    mean_reward = float(np.mean(dbg["reward"].values))
    zero_rate = float(np.mean(dbg["reward"].values == 0.0))

    cm = confusion_matrix(dbg["gate_true"], dbg["gate_pred"], labels=[0,1])
    true_high = (dbg["gate_true"] == 1).sum()
    true_low  = (dbg["gate_true"] == 0).sum()
    fn_rate = float(((dbg["gate_true"]==1) & (dbg["gate_pred"]==0)).sum() / true_high) if true_high else 0.0
    fp_rate = float(((dbg["gate_true"]==0) & (dbg["gate_pred"]==1)).sum() / true_low)  if true_low else 0.0

    return {"seed": seed, "mean_reward": mean_reward, "zero_rate": zero_rate, "gate_fn": fn_rate, "gate_fp": fp_rate, "test_files": len(np.unique(groups_test)), "gate_cm": cm}

seeds = [42, 7, 99]
res = [run_once(s) for s in seeds]

print("\n=== MULTI-SEED SUMMARY ===")
for r in res:
    print("seed=", r["seed"], "| mean_reward=", r["mean_reward"], "| zero_rate=", r["zero_rate"],
          "| gate_fn=", r["gate_fn"], "| gate_fp=", r["gate_fp"], "| test_files=", r["test_files"])
    print("gate_cm:\n", r["gate_cm"], "\n")

print("Mean over seeds:", float(np.mean([r["mean_reward"] for r in res])))
print("Std  over seeds:", float(np.std([r["mean_reward"] for r in res])))
print("Max  zero_rate :", float(np.max([r["zero_rate"] for r in res])))



=== MULTI-SEED SUMMARY ===
seed= 42 | mean_reward= 0.5955882352941176 | zero_rate= 0.23529411764705882 | gate_fn= 0.0 | gate_fp= 0.16666666666666666 | test_files= 9
gate_cm:
 [[20  4]
 [ 0 10]] 

seed= 7 | mean_reward= 0.7291666666666666 | zero_rate= 0.1111111111111111 | gate_fn= 0.0 | gate_fp= 0.16666666666666666 | test_files= 9
gate_cm:
 [[20  4]
 [ 0 12]] 

seed= 99 | mean_reward= 0.6857142857142857 | zero_rate= 0.2 | gate_fn= 0.6363636363636364 | gate_fp= 0.0 | test_files= 9
gate_cm:
 [[24  0]
 [ 7  4]] 

Mean over seeds: 0.67015639589169
Std  over seeds: 0.05563173724764618
Max  zero_rate : 0.23529411764705882
