# 05 - Ensemble + Robustness V2

Sélection de runs robustes, optimisation des poids, et stress tests public/private.


In [1]:
import sys
from pathlib import Path
import json
import numpy as np
import pandas as pd

# Use the working directory as project root when it contains ``src``;
# otherwise fall back to its parent (the notebook is run from a subfolder).
ROOT = Path.cwd() if (Path.cwd() / "src").exists() else Path.cwd().parent
# Put the root at the front of the import path once, so ``src`` is importable.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

from src.v2_pipeline import (
    DEFAULT_V2_DIR,
    ensure_dir,
    load_json,
    optimize_non_negative_weights,
    rmse,
    select_final_models,
    simulate_public_private_shakeup_v2,
    build_model_cards,
)
# Directory under the project root that every artifact in this notebook is
# read from and written to. NOTE(review): ensure_dir presumably creates the
# directory when missing and returns the path -- confirm in src.v2_pipeline.
ARTIFACT_V2 = ensure_dir(ROOT / DEFAULT_V2_DIR)


In [2]:
# Inputs: the run registry (CSV) and the out-of-fold predictions (parquet).
registry_path = ARTIFACT_V2 / "run_registry_v2.csv"
oof_path = ARTIFACT_V2 / "oof_predictions_v2.parquet"
run_df = pd.read_csv(registry_path)
oof = pd.read_parquet(oof_path)

# Pick the final runs with the "stability_private" risk policy and persist
# the selection next to the other V2 artifacts.
selected = select_final_models(run_df, risk_policy="stability_private")
selected_path = ARTIFACT_V2 / "selected_models_v2.csv"
selected.to_csv(selected_path, index=False)

# Display the first rows of the selection as the cell output.
selected.head(10)


Unnamed: 0,run_id,feature_set,engine,family,config_id,seed,severity_mode,calibration,tail_mapper,rmse_aux_blocked5,rmse_primary_time,rmse_secondary_group,q99_aux_blocked5,q99_primary_time,q99_secondary_group,gap_secondary,gap_aux,tail_penalty,accepted,selection_score
0,base_v2|catboost|two_part_tweedie|cb_v2_c1|42|...,base_v2,catboost,two_part_tweedie,cb_v2_c1,42,weighted_tail,isotonic,isotonic,542.595859,543.897946,542.188168,0.298799,0.328733,0.312634,-1.709778,-1.302087,0.671267,True,545.911748
1,base_v2|catboost|two_part_tweedie|cb_v2_c1|42|...,base_v2,catboost,two_part_tweedie,cb_v2_c1,42,classic,isotonic,isotonic,542.571435,544.026383,542.170569,0.283293,0.321118,0.314721,-1.855814,-1.454948,0.678882,True,546.06303
2,base_v2|catboost|two_part_classic|cb_v2_c1|42|...,base_v2,catboost,two_part_classic,cb_v2_c1,42,weighted_tail,isotonic,isotonic,542.577648,544.010358,542.201929,0.297873,0.294376,0.319492,-1.808429,-1.432711,0.705624,True,546.127231
3,base_v2|catboost|two_part_classic|cb_v2_c1|42|...,base_v2,catboost,two_part_classic,cb_v2_c1,42,classic,isotonic,isotonic,542.670668,544.092828,542.083295,0.294504,0.31024,0.320485,-2.009533,-1.422161,0.68976,True,546.162109
4,base_v2|xgboost|two_part_tweedie|xgb_v2_c1|42|...,base_v2,xgboost,two_part_tweedie,xgb_v2_c1,42,classic,none,isotonic,542.888025,544.322745,542.289359,0.288082,0.330594,0.293308,-2.033386,-1.43472,0.669406,True,546.330964
5,base_v2|lightgbm|two_part_classic|lgb_v2_c1|42...,base_v2,lightgbm,two_part_classic,lgb_v2_c1,42,classic,isotonic,isotonic,543.121771,544.351093,542.72567,0.285694,0.336075,0.311227,-1.625423,-1.229321,0.663925,True,546.342868


In [3]:
def build_matrix(pred_df, split_name, run_ids, is_test=0):
    """Pivot long-format predictions into a (row x run) matrix plus a target.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Long-format predictions. The columns read here are ``split``,
        ``is_test``, ``run_id``, ``row_idx``, ``pred_prime`` and ``y_sev``.
    split_name : str
        Value of the ``split`` column to keep.
    run_ids : iterable
        Run identifiers to keep (matched against ``run_id``).
    is_test : int, default 0
        Value of the ``is_test`` column to keep. When it is not 0 no target
        is looked up and an all-NaN float Series is returned instead.

    Returns
    -------
    wide : pandas.DataFrame
        One row per ``row_idx``, one column per ``run_id``, holding the first
        ``pred_prime`` found for each pair.
    y : pandas.Series
        Target aligned on ``wide.index``: the first ``y_sev`` per ``row_idx``
        when ``is_test == 0``, NaN otherwise.
    """
    keep = (
        (pred_df["split"] == split_name)
        & (pred_df["is_test"] == is_test)
        & pred_df["run_id"].isin(run_ids)
    )
    d = pred_df.loc[keep]
    wide = d.pivot_table(index="row_idx", columns="run_id", values="pred_prime", aggfunc="first")
    if is_test == 0:
        # pivot_table can drop rows whose predictions are all missing, whereas
        # the groupby keeps them. Reindex so that y and wide always share the
        # same index and callers can apply a mask built from wide to y.
        y = d.groupby("row_idx")["y_sev"].first().reindex(wide.index)
    else:
        y = pd.Series(index=wide.index, dtype=float)
    return wide, y

# Prediction matrix (rows x selected runs) and target on the primary split.
run_ids = selected["run_id"].tolist()
Xp, yp = build_matrix(oof, "primary_time", run_ids, is_test=0)

# Keep only the rows for which every selected run produced a prediction.
mask = ~Xp.isna().any(axis=1)
Xp_fit = Xp[mask]
yp_fit = yp.loc[mask]

# Fit the blend weights, then score the blended prediction on the same rows.
fit_matrix = Xp_fit.values
fit_target = yp_fit.values
w = optimize_non_negative_weights(fit_matrix, fit_target)
weight_map = dict(zip(Xp_fit.columns.tolist(), (float(weight) for weight in w)))
ens_primary = fit_matrix @ w
ens_primary_rmse = rmse(fit_target, ens_primary)

print("models:", len(run_ids))
print("primary rmse:", ens_primary_rmse)
weight_map


models: 6
primary rmse: 545.8762177657387


{'base_v2|catboost|two_part_classic|cb_v2_c1|42|classic|isotonic|isotonic': 1.1371388189090222e-16,
 'base_v2|catboost|two_part_classic|cb_v2_c1|42|weighted_tail|isotonic|isotonic': 7.521521065267763e-18,
 'base_v2|catboost|two_part_tweedie|cb_v2_c1|42|classic|isotonic|isotonic': 0.0,
 'base_v2|catboost|two_part_tweedie|cb_v2_c1|42|weighted_tail|isotonic|isotonic': 0.7355516926674056,
 'base_v2|lightgbm|two_part_classic|lgb_v2_c1|42|classic|isotonic|isotonic': 0.2644483073325944,
 'base_v2|xgboost|two_part_tweedie|xgb_v2_c1|42|classic|none|isotonic': 0.0}

In [4]:
# Fallback to a single model + shake-up simulation.
# Score every run on its own and keep the one with the lowest RMSE.
single_scores = [
    (run_id, rmse(yp_fit.values, Xp_fit[run_id].values)) for run_id in Xp_fit.columns
]
ranked_singles = sorted(single_scores, key=lambda pair: pair[1])
best_single_run, best_single_rmse = ranked_singles[0]

# Settings shared by the three public/private shake-up simulations.
shakeup_kwargs = dict(n_sim=2000, public_ratio=1/3, seed=42)
y_fit_values = yp_fit.values

sh_ens = simulate_public_private_shakeup_v2(y_fit_values, ens_primary, **shakeup_kwargs)
sh_ens_tail = simulate_public_private_shakeup_v2(
    y_fit_values,
    ens_primary,
    stratified_tail=True,
    tail_quantile=0.9,
    tail_public_share=0.5,
    **shakeup_kwargs,
)
sh_single = simulate_public_private_shakeup_v2(
    y_fit_values, Xp_fit[best_single_run].values, **shakeup_kwargs
)

# Spread of the public-minus-private gap for each simulated scenario.
gap_col = "gap_public_minus_private"
ens_gap_std, ens_tail_gap_std, single_gap_std = (
    float(sim[gap_col].std()) for sim in (sh_ens, sh_ens_tail, sh_single)
)

# The ensemble is rejected when its gap spread exceeds the single model's by
# more than 5% (plain simulation) or 10% (tail-stratified simulation).
ens_gap_too_wide = ens_gap_std > single_gap_std * 1.05
ens_tail_gap_too_wide = ens_tail_gap_std > single_gap_std * 1.10
use_ensemble = not (ens_gap_too_wide or ens_tail_gap_too_wide)
if use_ensemble:
    strategy = "ensemble"
else:
    strategy = "single"

# Persist the raw simulation results.
for sim, sim_file in (
    (sh_ens, "shakeup_v2_ensemble.parquet"),
    (sh_ens_tail, "shakeup_v2_ensemble_tail.parquet"),
    (sh_single, "shakeup_v2_single.parquet"),
):
    sim.to_parquet(ARTIFACT_V2 / sim_file, index=False)

# Persist the decision together with the numbers it was based on.
meta = {
    "strategy": strategy,
    "run_ids": run_ids,
    "weights": weight_map,
    "best_single_run": best_single_run,
    "ens_primary_rmse": float(ens_primary_rmse),
    "best_single_rmse": float(best_single_rmse),
    "ens_gap_std": ens_gap_std,
    "ens_tail_gap_std": ens_tail_gap_std,
    "single_gap_std": single_gap_std,
}
meta_path = ARTIFACT_V2 / "ensemble_weights_v2.json"
meta_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")

cards = build_model_cards(run_df, selected)
cards.to_csv(ARTIFACT_V2 / "model_cards_v2.csv", index=False)
print("strategy:", strategy)


strategy: single
