# 05 - Ensemble and robustness V2.1

Objectifs:
- sélection multi-splits avec pénalités de dispersion,
- comparaison ensemble vs single,
- stress tests shake-up (standard + tail stratified),
- export selection report + poids d'ensemble.


In [1]:
import sys
import json
from pathlib import Path
import numpy as np
import pandas as pd

# Project root: the working directory when it contains "src", otherwise its
# parent (covers launching the notebook from a sub-folder of the project).
ROOT = Path.cwd() if (Path.cwd() / "src").exists() else Path.cwd().parent

# Put the root at the front of the import path, once, so `src.*` is importable.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

from src.v2_pipeline import (
    DEFAULT_V2_DIR,
    ensure_dir,
    optimize_non_negative_weights,
    rmse,
    select_final_models,
    simulate_public_private_shakeup_v2,
    build_model_cards,
    build_prediction_distribution_table,
)

# Location of the V2 artifacts, resolved under the project root; every file
# read or written by this notebook is addressed relative to it.
# NOTE(review): ensure_dir presumably creates the directory when missing and
# returns the path — confirm in src.v2_pipeline.
ARTIFACT_V2 = ensure_dir(ROOT / DEFAULT_V2_DIR)


In [2]:
# Load the run registry and the out-of-fold predictions from the V2 artifacts.
run_df = pd.read_csv(ARTIFACT_V2 / "run_registry_v2.csv")
oof = pd.read_parquet(ARTIFACT_V2 / "oof_predictions_v2.parquet")

# First call with return_report=True: the result is exported as the selection
# report (one row per candidate run in the captured output below).
selection_report = select_final_models(run_df, risk_policy="stability_private", return_report=True)
selection_report.to_csv(ARTIFACT_V2 / "selection_report_v2.csv", index=False)

# Second call with the same policy but return_report=False: the result is the
# set of selected runs, which drives the ensemble built in the following cells.
selected = select_final_models(run_df, risk_policy="stability_private", return_report=False)
selected.to_csv(ARTIFACT_V2 / "selected_models_v2.csv", index=False)

# Quick visual check of both tables.
print("selection report rows:", len(selection_report))
print("selected rows:", len(selected))
display(selection_report.head(20))
display(selected.head(20))


selection report rows: 27
selected rows: 6


Unnamed: 0,run_id,feature_set,engine,family,config_id,seed,severity_mode,calibration,tail_mapper,rmse_aux_blocked5,...,tail_penalty,selection_score,accepted_secondary,accepted_aux,accepted_tail,accepted_collapse,accepted_dispersion,accepted,decision_reason,rank
0,base_v2|catboost|two_part_tweedie|cb_v2_c1|42|...,base_v2,catboost,two_part_tweedie,cb_v2_c1,42,weighted_tail,isotonic,isotonic,542.595859,...,0.671267,545.585711,True,True,True,True,True,True,accepted,1
1,base_v2|catboost|two_part_tweedie|cb_v2_c1|42|...,base_v2,catboost,two_part_tweedie,cb_v2_c1,42,classic,isotonic,isotonic,542.571435,...,0.678882,545.732181,True,True,True,True,True,True,accepted,2
2,base_v2|catboost|two_part_classic|cb_v2_c1|42|...,base_v2,catboost,two_part_classic,cb_v2_c1,42,classic,isotonic,isotonic,542.670668,...,0.68976,545.834739,True,True,True,True,True,True,accepted,3
3,base_v2|xgboost|two_part_tweedie|xgb_v2_c1|42|...,base_v2,xgboost,two_part_tweedie,xgb_v2_c1,42,classic,none,isotonic,542.888025,...,0.669406,546.014272,True,True,True,True,True,True,accepted,4
4,base_v2|lightgbm|two_part_classic|lgb_v2_c1|42...,base_v2,lightgbm,two_part_classic,lgb_v2_c1,42,classic,isotonic,isotonic,543.121771,...,0.663925,546.020444,True,True,True,True,True,True,accepted,5
5,base_v2|xgboost|two_part_tweedie|xgb_v2_c1|42|...,base_v2,xgboost,two_part_tweedie,xgb_v2_c1,42,classic,isotonic,isotonic,542.883211,...,0.669406,546.021755,True,True,True,True,True,True,accepted,6
6,base_v2|xgboost|two_part_classic|xgb_v2_c1|42|...,base_v2,xgboost,two_part_classic,xgb_v2_c1,42,classic,isotonic,isotonic,542.877905,...,0.680829,546.028749,True,True,True,True,True,True,accepted,7
7,base_v2|xgboost|two_part_classic|xgb_v2_c1|42|...,base_v2,xgboost,two_part_classic,xgb_v2_c1,42,weighted_tail,isotonic,isotonic,542.842109,...,0.685563,546.038646,True,True,True,True,True,True,accepted,8
8,base_v2|xgboost|two_part_classic|xgb_v2_c1|42|...,base_v2,xgboost,two_part_classic,xgb_v2_c1,42,weighted_tail,none,isotonic,542.854402,...,0.685563,546.040011,True,True,True,True,True,True,accepted,9
9,base_v2|xgboost|two_part_classic|xgb_v2_c1|42|...,base_v2,xgboost,two_part_classic,xgb_v2_c1,42,classic,none,isotonic,542.886507,...,0.680829,546.045743,True,True,True,True,True,True,accepted,10


Unnamed: 0,run_id,feature_set,engine,family,config_id,seed,severity_mode,calibration,tail_mapper,rmse_aux_blocked5,...,tail_penalty,selection_score,accepted_secondary,accepted_aux,accepted_tail,accepted_collapse,accepted_dispersion,accepted,decision_reason,rank
0,base_v2|catboost|two_part_tweedie|cb_v2_c1|42|...,base_v2,catboost,two_part_tweedie,cb_v2_c1,42,weighted_tail,isotonic,isotonic,542.595859,...,0.671267,545.585711,True,True,True,True,True,True,accepted,1
1,base_v2|catboost|two_part_tweedie|cb_v2_c1|42|...,base_v2,catboost,two_part_tweedie,cb_v2_c1,42,classic,isotonic,isotonic,542.571435,...,0.678882,545.732181,True,True,True,True,True,True,accepted,2
2,base_v2|catboost|two_part_classic|cb_v2_c1|42|...,base_v2,catboost,two_part_classic,cb_v2_c1,42,classic,isotonic,isotonic,542.670668,...,0.68976,545.834739,True,True,True,True,True,True,accepted,3
3,base_v2|xgboost|two_part_tweedie|xgb_v2_c1|42|...,base_v2,xgboost,two_part_tweedie,xgb_v2_c1,42,classic,none,isotonic,542.888025,...,0.669406,546.014272,True,True,True,True,True,True,accepted,4
4,base_v2|lightgbm|two_part_classic|lgb_v2_c1|42...,base_v2,lightgbm,two_part_classic,lgb_v2_c1,42,classic,isotonic,isotonic,543.121771,...,0.663925,546.020444,True,True,True,True,True,True,accepted,5
5,base_v2|xgboost|two_part_tweedie|xgb_v2_c1|42|...,base_v2,xgboost,two_part_tweedie,xgb_v2_c1,42,classic,isotonic,isotonic,542.883211,...,0.669406,546.021755,True,True,True,True,True,True,accepted,6


In [3]:
def build_split_matrix(pred_df, split_name, run_ids):
    """Build the (rows x runs) prediction matrix and target for one split.

    Parameters
    ----------
    pred_df : DataFrame
        Long-format predictions; the columns ``is_test``, ``split``,
        ``run_id``, ``row_idx``, ``pred_prime`` and ``y_sev`` are read.
    split_name : str
        Value of ``split`` to keep.
    run_ids : list-like
        Runs to keep (matched against ``run_id``).

    Returns
    -------
    (DataFrame, Series)
        ``pred_prime`` pivoted to one column per run, indexed by ``row_idx``,
        and the matching ``y_sev`` values. Only rows where every retained run
        has a prediction and the target is present are returned.
    """
    # Non-test rows of the requested split, restricted to the requested runs.
    keep = (
        (pred_df["is_test"] == 0)
        & (pred_df["split"] == split_name)
        & pred_df["run_id"].isin(run_ids)
    )
    rows = pred_df.loc[keep]

    # One column per run; if a (row_idx, run_id) pair is duplicated the first
    # value is used.
    preds = rows.pivot_table(
        index="row_idx",
        columns="run_id",
        values="pred_prime",
        aggfunc="first",
    )
    target = rows.groupby("row_idx")["y_sev"].first()

    # Keep only fully observed rows so predictions and target stay aligned.
    complete = preds.notna().all(axis=1) & target.notna()
    return preds.loc[complete], target.loc[complete]

# Candidate runs for the blend: the distinct run ids kept by the selection step.
run_ids = selected["run_id"].drop_duplicates().tolist()

# Fit the blend weights on the primary split; weight_map keys follow the column
# order of the prediction matrix so each weight stays attached to its run.
Xp, yp = build_split_matrix(oof, "primary_time", run_ids)
w = optimize_non_negative_weights(Xp.values, yp.values)
weight_map = dict(zip(Xp.columns.tolist(), (float(v) for v in w)))

# Blended prediction on the primary split and its error.
ens_primary_pred = Xp.values @ w
ens_primary_rmse = rmse(yp.values, ens_primary_pred)

# Error of each run taken alone on the same rows; the lowest one is the
# single-model baseline used in the comparisons that follow.
single_scores = [(rid, rmse(yp.values, Xp[rid].values)) for rid in Xp.columns]
ranked_singles = sorted(single_scores, key=lambda pair: pair[1])
best_single_run, best_single_rmse = ranked_singles[0]

print("ensemble primary rmse:", ens_primary_rmse)
print("best single primary rmse:", best_single_rmse, best_single_run)


ensemble primary rmse: 545.8523003388027
best single primary rmse: 545.9294822328463 base_v2|catboost|two_part_tweedie|cb_v2_c1|42|weighted_tail|isotonic|isotonic


In [4]:
def eval_on_split(split_name, run_ids, weight_map, single_run):
    """Score the weighted blend and one reference run on a given split.

    Predictions come from the notebook-level ``oof`` frame via
    ``build_split_matrix``. Weights are looked up per column in ``weight_map``
    (0.0 for runs without an entry) and rescaled to sum to 1; if they sum to
    zero or less, equal weights are used instead.

    Returns a dict with keys ``split``, ``rmse_ensemble`` and ``rmse_single``.
    Both scores are NaN when the split has no usable rows, and ``rmse_single``
    is NaN when ``single_run`` has no column in the split matrix.
    """
    result = {"split": split_name, "rmse_ensemble": np.nan, "rmse_single": np.nan}

    preds, target = build_split_matrix(oof, split_name, run_ids)
    if len(preds) == 0:
        return result

    raw = np.array([weight_map.get(col, 0.0) for col in preds.columns], dtype=float)
    total = raw.sum()
    if total > 0:
        weights = raw / total
    else:
        # No usable weight for the runs present here: fall back to a plain mean.
        weights = np.full(len(raw), 1.0 / len(raw))

    blended = preds.values @ weights
    result["rmse_ensemble"] = float(rmse(target.values, blended))
    if single_run in preds.columns:
        result["rmse_single"] = float(rmse(target.values, preds[single_run].values))
    return result

# One row per validation split, comparing the blend with the best single run.
split_eval = pd.DataFrame(
    [
        eval_on_split(split_label, run_ids, weight_map, best_single_run)
        for split_label in ("primary_time", "secondary_group", "aux_blocked5")
    ]
)
display(split_eval)


Unnamed: 0,split,rmse_ensemble,rmse_single
0,primary_time,545.8523,545.929482
1,secondary_group,542.109587,542.188168
2,aux_blocked5,542.581121,542.595859


In [5]:
# Shake-up simulations on the primary split (2000 draws each, public share 1/3,
# same seed): the blend, the blend with the tail-stratified option enabled,
# and the best single run.
sh_ens = simulate_public_private_shakeup_v2(
    yp.values, ens_primary_pred, n_sim=2000, public_ratio=1/3, seed=42
)
sh_ens_tail = simulate_public_private_shakeup_v2(
    yp.values,
    ens_primary_pred,
    n_sim=2000,
    public_ratio=1/3,
    seed=42,
    stratified_tail=True,
    tail_quantile=0.9,
    tail_public_share=0.5,
)
sh_single = simulate_public_private_shakeup_v2(
    yp.values, Xp[best_single_run].values, n_sim=2000, public_ratio=1/3, seed=42
)

# Dispersion of the public-minus-private gap across simulations.
ens_gap_std = float(sh_ens["gap_public_minus_private"].std())
ens_tail_gap_std = float(sh_ens_tail["gap_public_minus_private"].std())
single_gap_std = float(sh_single["gap_public_minus_private"].std())

# Gain on the primary split (positive = blend better) and degradation on the
# two other splits (positive = blend worse). A missing single-run score is
# treated as "no degradation".
sec_row = split_eval[split_eval["split"] == "secondary_group"].iloc[0]
aux_row = split_eval[split_eval["split"] == "aux_blocked5"].iloc[0]
gain_primary = float(best_single_rmse - ens_primary_rmse)
degrade_secondary = float(sec_row["rmse_ensemble"] - sec_row["rmse_single"]) if pd.notna(sec_row["rmse_single"]) else 0.0
degrade_aux = float(aux_row["rmse_ensemble"] - aux_row["rmse_single"]) if pd.notna(aux_row["rmse_single"]) else 0.0

# The blend is kept only if it improves the primary split, loses at most 1.0
# RMSE on each other split, and its gap dispersion stays within 5% (standard)
# and 10% (tail-stratified) of the single run's dispersion.
# NOTE(review): the tail-stratified blend is compared with the single run's
# non-stratified dispersion — confirm this baseline is intended.
use_ensemble = (
    (gain_primary > 0.0)
    and (degrade_secondary <= 1.0)
    and (degrade_aux <= 1.0)
    and (ens_gap_std <= single_gap_std * 1.05)
    and (ens_tail_gap_std <= single_gap_std * 1.10)
)
strategy = "ensemble" if use_ensemble else "single"

# Persist the raw simulation tables.
sh_ens.to_parquet(ARTIFACT_V2 / "shakeup_v2_ensemble.parquet", index=False)
sh_ens_tail.to_parquet(ARTIFACT_V2 / "shakeup_v2_ensemble_tail.parquet", index=False)
sh_single.to_parquet(ARTIFACT_V2 / "shakeup_v2_single.parquet", index=False)

# Decision, weights and every metric that fed the decision, written as JSON.
meta = {
    "strategy": strategy,
    "run_ids": run_ids,
    "weights": weight_map,
    "best_single_run": best_single_run,
    "ens_primary_rmse": float(ens_primary_rmse),
    "best_single_rmse": float(best_single_rmse),
    "gain_primary": gain_primary,
    "degrade_secondary": degrade_secondary,
    "degrade_aux": degrade_aux,
    "ens_gap_std": ens_gap_std,
    "ens_tail_gap_std": ens_tail_gap_std,
    "single_gap_std": single_gap_std,
}
(ARTIFACT_V2 / "ensemble_weights_v2.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")

# Companion audit tables built by the project helpers.
cards = build_model_cards(run_df, selected)
cards.to_csv(ARTIFACT_V2 / "model_cards_v2.csv", index=False)

pred_dist = build_prediction_distribution_table(oof)
pred_dist.to_csv(ARTIFACT_V2 / "pred_distribution_audit_v2.csv", index=False)

print("strategy:", strategy)
print("saved:", ARTIFACT_V2 / "selection_report_v2.csv")
print("saved:", ARTIFACT_V2 / "ensemble_weights_v2.json")


strategy: single
saved: c:\Users\icemo\Downloads\Calcul-prime-d-assurance\artifacts\v2\selection_report_v2.csv
saved: c:\Users\icemo\Downloads\Calcul-prime-d-assurance\artifacts\v2\ensemble_weights_v2.json
