# 03 - Objective screening V2

Screening moteur x famille x mode de sévérité, piloté par :
- RMSE prime,
- q99_ratio_pos,
- flags de collapse de distribution.


In [None]:
import sys
from pathlib import Path
import pandas as pd

# Resolve the project root: keep the current working directory when it
# contains a `src` entry, otherwise fall back to its parent directory
# (presumably the notebook is launched from a sub-folder of the project).
ROOT = Path.cwd() if (Path.cwd() / "src").exists() else Path.cwd().parent
# Put the root at the front of the import path, only once, so that the
# `src` package imported below can be found.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

from src.v2_pipeline import (
    DEFAULT_V2_DIR,
    ensure_dir,
    load_train_test,
    prepare_feature_sets,
    build_split_registry,
    run_benchmark,
    V2_COARSE_CONFIGS,
    V2_SCREENING_FAMILIES,
)
# Folder the raw train/test data is loaded from.
DATA_DIR = ROOT.joinpath("data")
# Output folder for the V2 artifacts written by this notebook.
# NOTE(review): ensure_dir is a project helper; presumably it creates the
# directory if needed and returns its path -- confirm in src.v2_pipeline.
ARTIFACT_V2 = ensure_dir(ROOT.joinpath(DEFAULT_V2_DIR))


In [None]:
# Load the raw train/test tables and derive the feature-set bundles from them.
train_raw, test_raw = load_train_test(DATA_DIR)
feature_sets = prepare_feature_sets(
    train_raw,
    test_raw,
    rare_min_count=30,
    drop_identifiers=True,
)
# Only the "robust_v2" feature set is screened in this notebook.
bundle = feature_sets["robust_v2"]

# Build the full split registry, then keep just the "primary_time" entry:
# the screening below is run on that single split.
splits = build_split_registry(
    train_raw,
    n_blocks_time=5,
    n_splits_group=5,
    group_col="id_client",
)
primary_only = dict(primary_time=splits["primary_time"])

# Screening loop: one benchmark per (engine, objective family) pair, each
# using the first coarse configuration registered for the engine.
# The second element returned by run_benchmark is collected in rows_r and
# concatenated into a single table afterwards.
rows_r = []
for engine in ("catboost", "lightgbm", "xgboost"):
    coarse_cfg = V2_COARSE_CONFIGS[engine][0]
    for family_spec in V2_SCREENING_FAMILIES:
        family_name = family_spec["family"]
        severity_mode = family_spec["severity_mode"]
        tweedie_power = family_spec["tweedie_power"]

        # Keyword order mirrors the benchmark spec layout so the resulting
        # dict has the same key order on every run.
        spec = dict(
            feature_set="robust_v2",
            engine=engine,
            family=family_name,
            severity_mode=severity_mode,
            tweedie_power=tweedie_power,
            config_id=coarse_cfg["config_id"],
            calibration_methods=["none"],
            # The tail mapper is enabled for every family except direct_tweedie.
            use_tail_mapper=family_name != "direct_tweedie",
            use_target_encoding=True,
            target_encode_cols=["code_postal", "cp3", "modele_vehicule", "marque_modele"],
            target_encoding_smoothing=20.0,
            freq_params=coarse_cfg["freq_params"],
            sev_params=coarse_cfg["sev_params"],
            direct_params=coarse_cfg["direct_params"],
            split_names=["primary_time"],
        )

        _, screening_rows, _ = run_benchmark(spec, bundle=bundle, splits=primary_only, seed=42)
        rows_r.append(screening_rows)
        # Progress trace, printed once the combination has finished.
        print("[screen]", engine, family_name, severity_mode, tweedie_power)

# Stack the per-combination benchmark outputs into one table.
run_df = pd.concat(rows_r, ignore_index=True)

# Composite screening score = rmse_prime + two penalty terms:
#   * 2.5 x |1 - q99_ratio_pos|; a missing ratio is filled with 0, so it
#     receives the full 2.5 penalty;
#   * 2.0 x distribution_collapse_flag; a missing flag is filled with 0,
#     i.e. it adds no penalty.
q99_penalty = run_df["q99_ratio_pos"].fillna(0.0).rsub(1.0).abs().mul(2.5)
collapse_penalty = run_df["distribution_collapse_flag"].fillna(0.0).mul(2.0)
run_df["screening_score"] = run_df["rmse_prime"].add(q99_penalty).add(collapse_penalty)

# Persist the scored table, then show the 30 rows with the smallest score.
run_df.to_csv(ARTIFACT_V2 / "objective_screening_v2.csv", index=False)
display(run_df.sort_values(by="screening_score").head(30))


In [None]:
# Summary matrix: mean of each metric per (engine, family, severity_mode),
# computed on the rows whose "level" equals "run", ordered by ascending
# screening score.
# NOTE(review): tweedie_power is not part of the index, so rows that differ
# only by tweedie power (if any) are averaged together -- confirm intended.
# NOTE(review): pivot_table may drop rows whose index keys are missing
# (e.g. a null severity_mode) -- verify no family disappears from the matrix.
matrix = (
    run_df.loc[run_df["level"].eq("run")]
    .pivot_table(
        index=["engine", "family", "severity_mode"],
        values=["rmse_prime", "q99_ratio_pos", "distribution_collapse_flag", "screening_score"],
        aggfunc="mean",
    )
    .sort_values(by="screening_score")
)
display(matrix.head(30))
