# 🧪 Banc de Test Modèles (Model Validation)

Ce notebook teste **directement les objets modèles** (`HybridPredictor`, `BaselineArtifact`, `CatBoost`) en contournant la couche application (`inference.py`, Streamlit).

**Objectifs** :
1. Charger les artefacts depuis le disque.
2. Inspecter les features attendues par le modèle CatBoost.
3. Valider la prédiction sur des données brutes sanitisées.

---

In [None]:
# 1. IMPORTS & SETUP
import sys
import json
import pandas as pd
import numpy as np
from pathlib import Path

# Make the project importable: the parent of the current working directory
# is taken as the project root and appended to sys.path if not already there.
project_root = Path("..").resolve()
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import the model classes DIRECTLY (no Streamlit involved here)
from src.models.catboost.trainer import CatboostTrainer
from src.models.catboost.predictor import HybridPredictor
from src.pipelines.baseline_pipeline import (
    BaselineArtifact,
    recommend_from_selection,
)

# Locations of the serialized artifacts and of the raw training data
ARTIFACTS_DIR = project_root.joinpath("artifacts")
DATA_PATH = project_root.joinpath("data", "Train.csv")

print(" Environnement Mod√®le charg√©.")

## 2. Chargement des Modèles

In [None]:
def load_models_manually():
    """Load the baseline, the CatBoost champion and build the hybrid predictor.

    Everything is read from ``ARTIFACTS_DIR``:

    * ``baseline_v0`` -- baseline artifact, via ``BaselineArtifact.load``
    * ``catboost_champion_v1.cbm`` -- CatBoost model, via ``CatboostTrainer.load``
    * ``best_config_v1.json`` -- deployment configuration

    Progress messages are printed along the way.

    Returns:
        A tuple ``(hybrid, baseline, best_alpha, expected_features)`` where
        ``best_alpha`` is the ``"best_alpha"`` entry of the JSON config
        (0.5 when the key is absent) and ``expected_features`` is the
        ``feature_names_`` attribute of the loaded model.
    """
    print("--- Chargement ---")

    # 1. Baseline
    baseline_path = ARTIFACTS_DIR / "baseline_v0"
    baseline = BaselineArtifact.load(baseline_path)
    print(f" Baseline charg√©e (Produits : {len(baseline.product_cols)})")

    # 2. CatBoost
    cbm_path = ARTIFACTS_DIR / "catboost_champion_v1.cbm"
    config_path = ARTIFACTS_DIR / "best_config_v1.json"

    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding when reading the config.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    best_alpha = config.get("best_alpha", 0.5)

    # A trainer instance is needed only as a container to load the CBM file.
    # The constructor arguments (empty cat_features, iterations=10) are
    # placeholders -- presumably superseded by the content of the loaded
    # model; TODO confirm against CatboostTrainer.load.
    trainer = CatboostTrainer(cat_features=[], iterations=10)
    trainer.load(cbm_path)

    # 3. Features the loaded model expects
    expected_features = trainer.model.feature_names_
    print(f" CatBoost charg√©. Features attendues ({len(expected_features)}) :\n   {expected_features}")

    # 4. Hybrid predictor combining the CatBoost trainer and the baseline
    hybrid = HybridPredictor(trainer, baseline)

    return hybrid, baseline, best_alpha, expected_features

# EXECUTION: load every artifact once and keep the results as notebook
# globals (hybrid_model, baseline_model and MODEL_FEATURES are read by
# run_scenario).
hybrid_model, baseline_model, deployed_alpha, MODEL_FEATURES = load_models_manually()

## 3. Préparation des Données (Fix Types)

C'est ici qu'on résout les problèmes de types (dates, etc.) pour correspondre EXACTEMENT à ce qu'attend CatBoost.

In [None]:
# Load the raw data and apply the initial clean-up
df_raw = pd.read_csv(DATA_PATH)
# Force join_date to datetime (day-first format) so the year can be extracted
# cleanly; errors='coerce' turns unparseable values into NaT instead of raising.
df_raw['join_date'] = pd.to_datetime(df_raw['join_date'], dayfirst=True, errors='coerce')
# Rows without a usable join date fall back to the year 2017.
df_raw['join_year'] = df_raw['join_date'].dt.year.fillna(2017).astype(int)

# Age is computed against the hard-coded reference year 2020.
df_raw['age'] = 2020 - df_raw['birth_year']

def _join_year_or_default(val, default=2017):
    """Return ``val`` as an int, or ``default`` when it is absent.

    "Absent" means the ``"missing"`` placeholder, ``None`` or a NaN/NA value;
    2017 is the same fallback the raw-data clean-up uses for unparseable
    join dates.
    """
    if isinstance(val, str):
        return default if val == "missing" else int(val)
    if val is None or pd.isna(val):
        return default
    return int(val)


def prepare_single_input_from_dict(input_dict, product_cols, model_features):
    """Build a one-row model input from a plain Python dict (scenario).

    Args:
        input_dict: Profile description. The optional ``owned_products`` entry
            lists the products already held (``None`` or absent means none);
            every other value is looked up by feature name.
            NOTE: when ``age`` is absent and ``birth_year`` is present,
            ``age = 2020 - birth_year`` is written back into this dict. The
            side effect is kept on purpose for backward compatibility.
        product_cols: Names of the product columns. A feature that is a
            product is encoded as 1 when owned and 0 otherwise.
        model_features: Ordered feature names; they define the columns of the
            returned frame.

    Returns:
        ``(df_row, owned)``: a single-row ``DataFrame`` whose columns are
        ``model_features`` in that order, and the list of owned products.
        Non-product features absent from ``input_dict`` get the placeholder
        string ``"missing"``, except ``join_year`` which falls back to 2017
        (also when it is ``None`` or NaN).
    """
    # Make sure an age is available (either given directly or derived).
    if 'age' not in input_dict and 'birth_year' in input_dict:
        input_dict['age'] = 2020 - input_dict['birth_year']

    # A missing or None basket means "owns nothing".
    owned = input_dict.get("owned_products") or []

    # Build the lookup sets once instead of scanning lists for every feature.
    owned_lookup = set(owned)
    product_lookup = set(product_cols)

    model_input = {}
    for feature in model_features:
        if feature in product_lookup:
            model_input[feature] = 1 if feature in owned_lookup else 0
            continue

        val = input_dict.get(feature, "missing")
        if feature == 'join_year':
            val = _join_year_or_default(val)
        model_input[feature] = val

    # Re-index by model_features to guarantee the column order of the row.
    df_row = pd.DataFrame([model_input])[model_features]
    return df_row, owned

# Status message only -- nothing is actually verified here. In the visible
# source only the from-dict variant (prepare_single_input_from_dict) is
# defined; the "From Dataframe" part of the message has no counterpart.
print(" Fonctions de pr√©paration (From Dataframe & From Dict) valid√©es.")

## 4. SCÉNARIOS DE TEST (Stress Test)

In [None]:
def run_scenario(name, user_dict, alpha_variants=(0.0, 0.5, 0.9)):
    """Run one test scenario and print baseline vs. hybrid recommendations.

    Relies on the notebook globals ``baseline_model``, ``hybrid_model`` and
    ``MODEL_FEATURES``. Nothing is returned; all results are printed.

    Args:
        name: Label shown in the scenario header.
        user_dict: Profile dict passed to ``prepare_single_input_from_dict``
            (which may add an ``age`` entry to it before it is printed).
        alpha_variants: Iterable of alpha values to pass to
            ``hybrid_model.predict_proba``. The default is an immutable tuple
            so that it cannot be shared and mutated across calls.
    """
    print(f"\nüéÆ --- SC√âNARIO : {name} --- ")

    # 1. Input preparation
    X, owned = prepare_single_input_from_dict(user_dict, baseline_model.product_cols, MODEL_FEATURES)
    print(f"    Profil : {user_dict}")
    print(f"    Panier  : {owned}")

    # 2. Pure baseline (for reference)
    recs_base = recommend_from_selection(baseline_model, owned, topk=3)
    print("    Baseline (Reference) : ", end="")
    if not recs_base.empty:
        base_str = ", ".join(f"{p}({s:.0%})" for p, s in recs_base.items())
        print(base_str)
    else:
        print("Rien")

    # 3. Hybrid comparison across alpha values
    # NOTE (from the original author; HybridPredictor is not visible here, so
    # this cannot be confirmed from this cell): the blending is said to be
    # multiplicative (consensus), i.e. alpha = 1 means
    # "score = CatBoost * Baseline". If CatBoost says NO (0%), the result is
    # NO even when the baseline says YES -- a safety filter rather than an
    # additive average.
    print("    Hybrid Comparison :")
    # Deliberately broad: in this stress test a failure is reported and the
    # notebook moves on to the next scenario. The first failing alpha aborts
    # the remaining alphas of the current scenario.
    try:
        for alpha in alpha_variants:
            probas = hybrid_model.predict_proba(X, alpha=alpha)[0]
            scores_cat = pd.Series(probas, index=baseline_model.product_cols)
            # Never recommend something the customer already owns.
            scores_cat = scores_cat.drop(owned, errors='ignore')
            top_cat = scores_cat.nlargest(3)

            # Compact formatting: "Product(Score)"
            res_str = ", ".join(f"{p}({s:.1%})" for p, s in top_cat.items())
            print(f"        Alpha {alpha:.1f} : {res_str}")

    except Exception as e:
        print(f"        ERREUR : {e}")


# === STRESS TEST CASES ===
# Each call prints the baseline and hybrid recommendations for one profile.
# Ages quoted in the scenario names follow from age = 2020 - birth_year.

# A. CLASSIC PROFILES
run_scenario("Jeune D√©butant (21 ans, Rien)", {
    "sex": "M", "marital_status": "U", "birth_year": 1999, "join_year": 2019, 
    "owned_products": []
})

run_scenario("Famille Install√©e (40 ans, Auto+Sant√©)", {
    "sex": "F", "marital_status": "M", "birth_year": 1980, "join_year": 2010,
    "owned_products": ["P5DA", "RIBP"]
})

# B. EXTREME PROFILES (STRESS TESTS)
run_scenario(" Senior Satur√© (70 ans, 8 produits)", {
    "sex": "M", "marital_status": "M", "birth_year": 1950, "join_year": 1990,
    "owned_products": ["P5DA", "RIBP", "8NN1", "7POT", "66FJ", "GYSR", "SOP4", "RVSZ"]
})

run_scenario("Fant√¥me (Inscrit 2005, Rien achet√©)", {
    "sex": "M", "marital_status": "U", "birth_year": 1980, "join_year": 2005,
    "owned_products": []
})

run_scenario(" Data Error (Ann√©e Naissance Bizarre)", {
    "sex": "F", "marital_status": "M", "birth_year": 2025, # Birth year in the future -> negative age
    "join_year": 2020,
    "owned_products": ["P5DA"]
})