In [None]:
import xgboost as xgb
import pandas as pd
import pickle
import json
import numpy as np

In [None]:
# GPU smoke test through XGBoost: train one boosting round on a toy matrix
# and report whether the CUDA device could be used.
try:
    # Tiny 2x2 matrix, only there to trigger a single training round
    data = xgb.DMatrix([[1, 2], [3, 4]], label=[1, 0])

    # 'gpu_hist' is deprecated since XGBoost 2.0: the GPU is now selected with
    # the `device` parameter (already used here) combined with the regular
    # 'hist' tree method.
    params = {'tree_method': 'hist', 'device': 'cuda'}
    xgb.train(params, data, num_boost_round=1)
    print("‚úÖ Succ√®s ! La RTX 4060 est reconnue et configur√©e.")
except Exception as e:
    # Deliberately broad: any failure here only means "no usable GPU",
    # the notebook must keep running on CPU.
    print(f"‚ùå √âchec du GPU : {e}")
    print("Le mod√®le tournera sur CPU par d√©faut.")

In [None]:
# Notebook-wide pandas display settings: show every column, at most 10 rows,
# and floats with 4 decimals.
display_options = {
    'display.max_columns': None,
    'display.max_rows': 10,
    'display.float_format': '{:.4f}'.format,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Full dataset, read from the notebook's working directory
df = pd.read_parquet('dataset_full.parquet')

print(f"Structure du dataset : {df.shape[0]} lignes et {df.shape[1]} colonnes")
display(df.head())

#### Import du modèle et vérifications

In [None]:
# 1. Load the trained model: create an empty Booster, then populate it from
#    the JSON file (path is relative to the notebook's working directory).
final_model = xgb.Booster()
final_model.load_model("model_survie_V3_final.json")

# 2. Recursive search through the JSON configuration
def find_key(obj, key):
    """Return the first non-None value stored under `key` anywhere in `obj`.

    The search is depth-first and follows container order. Only dicts and
    lists are traversed; any other object yields None. Inside a dict, a
    matching key returns its value immediately (even if that value is None);
    otherwise the value is searched recursively before moving to the next key.

    Returns None when the key is not found.
    """
    if isinstance(obj, dict):
        is_mapping, children = True, obj.items()
    elif isinstance(obj, list):
        # List items carry no key of their own: pair them with a dummy name
        is_mapping, children = False, ((None, item) for item in obj)
    else:
        return None

    for name, child in children:
        if is_mapping and name == key:
            return child
        hit = find_key(child, key)
        if hit is not None:
            return hit
    return None

# Read the AFT scale (sigma) back from the booster's own configuration.
config = json.loads(final_model.save_config())
model_scale = find_key(config, 'aft_loss_distribution_scale')

# The config may hold the value as a string, so it is parsed to float.
# Anything that cannot be parsed, or that is not a strictly positive finite
# number, is treated like a missing value: sigma is later used as a divisor.
try:
    parsed_scale = float(model_scale) if model_scale is not None else None
except (TypeError, ValueError):
    parsed_scale = None

if parsed_scale is not None and np.isfinite(parsed_scale) and parsed_scale > 0:
    sigma_v3 = parsed_scale
    print(f"üéØ Scale trouv√© dans le mod√®le : {sigma_v3}")
else:
    # Fallback: hard-coded value (the original comment attributes it to "Trial 26")
    sigma_v3 = 0.8011934962858392
    print(f"‚ö†Ô∏è Scale non trouv√© dans la config. Utilisation de la valeur manuelle : {sigma_v3}")

# 3. Feature-name check
if final_model.feature_names:
    print(f"‚úÖ Colonnes configur√©es ({len(final_model.feature_names)})")
else:
    # The prediction cell iterates over final_model.feature_names; without
    # them it cannot build its input matrix, so say so here instead of
    # staying silent.
    print("Aucun nom de colonne dans le booster : alignement des features impossible.")

##### Définition de la fonction de survie

In [None]:
# Numerically robust risk computation
def calculate_survival_risk(mu, horizon_annees, s=None):
    """Return the logistic CDF of the standardised log-horizon, in percent.

    Computes ``z = (log(horizon_annees) - mu) / s``, clips ``z`` to
    [-50, 50] so that ``exp(-z)`` cannot overflow, and returns
    ``100 / (1 + exp(-z))``.

    Parameters
    ----------
    mu : float or array-like
        Location term subtracted from the log-horizon, i.e. a value on the
        log-time scale.
    horizon_annees : float or array-like
        Horizon whose natural log is taken (years, going by the name).
    s : float, optional
        Scale (sigma). When omitted, the current module-level ``sigma_v3`` is
        read at call time, so re-running the model-loading cell is picked up
        without re-defining this function.

    Returns
    -------
    float or numpy.ndarray
        Value(s) in [0, 100], broadcast over the inputs.

    Raises
    ------
    ValueError
        If the scale is not strictly positive.

    NOTE(review): the logistic CDF is only the right link if the AFT model was
    trained with a logistic error distribution - confirm against the training
    configuration.
    """
    if s is None:
        # Late binding: a default of `s=sigma_v3` would freeze the value seen
        # when this cell was executed.
        s = sigma_v3
    if not np.all(np.asarray(s) > 0):
        raise ValueError(f"scale 's' must be strictly positive, got {s!r}")

    z = (np.log(horizon_annees) - mu) / s
    z = np.clip(z, -50, 50)
    return (1 / (1 + np.exp(-z))) * 100

---

In [None]:
# --- 1. SCOPE FILTER (OPEN COMPANIES ONLY) ---
# Keep only the rows whose 'fermeture' flag equals 0; work on an independent copy.
is_open = df['fermeture'] == 0
df_vivantes = df.loc[is_open].copy()

# --- 2. STATUS MAPPING (4 CATEGORIES) ---
def map_statut_expert_v3(p2):
    """Map a risk value in percent to one of four status labels.

    Thresholds are strict lower bounds checked from the highest down:
    above 20, above 10, above 5, otherwise the last label. A value that
    compares False against every threshold (NaN included) gets the last label.
    """
    tiers = (
        (20, 'üî¥ CRITIQUE'),
        (10, 'üü† VIGILANCE'),
        (5, 'üü° OBSERVATION'),
    )
    for threshold, label in tiers:
        if p2 > threshold:
            return label
    return 'üü¢ SAIN'

# --- 3. FEATURE PREPARATION ON THE FILTERED SCOPE ---
df_inf = df_vivantes.copy()

# Numeric features: target column -> source column. Values that cannot be
# parsed as numbers become NaN and are then replaced by 0.
# NOTE(review): 'risque_departemental' is filled with the numeric department
# code itself - confirm this matches how the feature was built for training.
numeric_sources = {
    'age_au_diagnostic': 'age_estime',
    'Tranche_effectif_num': 'Tranche_effectif_num',
    'risque_departemental': "Code du d√©partement de l'√©tablissement",
}
for target_col, source_col in numeric_sources.items():
    df_inf[target_col] = pd.to_numeric(df_inf[source_col], errors='coerce').fillna(0)

# NOTE(review): constant 0 for every row - confirm this is intended.
df_inf['is_ess'] = 0

# One-hot encode the first 4 characters of the legal category, prefix 'CJ'
cj_col = "Cat√©gorie juridique de l'unit√© l√©gale"
if cj_col in df_inf.columns:
    df_inf['CJ_prefix'] = df_inf[cj_col].astype(str).str[:4]
    cj_dummies = pd.get_dummies(df_inf['CJ_prefix'], prefix='CJ')
    df_inf = pd.concat([df_inf, cj_dummies], axis=1)

# One-hot encode the APE section label, prefix 'APE'; surrounding whitespace
# is stripped from the generated column names.
if 'libelle_section_ape' in df_inf.columns:
    df_ape_dummies = pd.get_dummies(df_inf['libelle_section_ape'], prefix='APE')
    df_ape_dummies = df_ape_dummies.rename(columns=str.strip)
    df_inf = pd.concat([df_inf, df_ape_dummies], axis=1)

# --- 4. PREDICTION ---
# Build the inference matrix with exactly the booster's features, in its order.
if not final_model.feature_names:
    raise ValueError(
        "Le booster ne contient aucun nom de feature : alignement des colonnes impossible."
    )

# Case/whitespace-insensitive lookup, built once. When several columns
# normalise to the same key, the first one in column order wins.
normalized_columns = {}
for c in df_inf.columns:
    normalized_columns.setdefault(str(c).lower().strip(), c)

feature_data = {}
missing_features = []
for col in final_model.feature_names:
    if col in df_inf.columns:
        feature_data[col] = df_inf[col]
        continue
    normalized_key = col.lower().strip()
    if normalized_key in normalized_columns:
        feature_data[col] = df_inf[normalized_columns[normalized_key]]
    else:
        # Feature unknown to the dataset: constant 0 for every row
        feature_data[col] = 0
        missing_features.append(col)

# Single construction instead of inserting columns one at a time
X_inf = pd.DataFrame(feature_data, index=df_vivantes.index)

if missing_features:
    # Make the silent zero-filling visible: it directly affects predictions
    print(
        f"ATTENTION : {len(missing_features)} colonne(s) attendue(s) par XGBoost "
        f"sont introuvables dans le dataset et valent 0 partout : {missing_features}"
    )

# For a survival:aft objective, predict() returns the prediction on the time
# scale (exponential of the margin). calculate_survival_risk() subtracts its
# `mu` argument from log(horizon), so it needs the log-scale margin instead:
# output_margin=True returns the untransformed value.
# NOTE(review): remaining NaNs are replaced by 0 before prediction - confirm
# this matches the preprocessing used at training time.
dmatrix_inf = xgb.DMatrix(X_inf.astype(float).fillna(0))
preds_mu = final_model.predict(dmatrix_inf, output_margin=True)

# --- 5. DASHBOARD DATASET ---
df_dashboard = df_vivantes.copy()

# One risk column per horizon (1, 2 and 3), all derived from the same predictions
horizon_columns = {'Prob_1an': 1, 'Prob_2ans': 2, 'Prob_3ans': 3}
for column_name, horizon in horizon_columns.items():
    df_dashboard[column_name] = calculate_survival_risk(preds_mu, horizon)

df_dashboard['D√©nomination'] = df_vivantes["D√©nomination de l'unit√© l√©gale"]

# The 2-horizon value is both the headline index and the input of the status label
two_year_risk = df_dashboard['Prob_2ans']
df_dashboard['Indice_Risque'] = two_year_risk
df_dashboard['Statut_Expert'] = two_year_risk.apply(map_statut_expert_v3)

# --- 6. COLUMN ORDER AND EXPORT ---
# Desired output order; names missing from the frame are skipped below.
cols_ordre = [
    'SIREN',
    'D√©nomination',
    'Statut_Expert',
    'Indice_Risque',
    'Prob_1an',
    'Prob_2ans',
    'Prob_3ans',
    "Code postal de l'√©tablissement",
    "Code commune de l'√©tablissement",
    "Cat√©gorie juridique de l'unit√© l√©gale",
    "Activit√© principale de l'unit√© l√©gale",
    "Economie sociale et solidaire unit√© l√©gale",
    "Code du d√©partement de l'√©tablissement",
    "Code de la r√©gion de l'√©tablissement",
    "Date_fermeture_finale",
    "Tranche_effectif_num",
    "age_estime",
    "latitude",
    "longitude",
    "code_ape",
    "libelle_section_ape",
    "fermeture",
]

export_columns = [name for name in cols_ordre if name in df_dashboard.columns]
df_dashboard = df_dashboard[export_columns]

# Written without the index, next to the notebook
df_dashboard.to_parquet('Predictions_Risques_Survie_2026.parquet', index=False)

print(f"‚úÖ Analyse termin√©e sur les soci√©t√©s OUVERTES uniquement.")
print(f"üìâ Soci√©t√©s √©cart√©es (d√©j√† ferm√©es) : {len(df) - len(df_dashboard)}")
print(f"üìä Nouveau total √† analyser : {len(df_dashboard)}")

In [None]:
# Number of rows per status label
df_dashboard['Statut_Expert'].value_counts()

In [None]:
# Preview the first rows of the dashboard table
df_dashboard.head()

---