### Import des biblioth√©ques
---

In [1]:
import os
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import boto3
import mlflow
import mlflow.sklearn

from dotenv import load_dotenv
from io import StringIO
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.exceptions import UndefinedMetricWarning
from xgboost import XGBRegressor
from sksurv.util import Surv
from sksurv.metrics import concordance_index_censored


  mlflow.mismatch._check_version_mismatch()


In [8]:
print(mlflow.__version__)
print(pd.__version__)


3.6.0
2.3.0


### Configuration AWS et Chargement des donn√©es du S3
---

In [3]:
load_dotenv("../secrets.env")

AWS_KEY = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET = os.getenv("AWS_SECRET_ACCESS_KEY")
BUCKET_NAME = os.getenv("AWS_BUCKET_NAME")
AWS_REGION = os.getenv("AWS_REGION")

s3 = boto3.client(
    "s3",
    aws_access_key_id=AWS_KEY,
    aws_secret_access_key=AWS_SECRET
)

# Lecture directe depuis S3
obj = s3.get_object(Bucket=BUCKET_NAME, Key="dataset_complet_meteo.csv")
csv_str = obj['Body'].read().decode('utf-8')  # convertir les bytes en str
df = pd.read_csv(StringIO(csv_str), sep=";")

df.head(10)

  df = pd.read_csv(StringIO(csv_str), sep=";")


Unnamed: 0,RR,DRR,TN,HTN,TX,HTX,TM,TMNX,TNSOL,TN50,...,compteur feu log,compteur feu carr√©,Ann√©e,Mois,Nombre de feu par an,Nombre de feu par mois,jours_sans_pluie,jours_TX_sup_30,ETPGRILLE_7j,ville_proche
0,,,,,,,,,,,...,0.0,0.0,2006,6,3,3,,,,Figari
1,,,,,,,,,,,...,0.0,0.0,2006,6,3,3,,,,Figari
2,,,,,,,,,,,...,0.0,0.0,2006,6,3,3,,,,Figari
3,,,,,,,,,,,...,0.0,0.0,2007,10,2,2,,,,Figari
4,,,,,,,,,,,...,0.0,0.0,2007,10,2,2,,,,Figari
5,,,,,,,,,,,...,0.0,0.0,2008,9,1,1,,,,Figari
6,0.0,0.0,19.0,325.0,27.5,1243.0,22.3,23.25,18.3,18.7,...,0.0,0.0,2009,6,2,2,,,,Figari
7,0.0,0.0,15.6,406.0,32.2,1413.0,25.9,23.9,14.6,14.8,...,0.0,0.0,2009,6,2,2,,,,Figari
8,4.0,479.0,9.2,147.0,19.1,1120.0,14.7,14.15,7.8,8.4,...,0.0,0.0,2010,3,7,1,,,,Figari
9,0.0,0.0,8.1,230.0,25.1,1242.0,17.2,16.6,5.5,6.2,...,0.0,0.0,2010,5,7,1,,,,Figari


### Configuration Mlflow
---

In [5]:
mlflow_tracking_uri = os.environ.get("MLFLOW_TRACKING_URI")
mlflow.set_tracking_uri(mlflow_tracking_uri)
mlflow.set_experiment("projet_feu")

2026/01/14 12:15:51 INFO mlflow.tracking.fluent: Experiment with name 'projet_feu' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://myprojetfeu/mlflow/1', creation_time=1768389352634, experiment_id='1', last_update_time=1768389352634, lifecycle_stage='active', name='projet_feu', tags={}>

### Mod√©le XGBoost survival :cox
---
on ne pr√©dit pas 'feu ou pas feu' on predit 'quand le feu va arriver'

In [None]:
def train_model(df):
    # df = load_model_data()
    df = df.rename(columns={"Feu pr√©vu": "event", "d√©compte": "duration"})
    df["event"] = df["event"].astype(bool)
    df["duration"] = df["duration"].fillna(0)

    features = [
        "moyenne precipitations mois", "moyenne temperature mois",
        "moyenne evapotranspiration mois", "moyenne vitesse vent ann√©e",
        "moyenne vitesse vent mois", "moyenne temperature ann√©e",
        "RR", "UM", "ETPMON", "TN", "TX", "Nombre de feu par an",
        "Nombre de feu par mois", "jours_sans_pluie", "jours_TX_sup_30",
        "ETPGRILLE_7j", "compteur jours vers prochain feu",
        "compteur feu log", "Ann√©e", "Mois",
        "moyenne precipitations ann√©e", "moyenne evapotranspiration ann√©e",
    ]
    features = [f for f in features if f in df.columns]

    y_struct = Surv.from_dataframe("event", "duration", df)
    X_train, X_test, y_train, y_test = train_test_split(df[features], y_struct, test_size=0.3, random_state=42)
    ev_train, du_train = y_train["event"], y_train["duration"]
    ev_test, du_test = y_test["event"], y_test["duration"]

    model = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
        ("xgb", XGBRegressor(
            objective="survival:cox",
            n_estimators=100,
            learning_rate=0.05,
            max_depth=3,
            tree_method="hist",
            random_state=42,
        )),
    ])

    with mlflow.start_run(run_name="XGBSurv_Train"):
        # 1. Entra√Ænement
        model.fit(X_train, du_train, xgb__sample_weight=ev_train)

        # 2. Log hyperparam√®tres
        mlflow.log_params({"n_estimators":100, "learning_rate":0.05, "max_depth":3, "tree_method":"hist"})

        # 3. Calcul des pr√©dictions et m√©triques
        log_hr_test = model.predict(X_test)
        c_index = concordance_index_censored(ev_test, du_test, log_hr_test)[0]
        print(f"C-index (test) : {c_index:.3f}")
        mlflow.log_metric("c_index_test", c_index)

        # 4. Signature et Log du mod√®le (L'optimisation)
        from mlflow.models.signature import infer_signature
        signature = infer_signature(X_test, log_hr_test)
    
        mlflow.sklearn.log_model(
            model, 
            artifact_path="survival_xgb_model",
            signature=signature
        )

        # 5. Cr√©ation des fichiers visuels et CSV
        fig, ax = plt.subplots(figsize=(6,4))
        sns.histplot(log_hr_test, bins=30, ax=ax)
        ax.set_title("Distribution log hazard (test)")
        fig_path = "log_hazard_test.png"
        fig.savefig(fig_path)
        plt.close(fig)

        df_test_pred = pd.DataFrame({"duration": du_test, "event": ev_test, "log_hazard_pred": log_hr_test})
        csv_path = "predictions_test.csv"
        df_test_pred.to_csv(csv_path, index=False)

        # 6. Envoi des fichiers (Artifacts) vers HF/S3
        mlflow.log_artifact(fig_path)
        mlflow.log_artifact(csv_path)

        # 7. Nettoyage local (pour rester propre sur ton PC)
        os.remove(fig_path)
        os.remove(csv_path)

    print("Tout est sur Hugging Face et S3.")

In [7]:
if __name__ == "__main__":
    train_model(df)

C-index (test) : 0.809




üèÉ View run XGBSurv_Train at: https://nath13huggingface-mlflow-feu-serveur.hf.space/#/experiments/1/runs/defbd87835294767b3861bae4fb65bbb
üß™ View experiment at: https://nath13huggingface-mlflow-feu-serveur.hf.space/#/experiments/1
Tout est sur Hugging Face et S3.
