In [1]:
from health_lifestyle_diabetes.infrastructure.logger.config import configure_logging
from health_lifestyle_diabetes.infrastructure.logger.loguru_logger import LoguruLogger
from health_lifestyle_diabetes.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from health_lifestyle_diabetes.application.services.experiment_tracking_service import ExperimentTrackingService
from health_lifestyle_diabetes.infrastructure.tracking.run_name_generator import generate_run_name
from datetime import datetime
import random

# 1) Fonction pour simuler des métriques d'entraînement
def simulate_training_metrics() -> dict:
    """
    Simulate model evaluation metrics for a binary classification task.
    """
    auc = round(random.uniform(0.70, 0.90), 3)
    precision = round(random.uniform(0.65, 0.90), 3)
    recall = round(random.uniform(0.60, 0.88), 3)

    f1_score = round(
        2 * precision * recall / (precision + recall + 1e-8), 3
    )

    accuracy = round(random.uniform(0.70, 0.88), 3)
    balanced_accuracy = round(random.uniform(0.68, 0.86), 3)
    log_loss = round(random.uniform(0.35, 0.65), 3)

    error_rate = round(1 - accuracy, 3)

    return {
        "auc": auc,
        "accuracy": accuracy,
        "balanced_accuracy": balanced_accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "log_loss": log_loss,
        "error_rate": error_rate,
    }

# 2) Configuration du logging global
configure_logging(env="dev")

# 3) Injection des dépendances
logger = LoguruLogger()
ml_tracker = MLflowExperimentTracker(logger=logger)
tracking = ExperimentTrackingService(tracker=ml_tracker, logger=logger)

# 4) Démarrage de l'expérience
EXPERIMENT_NAME = "health_lifestyle_diabetes"
PREFIX_NAME = "Catb"
MODEL_NAME = generate_run_name(prefix=PREFIX_NAME)

print(f"Starting experiment '{EXPERIMENT_NAME}' with run name '{MODEL_NAME}'")

tracking.start_experiment(
    experiment_name=EXPERIMENT_NAME,
    run_name=MODEL_NAME
)

# 5) Exemple de paramètres du modèle
params = {
    "max_depth": 6,
    "learning_rate": 0.05,
    "n_estimators": 300,
    "subsample": 0.8
}
tracking.log_training_context(model_name="XGBoostClassifier", params=params)

# 6) Exemple de métriques post-entraînement
# Simulate training => Model training simulation started
metrics = simulate_training_metrics()
tracking.log_evaluation(metrics)

# 7) Exemple d'artefact (simulation d'un fichier de modèle)
#model_path = "models/xgb_baseline.pkl"
#with open(model_path, "w") as f:
#    f.write("simulated binary content of a ML model")
#tracking.log_artifact(model_path)

# 8) Clôture propre de la run
tracking.close()


# -----------------------------
# RÉSULTAT ATTENDU
# -----------------------------
# - Une expérience "diabetes_prediction" est visible dans MLflow
# - Une run "baseline_xgboost" est enregistrée
# - Les paramètres, métriques et artefacts sont disponibles dans l'UI MLflow
# 
# Pour lancer l'interface MLflow :
#     poetry run mlflow ui
# Puis ouvrir http://localhost:5000

[32m2025-12-25 17:33:49[0m | [1mINFO    [0m | [36mconfig.py:67[0m | [33mconfigure_logging()[0m | Loguru configuré avec succès (mode: dev) | {'env': 'dev'}
[32m2025-12-25 17:33:49[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:36[0m | [33m__init__()[0m | Tracking URI : sqlite://///Users/surelmanda/Downloads/ml-projects/mlflow_central/db/mlruns.db | {}
[32m2025-12-25 17:33:49[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:37[0m | [33m__init__()[0m | Artifact URI : /Users/surelmanda/Downloads/ml-projects/mlflow_central/mlflow_artifacts | {}


2025/12/25 17:33:50 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/25 17:33:50 INFO mlflow.store.db.utils: Updating database tables
2025/12/25 17:33:50 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/25 17:33:50 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2025/12/25 17:33:50 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/25 17:33:50 INFO alembic.runtime.migration: Will assume non-transactional DDL.


[32m2025-12-25 17:33:50[0m | [1mINFO    [0m | [36mmlflow_setup.py:53[0m | [33mconfigure()[0m | MLflow configuré avec succès. | {}
Starting experiment 'health_lifestyle_diabetes' with run name 'Catb_2025-12-25_17h33m50s'
[32m2025-12-25 17:33:50[0m | [1mINFO    [0m | [36mmlflow_tracker.py:25[0m | [33m__init__()[0m | MLflowExperimentTracker prêt. | {}
[32m2025-12-25 17:33:50[0m | [1mINFO    [0m | [36mexperiment_tracking_service.py:23[0m | [33m__init__()[0m | ExperimentTrackingService initialisé. | {}
[32m2025-12-25 17:33:50[0m | [1mINFO    [0m | [36mexperiment_tracking_service.py:30[0m | [33mstart_experiment()[0m | Initialisation de l'expérience 'health_lifestyle_diabetes' (run='Catb_2025-12-25_17h33m50s') | {}
[32m2025-12-25 17:33:50[0m | [1mINFO    [0m | [36mmlflow_tracker.py:56[0m | [33msetup_experiment()[0m | Expérience active : health_lifestyle_diabetes | {}
[32m2025-12-25 17:33:50[0m | [1mINFO    [0m | [36mmlflow_tracker.py:70[0m | [33m

# CatBoost + MLflow (Clean Architecture)

In [None]:
# ============================================================
# 0) Imports - Dépendances Clean Architecture
# ============================================================
from health_lifestyle_diabetes.infrastructure.logger.config import configure_logging
from health_lifestyle_diabetes.infrastructure.logger.loguru_logger import LoguruLogger
from health_lifestyle_diabetes.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from health_lifestyle_diabetes.application.services.experiment_tracking_service import ExperimentTrackingService
from health_lifestyle_diabetes.infrastructure.tracking.run_name_generator import generate_run_name

# ============================================================
# 1) Dataset (réel) - Breast Cancer
# ============================================================
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from catboost import CatBoostClassifier

# Charger le dataset
data = load_breast_cancer(as_frame=True)
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ============================================================
# 2) Config log + injection dépendances
# ============================================================
configure_logging(env="dev")

logger = LoguruLogger()
tracker = MLflowExperimentTracker(logger)
tracking = ExperimentTrackingService(tracker=tracker, logger=logger)

# ============================================================
# 3) Démarrer une expérience CatBoost dans MLflow
# ============================================================
EXPERIMENT_NAME = "health_lifestyle_diabetes"
RUN_NAME = generate_run_name(prefix="CATB")
print(f"Starting experiment '{EXPERIMENT_NAME}' with run name '{MODEL_NAME}'")


tracking.start_experiment(
    experiment_name=EXPERIMENT_NAME,
    run_name=RUN_NAME
)

# ============================================================
# 4) Définition du modèle + log des paramètres
# ============================================================
model_params = {
    "iterations": 300,
    "learning_rate": 0.05,
    "depth": 6,
    "loss_function": "Logloss",
    "eval_metric": "AUC"
}

model = CatBoostClassifier(**model_params, verbose=False)

tracking.log_training_context(
    model_name="CatBoostClassifier",
    params=model_params
)

# ============================================================
# 5) Entraînement réel
# ============================================================
model.fit(X_train, y_train)

# ============================================================
# 6) Évaluation
# ============================================================
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    "f1_score": f1_score(y_test, y_pred),
    "auc": roc_auc_score(y_test, y_proba)
}

tracking.log_evaluation(metrics)

# ============================================================
# 7) Sauvegarde modèle en artefact
# ============================================================
#model_path = f"models/{RUN_NAME}.cbm"
#model.save_model(model_path)
#tracking.log_artifact(model_path)

# ============================================================
# 8) Fin de l'expérience
# ============================================================
tracking.close()

print("\nExécution terminée. Vérifier MLflow UI.")

[32m2025-12-25 17:56:39[0m | [1mINFO    [0m | [36mconfig.py:67[0m | [33mconfigure_logging()[0m | Loguru configuré avec succès (mode: dev) | {'env': 'dev'}
[32m2025-12-25 17:56:39[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:36[0m | [33m__init__()[0m | Tracking URI : sqlite://///Users/surelmanda/Downloads/ml-projects/mlflow_central/db/mlruns.db | {}
[32m2025-12-25 17:56:39[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:37[0m | [33m__init__()[0m | Artifact URI : /Users/surelmanda/Downloads/ml-projects/mlflow_central/mlflow_artifacts | {}


2025/12/25 17:56:40 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/25 17:56:40 INFO mlflow.store.db.utils: Updating database tables
2025/12/25 17:56:40 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/25 17:56:40 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2025/12/25 17:56:40 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2025/12/25 17:56:40 INFO alembic.runtime.migration: Will assume non-transactional DDL.


[32m2025-12-25 17:56:40[0m | [1mINFO    [0m | [36mmlflow_setup.py:53[0m | [33mconfigure()[0m | MLflow configuré avec succès. | {}
[32m2025-12-25 17:56:40[0m | [1mINFO    [0m | [36mmlflow_tracker.py:25[0m | [33m__init__()[0m | MLflowExperimentTracker prêt. | {}
[32m2025-12-25 17:56:40[0m | [1mINFO    [0m | [36mexperiment_tracking_service.py:23[0m | [33m__init__()[0m | ExperimentTrackingService initialisé. | {}
[32m2025-12-25 17:56:40[0m | [1mINFO    [0m | [36mexperiment_tracking_service.py:30[0m | [33mstart_experiment()[0m | Initialisation de l'expérience 'health_lifestyle_diabetes' (run='CATB_2025-12-25_17h56m40s') | {}
[32m2025-12-25 17:56:40[0m | [1mINFO    [0m | [36mmlflow_tracker.py:56[0m | [33msetup_experiment()[0m | Expérience active : health_lifestyle_diabetes | {}
[32m2025-12-25 17:56:40[0m | [1mINFO    [0m | [36mmlflow_tracker.py:70[0m | [33mstart_run()[0m | Run démarrée : CATB_2025-12-25_17h56m40s | {}
[32m2025-12-25 17:56:40[

# XGBoost + MLflow (Clean Architecture)

In [None]:
# ============================================================
# 0) Imports - Architecture (Clean)
# ============================================================
from health_lifestyle_diabetes.infrastructure.logger.config import configure_logging
from health_lifestyle_diabetes.infrastructure.logger.loguru_logger import LoguruLogger
from health_lifestyle_diabetes.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from health_lifestyle_diabetes.application.services.experiment_tracking_service import ExperimentTrackingService
from health_lifestyle_diabetes.infrastructure.tracking.run_name_generator import generate_run_name

# ============================================================
# 1) Imports - Dataset & XGBoost
# ============================================================
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from xgboost import XGBClassifier

# ============================================================
# 2) Dataset réel
# ============================================================
data = load_breast_cancer(as_frame=True)
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ============================================================
# 3) Logging global + dépendances
# ============================================================
configure_logging(env="dev")

logger = LoguruLogger()
tracker = MLflowExperimentTracker(logger=logger)
tracking = ExperimentTrackingService(tracker=tracker, logger=logger)

# ============================================================
# 4) Démarrage expérience MLflow
# ============================================================
EXPERIMENT_NAME = "health_lifestyle_diabetes"
RUN_NAME = generate_run_name(prefix="XGB")
print(f"Starting experiment '{EXPERIMENT_NAME}' with run name '{MODEL_NAME}'")


tracking.start_experiment(
    experiment_name=EXPERIMENT_NAME,
    run_name=RUN_NAME
)

# ============================================================
# 5) Modèle XGBoost + paramètres
# ============================================================
model_params = {
    "n_estimators": 300,
    "max_depth": 6,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "eval_metric": "logloss",
    #"use_label_encoder": False
}

model = XGBClassifier(**model_params)

# Log contexte d'entraînement dans MLflow
tracking.log_training_context(
    model_name="XGBClassifier",
    params=model_params
)

# ============================================================
# 6) Entraînement réel
# ============================================================
model.fit(X_train, y_train)

# ============================================================
# 7) Évaluation
# ============================================================
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    "f1_score": f1_score(y_test, y_pred),
    "auc": roc_auc_score(y_test, y_proba)
}

tracking.log_evaluation(metrics)

# ============================================================
# 8) Artefact : sauvegarde du modèle
# ============================================================
#model_path = f"models/{RUN_NAME}.json"
#model.save_model(model_path)
#tracking.log_artifact(model_path)

# ============================================================
# 9) Fin de l'expérience
# ============================================================
tracking.close()

print("\nExécution terminée. Consulte MLflow UI.")

[32m2025-12-25 17:57:50[0m | [1mINFO    [0m | [36mconfig.py:67[0m | [33mconfigure_logging()[0m | Loguru configuré avec succès (mode: dev) | {'env': 'dev'}
[32m2025-12-25 17:57:50[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:36[0m | [33m__init__()[0m | Tracking URI : sqlite://///Users/surelmanda/Downloads/ml-projects/mlflow_central/db/mlruns.db | {}
[32m2025-12-25 17:57:50[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:37[0m | [33m__init__()[0m | Artifact URI : /Users/surelmanda/Downloads/ml-projects/mlflow_central/mlflow_artifacts | {}
[32m2025-12-25 17:57:50[0m | [1mINFO    [0m | [36mmlflow_setup.py:53[0m | [33mconfigure()[0m | MLflow configuré avec succès. | {}
[32m2025-12-25 17:57:50[0m | [1mINFO    [0m | [36mmlflow_tracker.py:25[0m | [33m__init__()[0m | MLflowExperimentTracker prêt. | {}
[32m2025-12-25 17:57:50[0m | [1mINFO    [0m | [36mexperiment_tracking_service.py:23[0m | [33m__init__()[0m | ExperimentTrackingService initialisé. 

# LightGBM + MLflow (Clean Architecture)

In [None]:
# ============================================================
# 0) Imports - Architecture (Clean)
# ============================================================
from health_lifestyle_diabetes.infrastructure.logger.config import configure_logging
from health_lifestyle_diabetes.infrastructure.logger.loguru_logger import LoguruLogger
from health_lifestyle_diabetes.infrastructure.tracking.mlflow_tracker import MLflowExperimentTracker
from health_lifestyle_diabetes.application.services.experiment_tracking_service import ExperimentTrackingService
from health_lifestyle_diabetes.infrastructure.tracking.run_name_generator import generate_run_name

# ============================================================
# 1) Imports - Dataset & LightGBM
# ============================================================
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from lightgbm import LGBMClassifier

# ============================================================
# 2) Dataset réel
# ============================================================
data = load_breast_cancer(as_frame=True)
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# ============================================================
# 3) Logging global + dépendances
# ============================================================
configure_logging(env="dev")

logger = LoguruLogger()
tracker = MLflowExperimentTracker(logger=logger)
tracking = ExperimentTrackingService(tracker=tracker, logger=logger)

# ============================================================
# 4) Démarrage de l'expérience MLflow
# ============================================================
EXPERIMENT_NAME = "health_lifestyle_diabetes"
RUN_NAME = generate_run_name(prefix="LGBM")
print(f"Starting experiment '{EXPERIMENT_NAME}' with run name '{MODEL_NAME}'")

tracking.start_experiment(
    experiment_name=EXPERIMENT_NAME,
    run_name=RUN_NAME
)

# ============================================================
# 5) Définition du modèle LightGBM
# ============================================================
model_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "num_leaves": 31,
    "objective": "binary"
}

model = LGBMClassifier(**model_params)

# Log des paramètres dans MLflow
tracking.log_training_context(
    model_name="LGBMClassifier",
    params=model_params
)

# ============================================================
# 6) Entraînement réel
# ============================================================
model.fit(X_train, y_train)

# ============================================================
# 7) Évaluation
# ============================================================
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    "f1_score": f1_score(y_test, y_pred),
    "auc": roc_auc_score(y_test, y_proba)
}

tracking.log_evaluation(metrics)

# ============================================================
# 8) Artefact : sauvegarde du modèle
# ============================================================
#model_path = f"models/{RUN_NAME}.txt"
#model.booster_.save_model(model_path)  # Booster model save

#tracking.log_artifact(model_path)

# ============================================================
# 9) Fin de l'expérience
# ============================================================
tracking.close()

print("\nLightGBM - Exécution terminée. Consultez l'interface MLflow.")

[32m2025-12-25 17:57:54[0m | [1mINFO    [0m | [36mconfig.py:67[0m | [33mconfigure_logging()[0m | Loguru configuré avec succès (mode: dev) | {'env': 'dev'}
[32m2025-12-25 17:57:54[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:36[0m | [33m__init__()[0m | Tracking URI : sqlite://///Users/surelmanda/Downloads/ml-projects/mlflow_central/db/mlruns.db | {}
[32m2025-12-25 17:57:54[0m | [34m[1mDEBUG   [0m | [36mmlflow_setup.py:37[0m | [33m__init__()[0m | Artifact URI : /Users/surelmanda/Downloads/ml-projects/mlflow_central/mlflow_artifacts | {}
[32m2025-12-25 17:57:54[0m | [1mINFO    [0m | [36mmlflow_setup.py:53[0m | [33mconfigure()[0m | MLflow configuré avec succès. | {}
[32m2025-12-25 17:57:54[0m | [1mINFO    [0m | [36mmlflow_tracker.py:25[0m | [33m__init__()[0m | MLflowExperimentTracker prêt. | {}
[32m2025-12-25 17:57:54[0m | [1mINFO    [0m | [36mexperiment_tracking_service.py:23[0m | [33m__init__()[0m | ExperimentTrackingService initialisé. 