1. 📦 Configuration et Imports

In [4]:
# Imports
import pandas as pd
import numpy as np
import pickle
import os
from datetime import datetime
from pathlib import Path

# MLflow
import mlflow
import mlflow.sklearn
from dotenv import load_dotenv

# ML Models
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, VotingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression

# Tuning
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform

# Metrics
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score
)

import warnings
# NOTE(review): this silences every warning category for the rest of the
# notebook, including deprecation/convergence warnings raised by the
# libraries imported above. Consider narrowing the filter.
warnings.filterwarnings('ignore')

print("‚úÖ Imports termin√©s")

‚úÖ Imports termin√©s


In [5]:
# LOCAL MLFLOW CONFIGURATION (file-based backend)
# Uses a local file-based backend instead of a tracking server.
MLFLOW_TRACKING_URI = "./mlruns"  # Local directory where the runs are stored
EXPERIMENT_NAME = "churn_prediction"

# Create the mlruns directory if it does not exist yet
os.makedirs(MLFLOW_TRACKING_URI, exist_ok=True)

# Build the file URI with pathlib rather than string concatenation:
# "file:///" + abspath yields backslashes on Windows ("file:///c:\Users\...")
# and four slashes on POSIX ("file:////home/..."). Path.as_uri() produces a
# well-formed "file:///..." URI on both.
tracking_file_uri = Path(MLFLOW_TRACKING_URI).resolve().as_uri()
mlflow.set_tracking_uri(tracking_file_uri)
mlflow.set_experiment(EXPERIMENT_NAME)

# Disable incompatible features (safety)
os.environ["MLFLOW_ENABLE_LOGGED_MODEL_CREATION"] = "false"

print("‚úÖ MLflow configur√©")
print(f"üìä Tracking URI: {tracking_file_uri}")
print(f"üß™ Experiment: {EXPERIMENT_NAME}")
print(f"üí° Note: Utilisation d'un backend local (pas de serveur requis)")

‚úÖ MLflow configur√©
üìä Tracking URI: file:///c:\Users\Barky\Documents\I3\S1\MLOps\Project\MLOps_Bank_Churn\notebooks\mlruns
üß™ Experiment: churn_prediction
üí° Note: Utilisation d'un backend local (pas de serveur requis)


2. 📂 Chargement des Données

In [6]:
# Load the preprocessed train/test splits from the pickled dictionary.
DATA_PATH = 'processors/preprocessed_data.pkl'

# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open(DATA_PATH, 'rb') as f:
    data = pickle.load(f)

# Unpack the four splits in one statement, in the order they are named.
X_train, X_test, y_train, y_test = (
    data[split_key]
    for split_key in ('X_train', 'X_test', 'y_train', 'y_test')
)

print("‚úÖ Donn√©es charg√©es")
print(f"   Train: {X_train.shape}")
print(f"   Test: {X_test.shape}")

‚úÖ Donn√©es charg√©es
   Train: (42070, 40)
   Test: (6000, 40)


3. 🤖 Fonctions Utilitaires

In [7]:
# Compute the evaluation metrics for one set of predictions
def calculate_metrics(y_true, y_pred, y_proba):
    """
    Score a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, used for accuracy/precision/recall/F1.
        y_proba: Predicted scores, passed to roc_auc_score.

    Returns:
        dict with keys 'accuracy', 'precision', 'recall', 'f1_score'
        and 'roc_auc', in that order.
    """
    label_scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
    scores = {label: scorer(y_true, y_pred) for label, scorer in label_scorers}
    # ROC-AUC is the only metric computed from scores rather than labels.
    scores['roc_auc'] = roc_auc_score(y_true, y_proba)
    return scores

# Log a trained model (params, metrics, pickled artifact) as one MLflow run
def log_model_mlflow(model, model_name, stage, metrics, duration, best_params=None):
    """
    Log a model to MLflow as a run named "<model_name>_<stage>".

    Args:
        model: Fitted estimator; pickled to "<model_name>_<stage>.pkl" in the
            current working directory and uploaded as a run artifact.
        model_name: Logged as the 'model_name' param.
        stage: Logged as the 'stage' param.
        metrics: Mapping of metric name -> value, each logged as a metric.
        duration: Logged as the 'training_duration' metric.
        best_params: Optional mapping of tuned hyper-parameters; each entry is
            logged as 'best_<name>' on a best-effort basis.

    Returns:
        (run_id, model_filename) tuple.
    """
    with mlflow.start_run(run_name=f"{model_name}_{stage}"):
        # Log params
        mlflow.log_param('model_name', model_name)
        mlflow.log_param('stage', stage)
        # The feature count is read from the notebook-level X_train.
        mlflow.log_param('n_features', X_train.shape[1])

        # Log tuned hyper-parameters when provided. Logging stays best-effort,
        # but failures are reported instead of silently dropped, and only
        # Exception is caught so Ctrl-C / SystemExit still propagate.
        for k, v in (best_params or {}).items():
            try:
                mlflow.log_param(f'best_{k}', v)
            except Exception as exc:
                print(f"   Avertissement: impossible de logger le param best_{k}: {exc}")

        # Log metrics
        for metric_name, metric_value in metrics.items():
            mlflow.log_metric(metric_name, metric_value)
        mlflow.log_metric('training_duration', duration)

        # Save the model locally
        model_filename = f"{model_name}_{stage}.pkl"
        with open(model_filename, 'wb') as f:
            pickle.dump(model, f)

        # Upload the pickle as a run artifact (best-effort, but reported)
        try:
            mlflow.log_artifact(model_filename)
        except Exception as exc:
            print(f"   Avertissement: impossible de logger l'artifact {model_filename}: {exc}")

        run_id = mlflow.active_run().info.run_id
        return run_id, model_filename

print("‚úÖ Fonctions utilitaires d√©finies")

‚úÖ Fonctions utilitaires d√©finies


4. 🚀 Entraînement des Modèles Baseline (5 modèles)

In [8]:
# Baseline estimators, keyed by the display name used in logs, MLflow run
# names and the `trained_models` dictionary. Every estimator is seeded with
# random_state=42.
baseline_config = {
        # Gradient-boosted trees (histogram tree method), evaluated on AUC.
        # Unlike the other entries, no class-weighting option is set here.
        # NOTE(review): `use_label_encoder` is deprecated in recent XGBoost
        # releases — confirm it is still accepted by the installed version.
        'XGBoost': XGBClassifier(
            n_estimators=150,
            max_depth=7,
            learning_rate=0.05,
            subsample=0.85,
            colsample_bytree=0.85,
            min_child_weight=3,
            gamma=0.1,
            random_state=42,
            eval_metric='auc',
            use_label_encoder=False,
            tree_method='hist'
        ),
        
        # LightGBM with balanced class weights; verbose=-1 silences its logs.
        'LightGBM': LGBMClassifier(
            n_estimators=150,
            max_depth=8,
            learning_rate=0.05,
            num_leaves=40,
            min_child_samples=25,
            subsample=0.85,
            colsample_bytree=0.85,
            class_weight='balanced',
            random_state=42,
            n_jobs=-1,
            verbose=-1,
            importance_type='gain'
        ),
        
        # Random forest with class weights recomputed per bootstrap sample.
        'Random_Forest': RandomForestClassifier(
            n_estimators=200,
            max_depth=25,
            min_samples_split=15,
            min_samples_leaf=5,
            max_features='sqrt',
            class_weight='balanced_subsample',
            bootstrap=True,
            random_state=42,
            n_jobs=-1,
            warm_start=False
        ),
        
        # CatBoost on CPU with Bernoulli bootstrap and balanced class weights.
        'CatBoost': CatBoostClassifier(
            iterations=150,
            depth=7,
            learning_rate=0.05,
            l2_leaf_reg=3,
            border_count=128,
            auto_class_weights='Balanced',
            random_state=42,
            verbose=False,
            task_type='CPU',
            bootstrap_type='Bernoulli',
            subsample=0.85
        ),
        
        # Elastic-net logistic regression; 'saga' is the solver configured
        # for the elastic-net penalty here.
        'Logistic_Regression_ElasticNet': LogisticRegression(
            penalty='elasticnet',
            C=1.0,
            l1_ratio=0.5,
            solver='saga',
            max_iter=1000,
            class_weight='balanced',
            random_state=42,
            n_jobs=-1,
            warm_start=False
        )
    }

# Accumulators shared with later cells: the fitted estimators keyed as
# "<name>_<stage>", and one result row per MLflow run.
trained_models = {}
baseline_results = []

print("üöÄ Entra√Ænement des mod√®les BASELINE...\n")

for name in baseline_config:
    model = baseline_config[name]
    print(f"üìä {name}...", end=" ")
    start = datetime.now()

    # Fit in place, then predict labels and positive-class scores on the test split
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    class_probabilities = model.predict_proba(X_test)
    y_proba = class_probabilities[:, 1]

    # The measured duration covers fitting, prediction and metric computation
    metrics = calculate_metrics(y_test, y_pred, y_proba)
    elapsed = datetime.now() - start
    duration = elapsed.total_seconds()

    # Record the run in MLflow
    run_id, model_file = log_model_mlflow(model, name, 'baseline', metrics, duration)

    # Keep the estimator and its result row for the later comparison
    trained_models[name + "_baseline"] = model
    result_row = {'model': name, 'stage': 'baseline', 'run_id': run_id}
    result_row.update(metrics)
    result_row['duration'] = duration
    baseline_results.append(result_row)

    print(f"ROC-AUC: {metrics['roc_auc']:.4f} ({duration:.1f}s)")

print("\n‚úÖ Baseline termin√©!")

üöÄ Entra√Ænement des mod√®les BASELINE...

üìä XGBoost... ROC-AUC: 0.9995 (2.3s)
üìä LightGBM... ROC-AUC: 0.9995 (1.9s)
üìä Random_Forest... ROC-AUC: 0.9971 (8.7s)
üìä CatBoost... ROC-AUC: 0.9998 (4.0s)
üìä Logistic_Regression_ElasticNet... ROC-AUC: 0.9998 (38.0s)

‚úÖ Baseline termin√©!


5. 🔍 Fine-Tuning (5 modèles avec n_iter=10)

In [9]:
# Simplified search spaces for RandomizedSearchCV, keyed by the same model
# names as `baseline_config`.
# scipy.stats conventions (per the scipy documentation): uniform(loc, scale)
# samples from [loc, loc + scale]; randint(low, high) samples integers from
# [low, high), i.e. `high` is excluded. Plain lists are sampled uniformly.
search_spaces = {
        'XGBoost': {
            'n_estimators': randint(100, 500),
            'max_depth': randint(3, 12),
            'learning_rate': uniform(0.01, 0.29),
            'subsample': uniform(0.6, 0.4),
            'colsample_bytree': uniform(0.6, 0.4),
            'gamma': uniform(0, 0.5),
            'min_child_weight': randint(1, 10),
            'reg_alpha': uniform(0, 1),
            'reg_lambda': uniform(0, 2)
        },
        
        'LightGBM': {
            'n_estimators': randint(100, 500),
            # NOTE(review): this range includes -1 and 0; LightGBM presumably
            # treats max_depth <= 0 as "no limit" — confirm this is intended.
            'max_depth': randint(-1, 15),
            'learning_rate': uniform(0.01, 0.29),
            'num_leaves': randint(15, 150),
            'min_child_samples': randint(10, 60),
            'subsample': uniform(0.6, 0.4),
            'colsample_bytree': uniform(0.6, 0.4),
            'reg_alpha': uniform(0, 1),
            'reg_lambda': uniform(0, 2),
            'min_split_gain': uniform(0, 0.1)
        },
        
        'Random_Forest': {
            'n_estimators': randint(100, 500),
            'max_depth': [15, 20, 25, 30, 35, None],
            'min_samples_split': randint(2, 20),
            'min_samples_leaf': randint(1, 10),
            'max_features': ['sqrt', 'log2', 0.5, 0.7, 0.9],
            'max_samples': uniform(0.7, 0.3),
            'class_weight': ['balanced', 'balanced_subsample']
        },
        
        'CatBoost': {
            'iterations': randint(100, 500),
            'depth': randint(4, 11),
            'learning_rate': uniform(0.01, 0.29),
            'l2_leaf_reg': uniform(1, 9),
            'border_count': [32, 64, 128, 200, 254],
            'random_strength': uniform(0, 2)
        },
        
        # Logistic Regression (ElasticNet): penalty and solver are not
        # searched, so they stay as set on the baseline estimator.
        'Logistic_Regression_ElasticNet': {
            'C': uniform(0.001, 10),
            'l1_ratio': uniform(0, 1),
            'max_iter': randint(500, 2000),
            'tol': uniform(1e-5, 1e-3)
        }
    }

tuned_results = []
N_ITER = 10  # Number of parameter settings sampled per model
CV_FOLDS = 3

print(f"üîç Fine-Tuning ({N_ITER} iterations √ó {CV_FOLDS} folds)...\n")

# Settings shared by every randomized search
search_settings = dict(
    n_iter=N_ITER,
    cv=CV_FOLDS,
    scoring='roc_auc',
    n_jobs=-1,
    random_state=42,
    verbose=0,
)

for name in baseline_config:
    base_model = baseline_config[name]
    print(f"üìä {name}...", end=" ")
    start = datetime.now()

    # Randomized search over this model's space, selected on CV ROC-AUC
    search = RandomizedSearchCV(base_model, search_spaces[name], **search_settings)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_

    # Score the refitted best estimator on the test split
    y_pred = best_model.predict(X_test)
    class_probabilities = best_model.predict_proba(X_test)
    y_proba = class_probabilities[:, 1]

    # The measured duration covers the whole search plus the evaluation
    metrics = calculate_metrics(y_test, y_pred, y_proba)
    elapsed = datetime.now() - start
    duration = elapsed.total_seconds()

    # Record the run in MLflow, including the selected hyper-parameters
    run_id, model_file = log_model_mlflow(
        best_model, name, 'tuned', metrics, duration, search.best_params_
    )

    # Keep the estimator and its result row for the later comparison
    trained_models[name + "_tuned"] = best_model
    result_row = {'model': name, 'stage': 'tuned', 'run_id': run_id}
    result_row.update(metrics)
    result_row['duration'] = duration
    tuned_results.append(result_row)

    print(f"ROC-AUC: {metrics['roc_auc']:.4f} ({duration:.1f}s)")

print("\n‚úÖ Fine-tuning termin√©!")

üîç Fine-Tuning (10 iterations √ó 3 folds)...

üìä XGBoost... ROC-AUC: 0.9997 (45.0s)
üìä LightGBM... ROC-AUC: 0.9997 (44.9s)
üìä Random_Forest... ROC-AUC: 0.9974 (309.6s)
üìä CatBoost... ROC-AUC: 0.9998 (245.5s)
üìä Logistic_Regression_ElasticNet... ROC-AUC: 0.9999 (236.6s)

‚úÖ Fine-tuning termin√©!


6. 🎯 Stacking Ensembles (4 modèles)

In [10]:
# Base learners for both ensembles as (alias, tuned estimator) pairs.
# The tuned XGBoost model is not part of the ensemble.
base_learner_keys = {
    'lgbm': 'LightGBM_tuned',
    'rf': 'Random_Forest_tuned',
    'cat': 'CatBoost_tuned',
    'lr': 'Logistic_Regression_ElasticNet_tuned',
}
estimators = [
    (alias, trained_models[model_key])
    for alias, model_key in base_learner_keys.items()
]

ensemble_results = []

# Set to an estimator instance to override the default meta-learner below
meta_learner_config = None

print("üöÄ Entra√Ænement des mod√®les ENSEMBLE...\n")

# 1. Stacking with a logistic-regression meta-learner
print("üìä Stacking (LogReg)...", end=" ")

meta_learner = (
    meta_learner_config
    if meta_learner_config is not None
    else LogisticRegression(
        penalty='elasticnet',
        C=0.5,
        l1_ratio=0.3,
        solver='saga',
        max_iter=1500,
        random_state=42,
        class_weight='balanced',
        n_jobs=-1
    )
)

print(f"\nüß† Meta-Learner (Stacking):")
print(f"   ‚Ä¢ Type: {type(meta_learner).__name__}")
print(f"   ‚Ä¢ Configuration: R√©gularisation ElasticNet")

start = datetime.now()

# The meta-learner is trained on the base learners' predict_proba outputs
# from 5-fold cross-validation; the raw features are not passed through.
stacking_lr = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_learner,
    cv=5,
    stack_method='predict_proba',
    n_jobs=-1,
    passthrough=False,
    verbose=0
)
stacking_lr.fit(X_train, y_train)

y_pred = stacking_lr.predict(X_test)
y_proba = stacking_lr.predict_proba(X_test)[:, 1]

metrics_stack_lr = calculate_metrics(y_test, y_pred, y_proba)
elapsed = datetime.now() - start
duration = elapsed.total_seconds()

run_id_lr, _ = log_model_mlflow(stacking_lr, 'Stacking_LR', 'ensemble', metrics_stack_lr, duration)
trained_models['Stacking_LR'] = stacking_lr

stacking_row = {'model': 'Stacking_LR', 'stage': 'ensemble', 'run_id': run_id_lr}
stacking_row.update(metrics_stack_lr)
stacking_row['duration'] = duration
ensemble_results.append(stacking_row)

print(f"ROC-AUC: {metrics_stack_lr['roc_auc']:.4f} ({duration:.1f}s)")

# 2. Voting classifier (soft voting)
print("üìä Voting (Soft)...", end=" ")
start = datetime.now()

voting_soft = VotingClassifier(
    estimators=estimators,
    voting='soft',
    n_jobs=-1,
    flatten_transform=True,
    verbose=False
)
voting_soft.fit(X_train, y_train)

y_pred = voting_soft.predict(X_test)
y_proba = voting_soft.predict_proba(X_test)[:, 1]

metrics_voting_soft = calculate_metrics(y_test, y_pred, y_proba)
elapsed = datetime.now() - start
duration = elapsed.total_seconds()

run_id_soft, _ = log_model_mlflow(voting_soft, 'Voting_Soft', 'ensemble', metrics_voting_soft, duration)
trained_models['Voting_Soft'] = voting_soft

voting_row = {'model': 'Voting_Soft', 'stage': 'ensemble', 'run_id': run_id_soft}
voting_row.update(metrics_voting_soft)
voting_row['duration'] = duration
ensemble_results.append(voting_row)

print(f"ROC-AUC: {metrics_voting_soft['roc_auc']:.4f} ({duration:.1f}s)")

print("\n‚úÖ Ensembles termin√©s!")

üöÄ Entra√Ænement des mod√®les ENSEMBLE...

üìä Stacking (LogReg)... 
üß† Meta-Learner (Stacking):
   ‚Ä¢ Type: LogisticRegression
   ‚Ä¢ Configuration: R√©gularisation ElasticNet
ROC-AUC: 0.9999 (355.2s)
üìä Voting (Soft)... ROC-AUC: 0.9998 (85.2s)

‚úÖ Ensembles termin√©s!


7. 📊 Lecture des Résultats avec Pandas DataFrame

In [11]:
# Gather every result row (baseline, tuned, ensemble) into one DataFrame
all_results = [*baseline_results, *tuned_results, *ensemble_results]
df_results = pd.DataFrame(all_results)

overview_cols = ['model', 'stage', 'roc_auc', 'f1_score', 'duration']
ranking_cols = ['model', 'stage', 'roc_auc', 'f1_score']

print("üìä R√©sultats de tous les mod√®les:\n")
overview = df_results[overview_cols]
print(overview.to_string(index=False))

# Show the five best rows by ROC-AUC
print("\nüèÜ Top 5 mod√®les (ROC-AUC):\n")
top5 = df_results.nlargest(5, 'roc_auc')[ranking_cols]
print(top5.to_string(index=False))

üìä R√©sultats de tous les mod√®les:

                         model    stage  roc_auc  f1_score   duration
                       XGBoost baseline 0.999531  0.964048   2.297465
                      LightGBM baseline 0.999535  0.966711   1.887083
                 Random_Forest baseline 0.997076  0.917431   8.663865
                      CatBoost baseline 0.999789  0.971504   3.996373
Logistic_Regression_ElasticNet baseline 0.999817  0.948784  37.959722
                       XGBoost    tuned 0.999704  0.973666  44.975532
                      LightGBM    tuned 0.999744  0.972428  44.945735
                 Random_Forest    tuned 0.997352  0.918489 309.632431
                      CatBoost    tuned 0.999765  0.977058 245.529327
Logistic_Regression_ElasticNet    tuned 0.999899  0.954282 236.558460
                   Stacking_LR ensemble 0.999905  0.985953 355.159677
                   Voting_Soft ensemble 0.999842  0.977303  85.174074

üèÜ Top 5 mod√®les (ROC-AUC):

                  

In [12]:
# Read the runs back from MLflow directly
print("\nüì• Lecture depuis MLflow...\n")

# Resolve the experiment. get_experiment_by_name returns None when the
# experiment does not exist, so guard before dereferencing it and fall back
# to an empty result, which takes the "no run found" branch below.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)

if experiment is None:
    df_mlflow = pd.DataFrame()
else:
    experiment_id = experiment.experiment_id

    # Search all runs that logged a ROC-AUC, best first
    df_mlflow = mlflow.search_runs(
        experiment_ids=[experiment_id],
        filter_string="metrics.roc_auc > 0",
        order_by=["metrics.roc_auc DESC"]
    )

# Show the key columns (only those actually present in the result)
if len(df_mlflow) > 0:
    cols_to_show = ['run_id', 'params.model_name', 'params.stage', 
                    'metrics.roc_auc', 'metrics.f1_score', 'metrics.training_duration']
    available_cols = [col for col in cols_to_show if col in df_mlflow.columns]
    print(df_mlflow[available_cols].head(10))
    print(f"\n‚úÖ {len(df_mlflow)} runs trouv√©es dans MLflow")
else:
    print("‚ö†Ô∏è  Aucune run trouv√©e dans MLflow")


üì• Lecture depuis MLflow...

                             run_id               params.model_name  \
0  b72341500b49443c96727e61083a457d                     Stacking_LR   
1  e99dc336e4e6454cb917caaf0186c1ca  Logistic_Regression_ElasticNet   
2  83dececb6b024018925558ac311b3b53                     Voting_Soft   
3  d89c44df92fe4cf2b35d2de2bb03e1cb  Logistic_Regression_ElasticNet   
4  d663194d575441f9997fddab193d141e                        CatBoost   
5  9598077b632f4bf3b657aa0cf2437df0                        CatBoost   
6  eee47e07b2734cf98d26f00d1c432456                        LightGBM   
7  2cf25602ba014a51b64fd1bb1aefb27a                         XGBoost   
8  27be18a5ae0448dc8e4ec70320a40fdf                        LightGBM   
9  567af913fed840288bd27b85b7781c10                         XGBoost   

  params.stage  metrics.roc_auc  metrics.f1_score  metrics.training_duration  
0     ensemble         0.999905          0.985953                 355.159677  
1        tuned         0.999

8. 🏆 Sélection du Meilleur Modèle (ROC-AUC)

In [13]:
# Pick the best row of the local results DataFrame by ROC-AUC
best_idx = df_results['roc_auc'].idxmax()
best_row = df_results.loc[best_idx]

best_model_name = best_row['model']
best_stage = best_row['stage']
best_run_id = best_row['run_id']
best_roc_auc = best_row['roc_auc']

print("üèÜ MEILLEUR MOD√àLE (ROC-AUC)")
print("="*60)
print(f"Mod√®le:    {best_model_name}")
print(f"Stage:     {best_stage}")
print(f"ROC-AUC:   {best_roc_auc:.4f}")
print(f"F1-Score:  {best_row['f1_score']:.4f}")
print(f"Precision: {best_row['precision']:.4f}")
print(f"Recall:    {best_row['recall']:.4f}")
print(f"Run ID:    {best_run_id}")
print("="*60)

# Retrieve the in-memory estimator. Ensemble models are stored under their
# bare name; the others under "<name>_<stage>".
best_model_key = f"{best_model_name}_{best_stage}" if best_stage != 'ensemble' else best_model_name

# Fail here with a clear message rather than letting .get() return None,
# which would print a success line and only break later in .predict().
if best_model_key not in trained_models:
    raise KeyError(
        f"'{best_model_key}' introuvable dans trained_models "
        f"(disponibles: {sorted(trained_models)})"
    )
best_model = trained_models[best_model_key]

print(f"\n‚úÖ Mod√®le charg√© : {type(best_model).__name__}")

üèÜ MEILLEUR MOD√àLE (ROC-AUC)
Mod√®le:    Stacking_LR
Stage:     ensemble
ROC-AUC:   0.9999
F1-Score:  0.9860
Precision: 0.9775
Recall:    0.9946
Run ID:    b72341500b49443c96727e61083a457d

‚úÖ Mod√®le charg√© : StackingClassifier


9. 🔄 Chargement du Modèle depuis MLflow

In [14]:
# Load the best model back from its MLflow run
print(f"üì• Chargement du mod√®le depuis run_id: {best_run_id}\n")

# The pickle was logged as a plain artifact named "<model>_<stage>.pkl"
model_filename = f"{best_model_name}_{best_stage}.pkl"
artifact_uri = f"runs:/{best_run_id}/{model_filename}"

# Via artifact download (compatible with DagsHub)
try:
    local_path = mlflow.artifacts.download_artifacts(artifact_uri)

    # NOTE: pickle can execute arbitrary code on load; only open trusted files.
    with open(local_path, 'rb') as f:
        loaded_model = pickle.load(f)
except Exception as e:
    # Fall back to the estimator that is still in memory
    print(f"‚ö†Ô∏è  Erreur de chargement depuis MLflow: {e}")
    print(f"   Utilisation du mod√®le en m√©moire √† la place")
    loaded_model = best_model
else:
    print(f"‚úÖ Mod√®le charg√© depuis MLflow artifact")
    print(f"   Type: {type(loaded_model).__name__}")

# Alternative: load the copy that was written to the working directory
local_model_file = f"{best_model_name}_{best_stage}.pkl"
if Path(local_model_file).exists():
    with open(local_model_file, 'rb') as f:
        loaded_model_local = pickle.load(f)
    print(f"‚úÖ Mod√®le √©galement disponible localement: {local_model_file}")

üì• Chargement du mod√®le depuis run_id: b72341500b49443c96727e61083a457d

‚úÖ Mod√®le charg√© depuis MLflow artifact
   Type: StackingClassifier
‚úÖ Mod√®le √©galement disponible localement: Stacking_LR_ensemble.pkl


In [15]:
# Smoke-test the reloaded model on the test split
print("\nüß™ Test du mod√®le charg√©...\n")

# Labels and positive-class scores for the full test split
y_pred_loaded = loaded_model.predict(X_test)
loaded_probabilities = loaded_model.predict_proba(X_test)
y_proba_loaded = loaded_probabilities[:, 1]

# Metrics (also stored later in the local registry metadata)
test_metrics = calculate_metrics(y_test, y_pred_loaded, y_proba_loaded)

print("üìä Performances du mod√®le charg√©:")
for metric in test_metrics:
    value = test_metrics[metric]
    print(f"   {metric:12s}: {value:.4f}")

# Predictions for the first five test rows
print("\nüîç Pr√©dictions sur 5 exemples:")
first_rows = X_test[:5]
sample_predictions = loaded_model.predict(first_rows)
sample_probas = loaded_model.predict_proba(first_rows)[:, 1]

for sample_no in range(1, 6):
    i = sample_no - 1
    print(f"   Sample {sample_no}: Churn={sample_predictions[i]}, Proba={sample_probas[i]:.4f}")

print("\n‚úÖ Mod√®le fonctionne correctement!")


üß™ Test du mod√®le charg√©...

üìä Performances du mod√®le charg√©:
   accuracy    : 0.9965
   precision   : 0.9775
   recall      : 0.9946
   f1_score    : 0.9860
   roc_auc     : 0.9999

üîç Pr√©dictions sur 5 exemples:
   Sample 1: Churn=0, Proba=0.0000
   Sample 2: Churn=0, Proba=0.0000
   Sample 3: Churn=0, Proba=0.0000
   Sample 4: Churn=0, Proba=0.0000
   Sample 5: Churn=0, Proba=0.0090

‚úÖ Mod√®le fonctionne correctement!


In [16]:


banner = "=" * 80
print("\n" + banner)
print("üì¶ ENREGISTREMENT DANS MLFLOW MODEL REGISTRY")
print(banner)

# 1. Register the best model in the MLflow Model Registry
model_name = f"churn_prediction_{best_model_name}"

print(f"\nüîÑ Enregistrement du mod√®le: {model_name}")
print(f"   Run ID: {best_run_id}")

try:
    # Re-open the winning run and log the estimator with the sklearn flavor;
    # registered_model_name creates a new registry version.
    with mlflow.start_run(run_id=best_run_id):
        mlflow.sklearn.log_model(
            sk_model=best_model,
            artifact_path="model",
            registered_model_name=model_name
        )

    print(f"‚úÖ Mod√®le enregistr√© dans MLflow Model Registry")
    print(f"   Nom: {model_name}")

    # 2. Look up the latest version that is still in stage "None"
    client = mlflow.tracking.MlflowClient()
    latest_versions = client.get_latest_versions(model_name, stages=["None"])

    if latest_versions:
        latest_version = latest_versions[0].version
        print(f"   Version: {latest_version}")

        # 3. Promote it to Production, archiving versions already there
        print(f"\nüöÄ Transition vers Production...")
        client.transition_model_version_stage(
            name=model_name,
            version=latest_version,
            stage="Production",
            archive_existing_versions=True
        )

        print(f"‚úÖ Mod√®le mis en Production")
        print(f"   Stage: Production")
        print(f"   Version: {latest_version}")

        # 4. Attach a description to the version
        version_description = f"""
            Best performing churn prediction model
            - Model: {best_model_name}
            - Stage: {best_stage}
            - ROC-AUC: {best_roc_auc:.4f}
            - F1-Score: {best_row['f1_score']:.4f}
            - Trained: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
            """
        client.update_model_version(
            name=model_name,
            version=latest_version,
            description=version_description
        )

        # 5. Attach the tags, in the same order as they are listed
        version_tags = {
            "model_type": best_model_name,
            "training_stage": best_stage,
            "roc_auc": str(round(best_roc_auc, 4)),
        }
        for tag_key, tag_value in version_tags.items():
            client.set_model_version_tag(
                name=model_name,
                version=latest_version,
                key=tag_key,
                value=tag_value
            )

        print(f"‚úÖ M√©tadonn√©es ajout√©es (description + tags)")

except Exception as e:
    print(f"‚ö†Ô∏è  Erreur lors de l'enregistrement: {e}")
    print(f"   Le mod√®le reste disponible via run_id: {best_run_id}")

# 6. Helper to load a model from the MLflow Model Registry
def load_model_from_registry(model_name, stage="Production"):
    """
    Load a model from the MLflow Model Registry.

    Args:
        model_name: Name of the registered model.
        stage: Registry stage to resolve (Production, Staging, Archived, None).

    Returns:
        The model returned by mlflow.sklearn.load_model for
        "models:/<model_name>/<stage>".
    """
    return mlflow.sklearn.load_model("models:/{}/{}".format(model_name, stage))

# 7. Check that the Production model can be loaded from the registry and scored
print("\nüîÑ Test de chargement depuis MLflow Model Registry...\n")

try:
    loaded_model_mlflow = load_model_from_registry(model_name, stage="Production")

    # Predictions for the first five rows, then scores and labels for the full split
    y_pred_test = loaded_model_mlflow.predict(X_test[:5])
    y_proba_test = loaded_model_mlflow.predict_proba(X_test)[:, 1]
    full_predictions = loaded_model_mlflow.predict(X_test)

    # Compute the metrics
    mlflow_metrics = calculate_metrics(y_test, full_predictions, y_proba_test)

    print(f"‚úÖ Mod√®le charg√© depuis MLflow Registry")
    print(f"   URI: models:/{model_name}/Production")
    print(f"   Type: {type(loaded_model_mlflow).__name__}")
    print(f"\nüìä Performances:")
    for metric in mlflow_metrics:
        value = mlflow_metrics[metric]
        print(f"   {metric:12s}: {value:.4f}")

    print(f"\nüß™ Test sur 5 exemples:")
    for sample_no in range(1, 6):
        print(f"   Sample {sample_no}: Prediction={y_pred_test[sample_no - 1]}")

    print("\n‚úÖ Le mod√®le fonctionne correctement!")

except Exception as e:
    print(f"‚ö†Ô∏è  Erreur de chargement: {e}")

# 8. List every registered version of the model
print("\nüìã Versions du mod√®le dans le Registry:\n")

try:
    client = mlflow.tracking.MlflowClient()

    # Fetch all versions registered under this model name
    all_versions = client.search_model_versions(f"name='{model_name}'")

    if not all_versions:
        print("‚ö†Ô∏è  Aucune version trouv√©e")
    else:
        print(f"{'Version':<10} {'Stage':<15} {'Created':<20} {'Run ID':<40}")
        print("-" * 90)

        for mv in all_versions:
            # creation_timestamp is divided by 1000 before conversion
            created_at = datetime.fromtimestamp(mv.creation_timestamp/1000)
            created = created_at.strftime('%Y-%m-%d %H:%M:%S')
            print(f"{mv.version:<10} {mv.current_stage:<15} {created:<20} {mv.run_id:<40}")

        print(f"\n‚úÖ {len(all_versions)} version(s) trouv√©e(s)")

except Exception as e:
    print(f"‚ö†Ô∏è  Erreur: {e}")

# 9. Print a copy-pasteable production usage example
section_rule = "=" * 80

print("\n" + section_rule)
print("üí° UTILISATION EN PRODUCTION")
print(section_rule)

# The snippet is interpolated with the registered model name and best run id
usage_snippet = f"""
# Charger le mod√®le en production:
import mlflow

model = mlflow.sklearn.load_model("models:/{model_name}/Production")

# Faire des pr√©dictions:
predictions = model.predict(X_new)
probabilities = model.predict_proba(X_new)[:, 1]

# Ou charger une version sp√©cifique:
model_v1 = mlflow.sklearn.load_model("models:/{model_name}/1")

# Ou charger depuis un run_id:
model_from_run = mlflow.sklearn.load_model("runs:/{best_run_id}/model")
"""
print(usage_snippet)

print("\n" + section_rule)
print("‚úÖ ENREGISTREMENT MLFLOW MODEL REGISTRY TERMIN√â")
print(section_rule)




üì¶ ENREGISTREMENT DANS MLFLOW MODEL REGISTRY

üîÑ Enregistrement du mod√®le: churn_prediction_Stacking_LR
   Run ID: b72341500b49443c96727e61083a457d


Successfully registered model 'churn_prediction_Stacking_LR'.
Created version '1' of model 'churn_prediction_Stacking_LR'.


‚úÖ Mod√®le enregistr√© dans MLflow Model Registry
   Nom: churn_prediction_Stacking_LR
   Version: 1

üöÄ Transition vers Production...
‚úÖ Mod√®le mis en Production
   Stage: Production
   Version: 1
‚úÖ M√©tadonn√©es ajout√©es (description + tags)

üîÑ Test de chargement depuis MLflow Model Registry...

‚úÖ Mod√®le charg√© depuis MLflow Registry
   URI: models:/churn_prediction_Stacking_LR/Production
   Type: StackingClassifier

üìä Performances:
   accuracy    : 0.9965
   precision   : 0.9775
   recall      : 0.9946
   f1_score    : 0.9860
   roc_auc     : 0.9999

üß™ Test sur 5 exemples:
   Sample 1: Prediction=0
   Sample 2: Prediction=0
   Sample 3: Prediction=0
   Sample 4: Prediction=0
   Sample 5: Prediction=0

‚úÖ Le mod√®le fonctionne correctement!

üìã Versions du mod√®le dans le Registry:

Version    Stage           Created              Run ID                                  
------------------------------------------------------------------------------------------


10. 📦 Enregistrement dans Model Registry (Local)

In [17]:
# Create the local Model Registry directory.
# parents=True so this also works when the "processors" directory does not
# exist yet (a plain mkdir raises FileNotFoundError in that case).
MODEL_REGISTRY_DIR = Path("processors/model_registry")
MODEL_REGISTRY_DIR.mkdir(parents=True, exist_ok=True)

def register_model(model, model_name, version="1.0.0", stage="production",
                   metrics=None, run_id=None):
    """
    Register a model in the local, file-based registry.

    Layout written under MODEL_REGISTRY_DIR (spaces in the name become "_"):
        <model_name>/<version>/model.pkl       pickled model
        <model_name>/<version>/metadata.json   name, version, stage, timestamp,
                                               metrics and run id
        <model_name>/production.pkl            copy of model.pkl, only when
                                               stage == "production"

    Args:
        model: Object to pickle.
        model_name: Registry name of the model.
        version: Version label, used as the sub-directory name.
        stage: Stage recorded in the metadata.
        metrics: Metrics to store in the metadata. Defaults to the
            notebook-level `test_metrics` (the previous implicit behaviour).
        run_id: MLflow run id to store in the metadata. Defaults to the
            notebook-level `best_run_id` (the previous implicit behaviour).

    Returns:
        Path of the written model.pkl, as a string.
    """
    import json
    import shutil

    # Explicit arguments win; the notebook globals are only a fallback so
    # existing calls keep working while other models can be registered with
    # their own metrics and run id.
    if metrics is None:
        metrics = test_metrics
    if run_id is None:
        run_id = best_run_id

    # Create the directory structure (parents included)
    model_dir = MODEL_REGISTRY_DIR / model_name.replace(" ", "_")
    version_dir = model_dir / version
    version_dir.mkdir(parents=True, exist_ok=True)

    # Save the model
    model_path = version_dir / "model.pkl"
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    # Metadata
    metadata = {
        "model_name": model_name,
        "version": version,
        "stage": stage,
        "registered_at": datetime.now().isoformat(),
        "metrics": metrics,
        "run_id": run_id
    }

    with open(version_dir / "metadata.json", 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)

    # Production copy.
    # NOTE(review): only the "production" stage gets a "<stage>.pkl" copy,
    # while load_from_registry() opens "<stage>.pkl" for any stage.
    if stage == "production":
        prod_path = model_dir / "production.pkl"
        shutil.copy(model_path, prod_path)

    return str(model_path)

# Register the reloaded best model as production version 1.0.0
print("üì¶ Enregistrement dans Model Registry...\n")

registry_name = "Best_Churn_{}".format(best_model_name)
model_path = register_model(loaded_model, registry_name, "1.0.0", "production")

registration_summary = (
    f"‚úÖ Mod√®le enregistr√© dans le registry",
    f"   Nom: {registry_name}",
    f"   Version: 1.0.0",
    f"   Stage: production",
    f"   Path: {model_path}",
)
for summary_line in registration_summary:
    print(summary_line)

üì¶ Enregistrement dans Model Registry...

‚úÖ Mod√®le enregistr√© dans le registry
   Nom: Best_Churn_Stacking_LR
   Version: 1.0.0
   Stage: production
   Path: processors\model_registry\Best_Churn_Stacking_LR\1.0.0\model.pkl


In [18]:
# Helper to load a model (and its metadata) from the local registry
def load_from_registry(model_name, stage="production"):
    """
    Load a model from the local registry.

    Args:
        model_name: Registry name of the model (spaces map to "_").
        stage: The pickle "<stage>.pkl" in the model directory is loaded.

    Returns:
        (model, metadata) where metadata is the metadata.json of the highest
        version directory, or {} when the model has no version directory.

    NOTE: pickle can execute arbitrary code on load; only use this on a
    registry you trust.
    """
    import json

    def _version_key(version_dir):
        # Compare dotted versions numerically so "1.10.0" ranks above "1.9.0".
        # A plain sort on the paths is lexicographic and would pick "1.9.0".
        # Non-numeric parts rank after numeric ones and compare as text.
        return [
            (0, int(part), "") if part.isdecimal() else (1, 0, part)
            for part in version_dir.name.split(".")
        ]

    model_dir = MODEL_REGISTRY_DIR / model_name.replace(" ", "_")
    model_path = model_dir / f"{stage}.pkl"

    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    # Load the metadata of the highest version.
    # NOTE(review): this is the latest version overall, which is not
    # necessarily the version that "<stage>.pkl" was copied from.
    versions = [d for d in model_dir.iterdir() if d.is_dir()]
    if versions:
        latest_version = max(versions, key=_version_key)
        with open(latest_version / "metadata.json", 'r') as f:
            metadata = json.load(f)
    else:
        metadata = {}

    return model, metadata

# Check that the production model can be read back from the local registry
print("\nüîÑ Test de chargement depuis le registry...\n")

registry_payload = load_from_registry(registry_name, "production")
loaded_from_registry, metadata = registry_payload

stored_metrics = metadata.get('metrics', {})
stored_roc_auc = stored_metrics.get('roc_auc', 0)

print(f"‚úÖ Mod√®le charg√© depuis le registry")
print(f"   Nom: {metadata.get('model_name', 'N/A')}")
print(f"   Version: {metadata.get('version', 'N/A')}")
print(f"   ROC-AUC: {stored_roc_auc:.4f}")

# Prediction smoke test on the first five test rows
first_rows = X_test[:5]
test_pred = loaded_from_registry.predict(first_rows)
print(f"\nüß™ Test de pr√©diction: {test_pred}")
print("‚úÖ Le mod√®le fonctionne correctement!")


üîÑ Test de chargement depuis le registry...

‚úÖ Mod√®le charg√© depuis le registry
   Nom: Best_Churn_Stacking_LR
   Version: 1.0.0
   ROC-AUC: 0.9999

üß™ Test de pr√©diction: [0 0 0 0 0]
‚úÖ Le mod√®le fonctionne correctement!


11. 📊 Résumé Final

In [19]:
# Final recap. The model counts are derived from the result lists rather than
# hard-coded, so the summary stays correct when models are added or removed.
n_baseline = len(baseline_results)
n_tuned = len(tuned_results)
n_ensemble = len(ensemble_results)

print("\n" + "="*80)
print("üéâ R√âSUM√â FINAL - MLflow Tracking")
print("="*80)

print(f"\nüìä Mod√®les entra√Æn√©s:")
print(f"   ‚Ä¢ Baseline:  {n_baseline} mod√®les")
print(f"   ‚Ä¢ Tuned:     {n_tuned} mod√®les (n_iter={N_ITER})")
print(f"   ‚Ä¢ Ensemble:  {n_ensemble} mod√®les (Stacking + Voting)")
print(f"   ‚Ä¢ TOTAL:     {n_baseline + n_tuned + n_ensemble} mod√®les")

print(f"\nüèÜ Meilleur mod√®le:")
print(f"   ‚Ä¢ Nom:       {best_model_name}")
print(f"   ‚Ä¢ Stage:     {best_stage}")
print(f"   ‚Ä¢ ROC-AUC:   {best_roc_auc:.4f}")
print(f"   ‚Ä¢ F1-Score:  {best_row['f1_score']:.4f}")

print(f"\nüîó MLflow:")
print(f"   ‚Ä¢ Tracking URI: {MLFLOW_TRACKING_URI}")
print(f"   ‚Ä¢ Experiment:   {EXPERIMENT_NAME}")
print(f"   ‚Ä¢ Runs totales: {len(df_results)}")

print(f"\nüì¶ Model Registry:")
print(f"   ‚Ä¢ Nom:     {registry_name}")
print(f"   ‚Ä¢ Version: 1.0.0")
print(f"   ‚Ä¢ Stage:   production")
print(f"   ‚Ä¢ Path:    {MODEL_REGISTRY_DIR / registry_name.replace(' ', '_')}")

print("\n" + "="*80)
print("‚úÖ Pipeline MLflow termin√© avec succ√®s!")
print("="*80)

print("\nüí° Prochaines √©tapes:")
print("   1. Consultez MLflow UI pour voir toutes les runs")
print("   2. Chargez le mod√®le avec: load_from_registry()")
print("   3. D√©ployez en production")
print("   4. Configurez le monitoring")


üéâ R√âSUM√â FINAL - MLflow Tracking

üìä Mod√®les entra√Æn√©s:
   ‚Ä¢ Baseline:  5 mod√®les
   ‚Ä¢ Tuned:     5 mod√®les (n_iter=10)
   ‚Ä¢ Ensemble:  2 mod√®les (Stacking + Voting)
   ‚Ä¢ TOTAL:     12 mod√®les

üèÜ Meilleur mod√®le:
   ‚Ä¢ Nom:       Stacking_LR
   ‚Ä¢ Stage:     ensemble
   ‚Ä¢ ROC-AUC:   0.9999
   ‚Ä¢ F1-Score:  0.9860

üîó MLflow:
   ‚Ä¢ Tracking URI: ./mlruns
   ‚Ä¢ Experiment:   churn_prediction
   ‚Ä¢ Runs totales: 12

üì¶ Model Registry:
   ‚Ä¢ Nom:     Best_Churn_Stacking_LR
   ‚Ä¢ Version: 1.0.0
   ‚Ä¢ Stage:   production
   ‚Ä¢ Path:    processors\model_registry\Best_Churn_Stacking_LR

‚úÖ Pipeline MLflow termin√© avec succ√®s!

üí° Prochaines √©tapes:
   1. Consultez MLflow UI pour voir toutes les runs
   2. Chargez le mod√®le avec: load_from_registry()
   3. D√©ployez en production
   4. Configurez le monitoring
