In [1]:
# ============================================
# IMPORTS
# ============================================

import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient
from datetime import datetime
import warnings

# Silence every warning category for the rest of the session so the
# printed report below is not interleaved with library warnings.
warnings.filterwarnings("ignore")

# Notebook banner: the title framed by two 80-character rules.
print("=" * 80, " MLflow Model Registry", "=" * 80, sep="\n")

 MLflow Model Registry


In [2]:
# ============================================
# CONFIGURATION MLFLOW
# ============================================

# Tracking server address and the registry/experiment names used by the
# cells that follow.
MLFLOW_TRACKING_URI = "http://localhost:5000"
MODEL_NAME = "kid-friendly-classifier"
EXPERIMENT_NAME = "podcast-classification-kid-friendly"

# Set the tracking URI first, then create the client.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient()

# Echo the active configuration, one setting per line.
print(
    f"Tracking URI: {MLFLOW_TRACKING_URI}",
    f"Model Name: {MODEL_NAME}",
    f"Experiment: {EXPERIMENT_NAME}",
    sep="\n",
)

Tracking URI: http://localhost:5000
Model Name: kid-friendly-classifier
Experiment: podcast-classification-kid-friendly


In [3]:
# ============================================
# RECHERCHE DU MEILLEUR RUN
# ============================================

# Size of the leaderboard; drives both the query limit and the printed title.
TOP_N_RUNS = 10

experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)

if experiment is None:
    print(f"⚠ Expérience '{EXPERIMENT_NAME}' introuvable")
    print(" → Vérifiez que les notebooks précédents ont été exécutés")
else:
    # The query itself sorts by F1 (best first) and caps the number of rows,
    # so no further slicing is needed before printing.
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=["metrics.f1_score DESC"],
        max_results=TOP_N_RUNS
    )

    print("\n" + "="*80)
    print(f" TOP {TOP_N_RUNS} RUNS (par F1-Score)")
    print("="*80 + "\n")

    # Only show the columns that the returned runs actually provide.
    display_cols = ['tags.model_type', 'metrics.f1_score',
                   'metrics.accuracy', 'metrics.precision', 'metrics.recall']
    available_cols = [col for col in display_cols if col in runs.columns]

    if runs.empty:
        # Say so explicitly instead of printing pandas' "Empty DataFrame" text.
        print(" ⚠ Aucun run trouvé dans cette expérience")
    elif not available_cols:
        print(" ⚠ Aucune des colonnes attendues n'est disponible dans les runs")
    else:
        print(runs[available_cols].to_string(index=False))
    print("\n" + "="*80)


 TOP 10 RUNS (par F1-Score)

        tags.model_type  metrics.f1_score  metrics.accuracy  metrics.precision  metrics.recall
             Linear SVM          0.734694          0.845238           0.666667        0.818182
    Logistic Regression          0.679245          0.797619           0.580645        0.818182
Multinomial Naive Bayes          0.466667          0.809524           0.875000        0.318182
 Dummy (Baseline Naïve)          0.000000          0.738095           0.000000        0.000000



In [4]:
# ============================================
# FONCTION D'ENREGISTREMENT
# ============================================

def select_and_register_best_model(
    experiment_name,
    model_name,
    metric="f1_score",
    stage="Production",
    archive_existing=True
):
    """
    Select the best run of an experiment and register it in the Model Registry.

    The run with the highest ``metrics.<metric>`` value is looked up, its
    ``model`` artifact is registered as a new version of ``model_name``, the
    version receives a description and tags, is transitioned to ``stage`` and
    is given the ``champion`` alias. Progress is reported with ``print``.

    Args:
        experiment_name: Name of the MLflow experiment to search.
        model_name: Registered-model name the new version is created under.
        metric: Metric key (without the ``metrics.`` prefix) used to rank runs.
        stage: Registry stage the new version is transitioned to.
        archive_existing: When true, versions that were in ``stage`` before
            the promotion are moved to ``Archived`` once the promotion
            succeeded.

    Returns:
        A dict with the keys ``model_name``, ``version``, ``run_id``,
        ``stage`` and ``metrics``; ``None`` when the experiment is missing,
        no run exists, the best run has no value for ``metric``, or a
        registry call fails.
    """
    print("\n" + "="*80)
    print(" SÉLECTION ET ENREGISTREMENT DU MEILLEUR MODÈLE")
    print("="*80)

    # Look up the experiment
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if not experiment:
        print(f" ✗ Expérience '{experiment_name}' introuvable")
        return None

    # Fetch only the top-ranked run
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=[f"metrics.{metric} DESC"],
        max_results=1
    )

    if runs.empty:
        print(" ✗ Aucun run trouvé")
        return None

    best_run = runs.iloc[0]
    run_id = best_run['run_id']
    metric_col = f'metrics.{metric}'

    # Without a value for the ranking metric the "best" run is meaningless;
    # bail out instead of raising KeyError or registering a NaN-scored model.
    if metric_col not in best_run.index or pd.isna(best_run[metric_col]):
        print(f" ✗ Métrique '{metric}' absente du meilleur run")
        return None

    best_score = best_run[metric_col]
    model_type = str(best_run.get('tags.model_type', 'Unknown'))

    print(f"\n Meilleur Run ID: {run_id[:8]}...")
    print(f" Modèle: {model_type}")
    print(f" {metric.upper()}: {best_score:.4f}")

    # Metrics reported in the description and in the returned dict
    metrics = {}
    for m in ['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc']:
        col = f'metrics.{m}'
        if col in best_run.index and pd.notna(best_run[col]):
            metrics[m] = best_run[col]

    # Version description. The metric lines are built separately so that no
    # f-string reuses its own quote character (a syntax error before 3.12).
    metric_lines = "\n".join(
        f"  • {name.replace('_', ' ').title()}: {value:.4f}"
        for name, value in metrics.items()
    )
    description = (
        f"Best performing model selected on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
        "\n"
        "Performance Metrics:\n"
        f"{metric_lines}\n"
        "\n"
        f"Model Type: {model_type}\n"
        f"Training Date: {best_run.get('start_time', 'Unknown')}\n"
    )

    # The training_date tag mirrors the run's start time (as the description
    # does); the current date is only a fallback when that value is unusable.
    started = best_run.get('start_time')
    if started is not None and hasattr(started, 'strftime') and not pd.isna(started):
        training_date = started.strftime('%Y-%m-%d')
    else:
        training_date = datetime.now().strftime('%Y-%m-%d')

    model_uri = f"runs:/{run_id}/model"

    try:
        # NOTE(review): the saved output of this notebook shows this call
        # failing with a 404 on /api/2.0/mlflow/logged-models/search —
        # presumably the MLflow client is newer than the tracking server;
        # confirm that both versions match.
        model_details = mlflow.register_model(
            model_uri=model_uri,
            name=model_name
        )

        version = model_details.version
        print(f"\n ✓ Modèle enregistré: {model_name} v{version}")

        # Description
        client = MlflowClient()
        client.update_model_version(
            name=model_name,
            version=version,
            description=description
        )

        # Tags. The validation tag is keyed by the ranking metric, so the
        # default metric still produces "validation_f1_score".
        version_tags = {
            "model_type": model_type,
            f"validation_{metric}": str(best_score),
            "training_date": training_date,
            "framework": "scikit-learn",
        }
        for tag_key, tag_value in version_tags.items():
            client.set_model_version_tag(
                name=model_name,
                version=version,
                key=tag_key,
                value=tag_value
            )

        print(" ✓ Métadonnées mises à jour")

        # Remember which versions currently hold the target stage *before*
        # promoting, so they can be archived afterwards.
        previous_versions = []
        if archive_existing:
            previous_versions = [
                existing
                for existing in client.get_latest_versions(
                    name=model_name,
                    stages=[stage]
                )
                if str(existing.version) != str(version)
            ]

        # Promote first: if this call fails, the previous version keeps
        # serving instead of the stage being left empty.
        client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage=stage
        )
        print(f" ✓ Version {version} → {stage}")

        for existing in previous_versions:
            client.transition_model_version_stage(
                name=model_name,
                version=existing.version,
                stage="Archived"
            )
            print(f" → Version {existing.version} archivée")

        # Alias assignment stays best-effort (a failure must not undo the
        # registration), but the failure is reported instead of swallowed.
        try:
            client.set_registered_model_alias(
                name=model_name,
                alias="champion",
                version=version
            )
            print(f" ✓ Alias 'champion' assigné à v{version}")
        except Exception as alias_error:
            print(f" ⚠ Alias 'champion' non assigné: {alias_error}")

        print("\n" + "="*80)
        print(" ENREGISTREMENT RÉUSSI")
        print("="*80)

        return {
            'model_name': model_name,
            'version': version,
            'run_id': run_id,
            'stage': stage,
            'metrics': metrics
        }

    except Exception as e:
        # Any registry failure is reported and signalled to the caller as None.
        print(f" ✗ Erreur: {str(e)}")
        return None

In [5]:
# ============================================
# ENREGISTREMENT
# ============================================

# Rank runs by F1 score, register the winner and move it to Production;
# `result` is the summary dict on success and None otherwise.
result = select_and_register_best_model(
    EXPERIMENT_NAME,
    MODEL_NAME,
    metric="f1_score",
    stage="Production",
    archive_existing=True,
)


 SÉLECTION ET ENREGISTREMENT DU MEILLEUR MODÈLE

 Meilleur Run ID: d6f8d9c6...
 Modèle: Linear SVM
 F1_SCORE: 0.7347
 ✗ Erreur: API request to endpoint /api/2.0/mlflow/logged-models/search failed with error code 404 != 200. Response body: '<!doctype html>
<html lang=en>
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>
'


Successfully registered model 'kid-friendly-classifier'.


In [6]:
# ============================================
# INSTRUCTIONS DE CHARGEMENT
# ============================================

if result:
    banner = "=" * 80
    # Markdown code fence, assembled at runtime; the printed text is unchanged.
    fence = "`" * 3
    # Use the stage and version that were actually registered rather than
    # repeating literals that could drift from the registration call.
    registered_stage = result['stage']
    registered_version = result['version']

    # One (label, model URI) pair per supported way of resolving the model.
    load_options = [
        (f"STAGE ({registered_stage})", f"models:/{MODEL_NAME}/{registered_stage}"),
        (f"VERSION (v{registered_version})", f"models:/{MODEL_NAME}/{registered_version}"),
        ("ALIAS (champion)", f"models:/{MODEL_NAME}@champion"),
    ]

    print("\n" + banner)
    print(" INSTRUCTIONS DE CHARGEMENT")
    print(banner)

    # Same snippet for every option; only the model URI differs.
    for position, (label, model_uri) in enumerate(load_options, start=1):
        print(f"\n{position}. Charger par {label}:")
        print(
            f"{fence}python\n"
            "import mlflow\n"
            f'mlflow.set_tracking_uri("{MLFLOW_TRACKING_URI}")\n'
            "\n"
            f'model = mlflow.pyfunc.load_model("{model_uri}")\n'
            "predictions = model.predict(new_data)\n"
            f"{fence}"
        )

    print("\n" + banner)
    print(" ACCÈS UI MLFLOW")
    print(banner)
    print(f"\n → Interface: {MLFLOW_TRACKING_URI}")
    print(f" → Expérience: {EXPERIMENT_NAME}")
    print(f" → Modèle: {MODEL_NAME} (v{registered_version})")
    print("\n" + banner)
    print(" ✓ PROJET TERMINÉ")
    print(banner)

In [7]:
# ============================================
# TEST DE CHARGEMENT
# ============================================

print("\n" + "=" * 80)
print(" TEST DE CHARGEMENT DU MODÈLE")
print("=" * 80)

# Smoke test: load whatever version is in the Production stage and score two
# hand-written samples. Any failure is printed rather than raised.
try:
    production_uri = f"models:/{MODEL_NAME}/Production"
    model = mlflow.pyfunc.load_model(production_uri)
    print(f" ✓ Modèle chargé: {MODEL_NAME}/Production")

    # Sample inputs passed to the model as raw strings
    test_data = [
        "kids children education fun learning",
        "crime murder mystery dark violence"
    ]

    predictions = model.predict(test_data)

    print("\n Prédictions de test:")
    for position, (sample, predicted) in enumerate(zip(test_data, predictions), start=1):
        # A prediction equal to 1 is reported as kid-friendly; anything else is not.
        if predicted == 1:
            verdict = "Kid-Friendly"
        else:
            verdict = "Not Kid-Friendly"
        print(f"   {position}. '{sample[:40]}...' → {verdict}")

    print("\n ✓ Modèle fonctionnel")

except Exception as load_error:
    print(f" ✗ Erreur: {load_error!s}")

print("=" * 80)


 TEST DE CHARGEMENT DU MODÈLE
 ✗ Erreur: No versions of model with name 'kid-friendly-classifier' and stage 'Production' found
