# T11 : Versioning de ModÃ¨le et Registry (MLOps)

## Objectif
Mettre en place un systÃ¨me de **Model Registry** lÃ©ger mais robuste pour :
1.  GÃ©rer les versions successives des modÃ¨les (v1, v2...).
2.  Tracer les hyperparamÃ¨tres et les mÃ©triques de performance.
3.  Comparer automatiquement les versions (Rapport d'Ã©volution).
4.  Permettre le chargement facile de la "derniÃ¨re meilleure version" pour la production.

Ce systÃ¨me remplace des outils lourds comme MLflow dans cet environnement contraint, en utilisant une structure de fichiers JSON + Pickle standardisÃ©e.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import pickle
import os
import shutil
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score

# Configuration
MODELS_DIR = '../models'
DATA_DIR = '../data/features/'

# CrÃ©ation du dossier models s'il n'existe pas
os.makedirs(MODELS_DIR, exist_ok=True)

## 1. Classe `ModelRegistry`
Le cÅ“ur du systÃ¨me de versioning.

In [2]:
class ModelRegistry:
    def __init__(self, registry_path=MODELS_DIR):
        self.registry_path = registry_path
        self.registry_file = os.path.join(registry_path, 'registry.json')
        self._load_registry()

    def _load_registry(self):
        if os.path.exists(self.registry_file):
            with open(self.registry_file, 'r') as f:
                self.registry = json.load(f)
        else:
            self.registry = {'models': {}}

    def _save_registry(self):
        with open(self.registry_file, 'w') as f:
            json.dump(self.registry, f, indent=4)

    def register_model(self, model, name, params, metrics, author="User"):
        """Enregistre une nouvelle version du modÃ¨le."""
        if name not in self.registry['models']:
            self.registry['models'][name] = []
        
        version_id = len(self.registry['models'][name]) + 1
        version_tag = f"v{version_id}"
        timestamp = datetime.now().isoformat()
        
        # CrÃ©ation du dossier de version
        model_dir = os.path.join(self.registry_path, name, version_tag)
        os.makedirs(model_dir, exist_ok=True)
        
        # Sauvegarde du modÃ¨le (Pickle)
        model_path = os.path.join(model_dir, 'model.pkl')
        with open(model_path, 'wb') as f:
            pickle.dump(model, f)
            
        # MÃ©tadonnÃ©es
        metadata = {
            'version': version_tag,
            'timestamp': timestamp,
            'author': author,
            'params': params,
            'metrics': metrics,
            'path': model_path
        }
        
        # Sauvegarde locale des mÃ©tadonnÃ©es
        with open(os.path.join(model_dir, 'meta.json'), 'w') as f:
            json.dump(metadata, f, indent=4)
            
        # Mise Ã  jour du registre central
        self.registry['models'][name].append(metadata)
        self._save_registry()
        
        print(f"âœ… ModÃ¨le {name} version {version_tag} enregistrÃ© avec succÃ¨s.")
        return version_tag

    def get_history(self, name):
        """Retourne l'historique des versions sous forme de DataFrame."""
        if name not in self.registry['models']:
            return pd.DataFrame()
        
        history = []
        for entry in self.registry['models'][name]:
            # Aplatir le dictionnaire pour le DataFrame
            row = {
                'version': entry['version'],
                'date': entry['timestamp'][:10],
                **entry['metrics'], # Metriques en colonnes
                **{f"param_{k}": v for k, v in entry['params'].items()} # Params prÃ©fixÃ©s
            }
            history.append(row)
            
        return pd.DataFrame(history)

    def load_model(self, name, version='latest'):
        """Charge un modÃ¨le spÃ©cifique ou le dernier."""
        if name not in self.registry['models']:
            raise ValueError(f"ModÃ¨le {name} inconnu.")
            
        if version == 'latest':
            meta = self.registry['models'][name][-1]
        else:
            meta = next((m for m in self.registry['models'][name] if m['version'] == version), None)
            if meta is None:
                raise ValueError(f"Version {version} non trouvÃ©e pour {name}.")
        
        with open(meta['path'], 'rb') as f:
            model = pickle.load(f)
            
        print(f"ðŸ“‚ ModÃ¨le {name} ({meta['version']}) chargÃ©.")
        return model, meta

    def compare_versions(self, name, v_a, v_b):
        """GÃ©nÃ¨re un rapport de comparaison entre deux versions."""
        df = self.get_history(name)
        row_a = df[df['version'] == v_a].iloc[0]
        row_b = df[df['version'] == v_b].iloc[0]
        
        print(f"--- Comparaison : {v_a} vs {v_b} ---n")
        
        # Comparaison MÃ©triques
        metrics_cols = [c for c in df.columns if c not in ['version', 'date'] and not c.startswith('param_')]
        diffs = []
        for m in metrics_cols:
            val_a = row_a[m]
            val_b = row_b[m]
            diff = val_b - val_a
            pct = (diff / val_a) * 100 if val_a != 0 else 0
            icon = "ðŸŸ¢" if diff > 0 else "ðŸ”´" if diff < 0 else "âšª"
            diffs.append({'MÃ©trique': m, v_a: val_a, v_b: val_b, 'Diff': diff, 'Diff %': f"{pct:+.2f}%", 'Status': icon})
            
        print("\nMÃ©triques :")
        display(pd.DataFrame(diffs))
        
        # Comparaison ParamÃ¨tres
        params_cols = [c for c in df.columns if c.startswith('param_')]
        param_changes = []
        for p in params_cols:
            if row_a[p] != row_b[p]:
                param_changes.append({'ParamÃ¨tre': p.replace('param_', ''), v_a: row_a[p], v_b: row_b[p]})
        
        if param_changes:
            print("\nChangements de ParamÃ¨tres :")
            display(pd.DataFrame(param_changes))
        else:
            print("\nAucun changement de paramÃ¨tre dÃ©tectÃ©.")

## 2. Chargement des DonnÃ©es

In [3]:
def load_data(data_dir):
    files = ['GBPUSD_M15_2022_features.csv', 'GBPUSD_M15_2024_features.csv'] # On charge Train et Test
    dfs = []
    for f in files:
        path = os.path.join(data_dir, f)
        if os.path.exists(path):
            df_year = pd.read_csv(path, parse_dates=['timestamp'], index_col='timestamp')
            dfs.append(df_year)
            
    if not dfs:
        raise FileNotFoundError("DonnÃ©es non trouvÃ©es.")
        
    df = pd.concat(dfs)
    df.sort_index(inplace=True)
    
    # Target
    df['target_return'] = df['close_15m'].shift(-1) - df['close_15m']
    df['target'] = (df['target_return'] > 0).astype(int)
    df.dropna(inplace=True)
    return df

df = load_data(DATA_DIR)
train_data = df.loc['2022']
test_data = df.loc['2024']

features = ['rsi_14', 'ema_20', 'ema_50', 'atr_14', 'adx_14']
X_train, y_train = train_data[features], train_data['target']
X_test, y_test = test_data[features], test_data['target']

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Train: (24918, 5), Test: (24926, 5)


## 3. EntraÃ®nement et Versioning

### Version 1 : Baseline (Random Forest Default)

In [4]:
registry = ModelRegistry()
MODEL_NAME = "rf_direction_classifier"

# --- V1 ---
params_v1 = {
    'n_estimators': 50,
    'max_depth': 3,
    'random_state': 42
}

print("Training V1...")
model_v1 = RandomForestClassifier(**params_v1)
model_v1.fit(X_train, y_train)

# Eval V1
y_pred_v1 = model_v1.predict(X_test)
metrics_v1 = {
    'accuracy': accuracy_score(y_test, y_pred_v1),
    'f1_score': f1_score(y_test, y_pred_v1)
}

# Enregistrement V1
registry.register_model(model_v1, MODEL_NAME, params_v1, metrics_v1, author="JCLoirat")

Training V1...
âœ… ModÃ¨le rf_direction_classifier version v1 enregistrÃ© avec succÃ¨s.


'v1'

### Version 2 : OptimisÃ© (Plus d'arbres, plus profond)

In [5]:
# --- V2 ---
params_v2 = {
    'n_estimators': 200,  # AugmentÃ©
    'max_depth': 10,      # AugmentÃ©
    'min_samples_leaf': 5, # AjoutÃ© pour Ã©viter l'overfit
    'random_state': 42
}

print("Training V2...")
model_v2 = RandomForestClassifier(**params_v2)
model_v2.fit(X_train, y_train)

# Eval V2
y_pred_v2 = model_v2.predict(X_test)
metrics_v2 = {
    'accuracy': accuracy_score(y_test, y_pred_v2),
    'f1_score': f1_score(y_test, y_pred_v2)
}

# Enregistrement V2
registry.register_model(model_v2, MODEL_NAME, params_v2, metrics_v2, author="JCLoirat")

Training V2...
âœ… ModÃ¨le rf_direction_classifier version v2 enregistrÃ© avec succÃ¨s.


'v2'

## 4. Analyse et Rapport d'Ã‰volution
Visualisation de l'historique et des changements.

In [6]:
# Afficher tout l'historique
print("ðŸ“œ Historique du ModÃ¨le :")
history = registry.get_history(MODEL_NAME)
display(history)

# Comparer V1 et V2
registry.compare_versions(MODEL_NAME, 'v1', 'v2')

ðŸ“œ Historique du ModÃ¨le :


Unnamed: 0,version,date,accuracy,f1_score,param_n_estimators,param_max_depth,param_random_state,param_min_samples_leaf
0,v1,2026-02-11,0.51344,0.342585,50,3,42,
1,v2,2026-02-11,0.51015,0.329857,200,10,42,5.0


--- Comparaison : v1 vs v2 ---n

MÃ©triques :


Unnamed: 0,MÃ©trique,v1,v2,Diff,Diff %,Status
0,accuracy,0.51344,0.51015,-0.00329,-0.64%,ðŸ”´
1,f1_score,0.342585,0.329857,-0.012727,-3.72%,ðŸ”´



Changements de ParamÃ¨tres :


Unnamed: 0,ParamÃ¨tre,v1,v2
0,n_estimators,50.0,200.0
1,max_depth,3.0,10.0
2,min_samples_leaf,,5.0


## 5. DÃ©mo : Chargement en Production
Simulation de l'utilisation du registry par une API ou un systÃ¨me de trading.

In [7]:
print("ðŸš€ Simulation Production Startup...")

# Chargement automatique de la derniÃ¨re version
prod_model, prod_meta = registry.load_model(MODEL_NAME, version='latest')

print(f"PrÃªt Ã  utiliser le modÃ¨le v{prod_meta['version']} crÃ©Ã© le {prod_meta['timestamp']}")
print(f"Performance attendue (Accuracy) : {prod_meta['metrics']['accuracy']:.2%}")

# Test infÃ©rence rapide
sample = X_test.iloc[0:1]
prediction = prod_model.predict(sample)
print(f"Prediction pour l'Ã©chantillon : {'HAUSSE' if prediction[0]==1 else 'BAISSE'}")

ðŸš€ Simulation Production Startup...
ðŸ“‚ ModÃ¨le rf_direction_classifier (v2) chargÃ©.
PrÃªt Ã  utiliser le modÃ¨le vv2 crÃ©Ã© le 2026-02-11T16:49:06.967073
Performance attendue (Accuracy) : 51.02%
Prediction pour l'Ã©chantillon : HAUSSE
