# Notebook : 02_model_training_unet.ipynb
- Objectif : entraîner un premier modèle U-Net sur Cityscapes (8 classes) avec pipeline MLOps
## ────────────────────────────────────────────────────────────────
## Activation de l’environnement conda (local uniquement)
## ────────────────────────────────────────────────────────────────
# 🚀 NOTE IMPORTANTE
## Avant de lancer ce notebook :
### 1. Exécute manuellement scripts/launch_tf_gpu_env.bat
### 2. Puis scripts/register_kernel.bat si nécessaire
## Cela garantit que l’environnement conda GPU + kernel Jupyter sont bien actifs
## ────────────────────────────────────────────────────────────────
# 1 - Imports & préparation

In [1]:
def load_gpu_config():
    import yaml
    with open("../config/config_gpu.yaml") as f:
        gpu_config = yaml.safe_load(f)
    return gpu_config

In [2]:
gpu_config = load_gpu_config()
BATCH_SIZE = gpu_config["gpu"]["batch_size"]
SET_MEMORY_GROWTH = gpu_config["gpu"]["set_memory_growth"]

print(f"🎛️ Param batch size : {BATCH_SIZE}")

🎛️ Param batch size : 4


In [3]:
## Imports & config GPU
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import tensorflow as tf
import GPUtil
from tensorflow import keras
from keras import layers
import GPUtil
import pandas as pd
from IPython.display import display
import runpy

import mlflow
mlflow.set_tracking_uri("file:./mlruns")

from dotenv import load_dotenv
load_dotenv()




False

## 1.1 - Configuration TensorFlow pour forcer le GPU

In [4]:
print("🧠 Device utilisé :", tf.test.gpu_device_name())
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    if SET_MEMORY_GROWTH:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            print("✅ GPU détecté et mémoire dynamique activée")
        except RuntimeError as e:
            print("❌ Erreur d'allocation mémoire GPU :", e)
    else:
        print("✅ GPU détecté (set_memory_growth désactivé)")
else:
    print("⚠️ Aucun GPU détecté — fallback CPU")

🧠 Device utilisé : 
⚠️ Aucun GPU détecté — fallback CPU


## 1.2 - Visualisation GPU

In [5]:
try:
    GPUtil.showUtilization()
except:
    print("[INFO] GPUtil non disponible")

| ID | GPU | MEM |
------------------
|  0 |  5% | 19% |


In [6]:
# Remonter à la racine du projet
project_root = Path("..").resolve()

# Ajouter src/ au PYTHONPATH
src_path = project_root / "src"
if str(src_path) not in sys.path:
    sys.path.append(str(src_path))

# Chemins utiles
# data_dir      = project_root / "data"
# processed_dir = data_dir / "processed"
# models_dir    = project_root / "models"

# train = np.load(processed_dir / "train.npz")
# val   = np.load(processed_dir / "val.npz")

# X_train, Y_train = train["X"], train["Y"]
# X_val, Y_val     = val["X"], val["Y"]

In [7]:
from utils.utils import plot_history

# 2 - Unet
## 2.1 - Chargement des fonctions d'entraînement

In [8]:
# from model_training.train_unet import train_unet_model_from_npz
from model_training.train_unet import train_unet_model_albumentation, fetch_mlflow_runs

[INFO] Configuration GPU...
⚠️ Aucun GPU détecté. Utilisation du CPU.
| ID | GPU | MEM |
------------------
|  0 |  5% | 19% |


In [9]:
runpy.run_path(str(project_root / "scripts" / "generate_albu_dataset.py"))

{'__name__': '<run_path>',
 '__doc__': None,
 '__package__': '',
 '__loader__': None,
 '__spec__': None,
 '__file__': 'C:\\Users\\motar\\Desktop\\1-openclassrooms\\AI_Engineer\\1-projets\\P08\\2-python\\scripts\\generate_albu_dataset.py',
 '__cached__': None,
 '__builtins__': {'__name__': 'builtins',
  '__doc__': "Built-in functions, exceptions, and other objects.\n\nNoteworthy: None is the `nil' object; Ellipsis represents `...' in slices.",
  '__package__': '',
  '__loader__': _frozen_importlib.BuiltinImporter,
  '__spec__': ModuleSpec(name='builtins', loader=<class '_frozen_importlib.BuiltinImporter'>, origin='built-in'),
  '__build_class__': <function __build_class__>,
  '__import__': <function __import__>,
  'abs': <function abs(x, /)>,
  'all': <function all(iterable, /)>,
  'any': <function any(iterable, /)>,
  'ascii': <function ascii(obj, /)>,
  'bin': <function bin(number, /)>,
  'breakpoint': <function breakpoint>,
  'callable': <function callable(obj, /)>,
  'chr': <functio

## 2.2 - Paramètres

In [10]:
params_grid = [
    {"img_size": (256, 256), "epochs": 30, "batch_size": 16, "use_early_stopping": True, "force_retrain": True},
    {"img_size": (256, 256), "epochs": 40, "batch_size": 16, "use_early_stopping": True, "force_retrain": True},
    {"img_size": (256, 256), "epochs": 50, "batch_size": 16, "use_early_stopping": True, "force_retrain": False},
    {"img_size": (256, 256), "epochs": 50, "batch_size": 16, "use_early_stopping": True, "force_retrain": False}
]

# 3 - Entraînement

In [None]:
results = []

for i, params in enumerate(params_grid):
    print(f"\n🔁 Lancement de l'entraînement {i+1}/{len(params_grid)} avec batch={params['batch_size']}, epochs={params['epochs']}")

    model, history = train_unet_model_albumentation(
        img_size=params['img_size'],
        epochs=params['epochs'],
        batch_size=params['batch_size'],
        use_early_stopping=params['use_early_stopping'],
        force_retrain=params['force_retrain'],
        turbo=True
    )

    val_acc = max(history['val_accuracy'])
    val_loss = min(history['val_loss'])

    results.append({
        'run': i + 1,
        'params': params,
        'val_accuracy': val_acc,
        'val_loss': val_loss,
        'model': model,
        'history': history
    })


🔁 Lancement de l'entraînement 1/4 avec batch=16, epochs=30
🔄 Lancement du serveur MLflow local...
✅ Serveur MLflow démarré sur http://127.0.0.1:5000
[LOG] ➤ train_unet_model_albumentation appelé
🚀 Mode TURBO activé : optimisations en cours...
The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.
[INFO] Initialisation des DataGenerators...
[INFO] Initialisation du modèle...
✅ Serveur MLflow déjà actif sur http://127.0.0.1:5000
[LOG] ➤ unet_mini appelé



[INFO] Début entraînement avec Albumentation...
Epoch 1/30
🔁 Surveillance CPU/GPU (LIVE - rafraichi toutes les 3s)
🧠 CPU Usage : 0.0%
🧠 RAM Usage : 33.3% (22.9GB / 68.6GB)
🎮 GPU: NVIDIA GeForce GTX 1060 6GB
   Utilisation : 3.0%
   RAM : 1219 / 6144 MB


🔁 Surveillance CPU/GPU (LIVE - rafraichi toutes les 3s)
🧠 CPU Usage : 55.7%
🧠 RAM Usage : 36.7% (25.2GB / 68.6GB)
🎮 GPU: NVIDIA GeForce GTX 1060 6GB
   Utilisation

# 4 - Sélection du meilleur run

In [None]:
best_run = sorted(results, key=lambda x: x['val_accuracy'], reverse=True)[0]
print(f"\n✅ Meilleur modèle : Run {best_run['run']} avec val_accuracy = {best_run['val_accuracy']:.4f}")

best_model = best_run['model']
best_history = best_run['history']

## 5 - Résumé du modèle

In [None]:
model.summary()

# 6 - Visualisation training

In [None]:
plot_history(history)

# 7 - Récapitulatif des runs MLflow

In [None]:
mlflow_df = fetch_mlflow_runs()
mlflow_df = mlflow_df.sort_values(by="Final val_accuracy", ascending=False)
display(mlflow_df)