# 03 - Training YOLO12 con MLflow Logging
## Fine-tuning del modelo con tracking completo de experimentos

In [None]:
from ultralytics import YOLO
import mlflow
import mlflow.pytorch
from pathlib import Path
import yaml
import json
from datetime import datetime
import torch

In [None]:
# Project directory layout. Every path hangs off BASE_DIR, which is relative
# to the working directory the notebook is run from.
BASE_DIR = Path("./product_recognition")
MLFLOW_DIR = BASE_DIR / "mlruns"
COCO_DIR = BASE_DIR / "datasets" / "coco"
MODELS_DIR = BASE_DIR / "models"
ARTIFACTS_DIR = BASE_DIR / "artifacts"

# MLflow setup: local file store under MLFLOW_DIR.
# Path.as_uri() builds a well-formed, percent-encoded file URI on every
# platform. Prepending "file:///" to an absolute POSIX path (which already
# starts with "/") yields four slashes, and on Windows leaves backslashes in
# the URI.
mlflow.set_tracking_uri(MLFLOW_DIR.absolute().as_uri())
mlflow.set_experiment("product_recognition_yolo12")

print(f"MLflow URI: {mlflow.get_tracking_uri()}")
print(f"Experiment: {mlflow.get_experiment_by_name('product_recognition_yolo12').experiment_id}")

## Configuración de Training

In [None]:
# Training hyperparameters (demo-sized so the notebook runs quickly)
TRAIN_CONFIG = dict(
    model_size='yolo12n',   # nano variant (fastest, good for a demo)
    epochs=10,              # few epochs for the demo
    batch_size=16,
    imgsz=640,
    device=0 if torch.cuda.is_available() else 'cpu',  # index 0 when CUDA is available
    patience=5,             # early stopping
    save_period=5,          # save a checkpoint every 5 epochs
    pretrained=True,        # start from the COCO-pretrained weights
    optimizer='auto',
    lr0=0.01,
    weight_decay=0.0005,
    augment=True,
    mosaic=1.0,
    mixup=0.1,
)

# Echo the configuration as an aligned "name: value" listing
print("Training Configuration:")
print("\n".join(f"  {name:15s}: {setting}" for name, setting in TRAIN_CONFIG.items()))

## Función de Training con MLflow

In [None]:
def _sanitize_metric_name(name):
    """Strip parentheses from *name* so MLflow accepts it as a metric key."""
    return name.replace('(', '').replace(')', '')


def _log_artifact_if_exists(path, artifact_dir):
    """Log *path* under *artifact_dir* in the active run; skip absent files."""
    if path.exists():
        mlflow.log_artifact(str(path), artifact_dir)


def train_yolo_with_mlflow(config, dataset_yaml, run_name=None):
    """
    Train YOLO12 and record the whole experiment in MLflow.

    Inside a single MLflow run this logs the training config and dataset
    info as params, the final metrics returned by training, the best
    weights, the training plots and a JSON copy of the config.

    Args:
        config: Dict with the training configuration. The keys
            'model_size', 'epochs', 'batch_size', 'imgsz', 'device',
            'patience', 'save_period', 'optimizer', 'lr0', 'weight_decay',
            'augment', 'mosaic' and 'mixup' are required; 'pretrained' is
            optional and defaults to True.
        dataset_yaml: Path to the dataset YAML file (must define 'names').
        run_name: Name of the run (optional). When None, a name is built
            from the model size and the current timestamp.

    Returns:
        run_id: ID of the MLflow run
        model: The YOLO model object after training

    Raises:
        KeyError: If a required key is missing from config or the dataset
            YAML has no 'names' entry.
    """

    if run_name is None:
        run_name = f"yolo12_{config['model_size']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    with mlflow.start_run(run_name=run_name) as run:
        run_id = run.info.run_id
        print(f"\n{'='*60}")
        print(f"MLflow Run ID: {run_id}")
        print(f"Run Name: {run_name}")
        print(f"{'='*60}\n")

        # 1. Log parameters
        mlflow.log_params(config)
        mlflow.log_param('dataset_yaml', str(dataset_yaml))
        mlflow.log_param('dataset_name', 'COCO128')

        # 2. Log dataset info
        with open(dataset_yaml, 'r', encoding='utf-8') as f:
            dataset_config = yaml.safe_load(f)
        mlflow.log_param('num_classes', len(dataset_config['names']))

        # 3. Load the pretrained model
        model_name = f"{config['model_size']}.pt"
        print(f"Cargando modelo: {model_name}")
        model = YOLO(model_name)

        # 4. Train
        # NOTE(review): Ultralytics ships its own MLflow callback, which may
        # also write to (or close) the active run when enabled in its
        # settings. Confirm it is disabled or compatible with this logging.
        print("\nIniciando entrenamiento...\n")
        results = model.train(
            data=str(dataset_yaml),
            epochs=config['epochs'],
            batch=config['batch_size'],
            imgsz=config['imgsz'],
            device=config['device'],
            patience=config['patience'],
            save_period=config['save_period'],
            # Forward the flag so the logged 'pretrained' param reflects what
            # training actually used; True matches the previous behaviour.
            pretrained=config.get('pretrained', True),
            optimizer=config['optimizer'],
            lr0=config['lr0'],
            weight_decay=config['weight_decay'],
            augment=config['augment'],
            mosaic=config['mosaic'],
            mixup=config['mixup'],
            project=str(MODELS_DIR),
            name=run_name,
            exist_ok=True,
        )

        # 5. Log final metrics
        # Keys such as 'metrics/mAP50(B)' contain parentheses, which MLflow
        # rejects in metric names, so they are stripped before logging.
        results_dict = results.results_dict
        for key, value in results_dict.items():
            if isinstance(value, (int, float)):
                mlflow.log_metric(_sanitize_metric_name(key), value)

        # Key metrics under stable, query-friendly names
        mlflow.log_metric('final_mAP50', results_dict.get('metrics/mAP50(B)', 0))
        mlflow.log_metric('final_mAP50-95', results_dict.get('metrics/mAP50-95(B)', 0))
        mlflow.log_metric('final_precision', results_dict.get('metrics/precision(B)', 0))
        mlflow.log_metric('final_recall', results_dict.get('metrics/recall(B)', 0))

        # 6. Log artifacts
        training_dir = MODELS_DIR / run_name

        # Final model
        best_model_path = training_dir / 'weights' / 'best.pt'
        _log_artifact_if_exists(best_model_path, 'model')

        # Training plots
        _log_artifact_if_exists(training_dir / 'results.png', 'plots')
        _log_artifact_if_exists(training_dir / 'confusion_matrix.png', 'plots')

        # 7. Save the config next to the training outputs for reproducibility
        # The directory is created first so the write cannot fail if the
        # trainer stored its outputs elsewhere.
        training_dir.mkdir(parents=True, exist_ok=True)
        config_path = training_dir / 'train_config.json'
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2)
        mlflow.log_artifact(str(config_path), 'config')

        # Useful tags
        mlflow.set_tags({
            'model_type': 'YOLO12',
            'model_size': config['model_size'],
            'task': 'object_detection',
            'framework': 'ultralytics',
            'status': 'completed'
        })

        print(f"\n{'='*60}")
        print(f"✓ Training completado")
        print(f"✓ Run ID: {run_id}")
        print(f"✓ Best model: {best_model_path}")
        print(f"{'='*60}\n")

        return run_id, model

## Ejecutar Training

In [None]:
# Dataset definition file handed to the training helper
dataset_yaml = COCO_DIR.joinpath("coco128.yaml")

# Launch the demo run; the helper returns the MLflow run id and the trained model
run_id, trained_model = train_yolo_with_mlflow(
    TRAIN_CONFIG,
    dataset_yaml,
    run_name="demo_yolo12n_coco128",
)

## Validar modelo entrenado

In [None]:
# Evaluate the trained model; val() is pointed at the same dataset YAML used for training
print("\nEvaluando modelo en validation set...")
val_results = trained_model.val(data=str(dataset_yaml))

# Each label carries its own padding so the printed report lines up as before
box_metrics = val_results.box
report_rows = (
    ("mAP@0.5    ", box_metrics.map50),
    ("mAP@0.5:0.95", box_metrics.map),
    ("Precision  ", box_metrics.mp),
    ("Recall     ", box_metrics.mr),
)

print("\nResultados de validación:")
for label, score in report_rows:
    print(f"  {label}: {score:.4f}")

## Ver run en MLflow UI

In [None]:
# Show how to open this run in the MLflow UI.
# The experiment id is read back from the run itself: MLflow assigns ids when
# experiments are created, so a hard-coded id in the URL may point at a
# different (or non-existent) experiment.
run_experiment_id = mlflow.get_run(run_id).info.experiment_id

print(f"\n{'='*60}")
print("Para ver los resultados en MLflow UI:")
print(f"\n1. Ejecuta en terminal:")
print(f"   mlflow ui --backend-store-uri {mlflow.get_tracking_uri()} --port 5000")
print(f"\n2. Abre en navegador:")
print(f"   http://localhost:5000/#/experiments/{run_experiment_id}/runs/{run_id}")
print(f"{'='*60}")

## Comparar múltiples experimentos (opcional)

In [None]:
# Fetch every run of the experiment, highest final mAP@0.5 first
experiment = mlflow.get_experiment_by_name("product_recognition_yolo12")
runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    order_by=["metrics.final_mAP50 DESC"],
)

# Columns shown in the comparison table
summary_columns = [
    'run_id',
    'tags.model_size',
    'params.epochs',
    'metrics.final_mAP50',
    'metrics.final_mAP50-95',
]

print("\nRuns del experimento (ordenados por mAP@0.5):")
print(runs[summary_columns].head(10))

## Función helper: tool_mlflow_train

In [None]:
def tool_mlflow_train(config: dict, dataset_version: str = "coco128") -> str:
    """
    Tool that trains a model with MLflow tracking.

    Args:
        config: Training configuration. Keys that are missing fall back to
            the values in TRAIN_CONFIG, so a partial override is enough.
        dataset_version: Label for the dataset version. It is only used to
            build the run name; training always reads coco128.yaml.

    Returns:
        run_id: ID of the MLflow run
    """
    dataset_yaml = COCO_DIR / "coco128.yaml"

    # The run name already tolerated a config without 'model_size', but the
    # training function indexes its keys directly. Filling the gaps from the
    # notebook defaults keeps both steps consistent for partial configs.
    full_config = {**TRAIN_CONFIG, **config}

    run_name = f"train_{full_config.get('model_size', 'yolo12n')}_{dataset_version}"
    run_id, _ = train_yolo_with_mlflow(full_config, dataset_yaml, run_name)

    return run_id

# Test del tool
# test_run_id = tool_mlflow_train(TRAIN_CONFIG, "coco128_v1")
# print(f"Test run_id: {test_run_id}")

## ✅ Training completado

**Logrado:**
- ✅ Fine-tuning de YOLO12 en COCO128
- ✅ Logging completo en MLflow (params, metrics, artifacts)
- ✅ Checkpoints automáticos
- ✅ Gráficas de training guardadas
- ✅ Tool `tool_mlflow_train` implementado

**Siguiente:** Model Registry & Inference → `04_inference_registry.ipynb`