# 📓 MLflow - Ejemplos básicos
# 
En esta notebook veremos ejemplos sencillos de:
- MLflow Tracking
- MLflow Projects
- MLflow Models
- MLflow Model Registry

## Instalación y configuración inicial
Instalación de MLflow (descomentar si no lo tienes instalado)
!pip install mlflow


In [2]:
# ========================
# MLflow - Ejemplos básicos
# ========================

# Instalación (si es necesario)
# !pip install mlflow

import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import os
import yaml
import pandas as pd
# Crear carpeta para artefactos si no existe
os.makedirs("outputs", exist_ok=True)


## 🧩 1. MLflow Tracking
 
### 📄 Descripción
Con **MLflow Tracking** registramos:
 - Parámetros (por ejemplo, hiperparámetros de un modelo).
 - Métricas (precisión, error, R2, etc.).
 - Artefactos (modelos entrenados, imágenes, datasets).
 
Todo queda guardado en un historial organizado para análisis y comparación.


## Nombre del proyecto y url de MLFlow local

In [3]:
#mlflow.set_tracking_uri("http://localhost:5000")  # o la IP donde esté el Docker
mlflow.set_experiment("Regresion lineal 17")

Traceback (most recent call last):
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 329, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 427, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 1373, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 1366, in _read_helper
    result = read_yaml(root, file_name)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\utils\file_utils.py", line 310, in read_yaml
    r

<Experiment: artifact_location='file:///c:/Users/guill/OneDrive/Documentos/simplegit/ITBA/mlruns/496517268595748721', creation_time=1747172019048, experiment_id='496517268595748721', last_update_time=1747172019048, lifecycle_stage='active', name='Regresion lineal 17', tags={}>

In [22]:
# Datos ficticios
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# 2. Guardar los datasets como CSV
os.makedirs("data", exist_ok=True)
pd.DataFrame(X_train, columns=["x"]).to_csv("data/X_train.csv", index=False)
pd.DataFrame(X_test, columns=["x"]).to_csv("data/X_test.csv", index=False)
pd.DataFrame(y_train, columns=["y"]).to_csv("data/y_train.csv", index=False)
pd.DataFrame(y_test, columns=["y"]).to_csv("data/y_test.csv", index=False)


In [21]:
# Ejemplo sencillo de Tracking

#mlflow.end_run()
# Empezar una corrida (run)
with mlflow.start_run(run_name="linear_regression_example 2"):
    # Modelo
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    # Parámetro (en este caso no hay hiperparámetros, así que lo simulamos)
    mlflow.log_param("model_type", "LinearRegression")
    
    # Métrica
    score = model.score(X_test, y_test)
    mlflow.log_metric("r2_score", score)
    
    # Guardar el modelo
    mlflow.sklearn.log_model(model, artifact_path="model")
    
    print(f"Modelo guardado con R2: {score:.2f}")




Modelo guardado con R2: 1.00
🏃 View run linear_regression_example 2 at: http://localhost:5000/#/experiments/395915654146616026/runs/07ab9b2436324b56a964c23fa1d14eb1
🧪 View experiment at: http://localhost:5000/#/experiments/395915654146616026


In [22]:
from sklearn.linear_model import Ridge

with mlflow.start_run(run_name="ridge_regression_example 3"):
    alpha = 0.7
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)

    mlflow.log_param("model_type", "Ridge")
    mlflow.log_param("alpha", alpha)

    score = model.score(X_test, y_test)
    mlflow.log_metric("r2_score", score)

    mlflow.sklearn.log_model(model, artifact_path="model")
    print(f"Modelo Ridge guardado con R2: {score:.2f}")
    X_train_path = "data/X_train.csv"
    X_test_path = "data/X_test.csv"
    y_train_path = "data/y_train.csv"
    y_test_path = "data/y_test.csv"
    mlflow.log_artifact(X_train_path, artifact_path="datasets")
    mlflow.log_artifact(X_test_path, artifact_path="datasets")
    mlflow.log_artifact(y_train_path, artifact_path="datasets")
    mlflow.log_artifact(y_test_path, artifact_path="datasets")

mlflow.sklearn.log_model(model, "model", input_example=X_test[:1])
    



Modelo Ridge guardado con R2: 1.00
🏃 View run ridge_regression_example 3 at: http://localhost:5000/#/experiments/395915654146616026/runs/80845bb41632443db3c6d863c91fc478
🧪 View experiment at: http://localhost:5000/#/experiments/395915654146616026


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 1749.19it/s]


<mlflow.models.model.ModelInfo at 0x1f7fb7b6550>

In [4]:
import mlflow
import numpy as np

with mlflow.start_run(run_name="training_with_curve"):
    for epoch in range(10):
        # Simulación de loss que baja
        loss = np.exp(-epoch / 5)

        # Logueás la métrica con `step` para que MLflow genere la curva
        mlflow.log_metric("loss", loss, step=epoch)


In [27]:
mlflow.set_experiment("Regresion lineal 8")

mlflow.set_tag("model_name", "RandomForest")
mlflow.set_tag("experiment", "baseline")

models = {"LinearRegression": LinearRegression(), "Ridge": Ridge(alpha=0.5)}



In [28]:
mlflow.end_run()
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        mlflow.set_tag("model_name", name)

        for epoch in range(10):
            loss = np.exp(-epoch / 5)  # Ejemplo de pérdida
            mlflow.log_metric("loss", loss, step=epoch)

        mlflow.sklearn.log_model(model, artifact_path="model")



🏃 View run fearless-mare-156 at: http://localhost:5000/#/experiments/128589332145951270/runs/92a37d8eb5c64b47aafc1f261ff83727
🧪 View experiment at: http://localhost:5000/#/experiments/128589332145951270




🏃 View run LinearRegression at: http://localhost:5000/#/experiments/128589332145951270/runs/62097901e6974d668efd3ce344a99fdf
🧪 View experiment at: http://localhost:5000/#/experiments/128589332145951270




🏃 View run Ridge at: http://localhost:5000/#/experiments/128589332145951270/runs/eab2a1d7beee483e84d8153f4cb14580
🧪 View experiment at: http://localhost:5000/#/experiments/128589332145951270


In [58]:
mlflow.set_experiment("Seleccionar mejor modelo")

import mlflow
import mlflow.sklearn
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Datos ficticios
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Modelos a evaluar
models = {"LinearRegression": LinearRegression(), "Ridge": Ridge(alpha=0.5)}

best_model = None
best_score = -np.inf  # Mejor score encontrado (por ejemplo, R2 o MSE)

# Comenzamos los experimentos para cada modelo
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        mlflow.set_tag("model_name", name)

        # Entrenamiento
        model.fit(X_train, y_train)

        # Predicciones y métricas
        y_pred = model.predict(X_test)
        score = mean_squared_error(y_test, y_pred)  # Usamos MSE como ejemplo
        
        # Registro de parámetros y métricas
        mlflow.log_param("model_type", name)
        mlflow.log_metric("mse", score)
        
        # Guardamos el modelo
        mlflow.sklearn.log_model(model, artifact_path="model")
        
        # Evaluamos cuál es el mejor modelo
        if score < best_score:  # MSE más bajo es mejor
            best_score = score
            best_model = model
            mlflow.log_param("best_model", True)  # Marcamos el modelo ganador

# Ahora puedes registrar el mejor modelo global
if best_model:
    with mlflow.start_run(run_name="Best_Model"):
        mlflow.log_param("best_model_name", best_model.__class__.__name__)
        mlflow.sklearn.log_model(best_model, artifact_path="best_model")
        print("Modelo ganador registrado!")


2025/05/11 22:50:38 INFO mlflow.tracking.fluent: Experiment with name 'Seleccionar mejor modelo' does not exist. Creating a new experiment.


## HYPERPARAMETER TUNING

In [None]:
import mlflow
import mlflow.sklearn
from mlflow import log_param, log_metric, log_artifact

from sklearn.datasets import load_diabetes
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import json
import joblib

# Cargar los datos
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir el modelo y el espacio de búsqueda
model = Ridge()
param_grid = {
    'alpha': [0.1, 1.0, 10.0],
    'solver': ['auto', 'svd']
}

# Configurar GridSearchCV
grid_search = GridSearchCV(model, param_grid, cv=3, scoring="neg_mean_squared_error", return_train_score=True)

# Iniciar experimento en MLflow
mlflow.set_experiment("Ridge Regression GridSearch")
with mlflow.start_run(run_name="GridSearchCV") as run:
    # Guardar el grid como parámetro
    mlflow.log_param("param_grid", param_grid)
    
    # Guardar el grid como archivo JSON
    with open("param_grid.json", "w") as f:
        json.dump(param_grid, f)
    mlflow.log_artifact("param_grid.json")
    
    # Ejecutar GridSearch
    grid_search.fit(X_train, y_train)
        
    # Loguear cada intento como sub-run
    for i, params in enumerate(grid_search.cv_results_["params"]):
        with mlflow.start_run(run_name=f"trial_{i}", nested=True):
            mlflow.log_params(params)
            mlflow.log_metric("mean_test_score", grid_search.cv_results_["mean_test_score"][i])
            mlflow.log_metric("mean_train_score", grid_search.cv_results_["mean_train_score"][i])
    
    # Evaluar el mejor modelo
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mlflow.log_metric("test_mse", mse)
    
    # Guardar el modelo y objeto grid_search completo
    mlflow.sklearn.log_model(best_model, "best_model")
    joblib.dump(grid_search, "grid_search.pkl")
    mlflow.log_artifact("grid_search.pkl")


Traceback (most recent call last):
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 329, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 427, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 1373, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\store\tracking\file_store.py", line 1366, in _read_helper
    result = read_yaml(root, file_name)
  File "c:\Users\guill\OneDrive\Documentos\simplegit\ITBA\ITBA\lib\site-packages\mlflow\utils\file_utils.py", line 310, in read_yaml
    r

## 🧩 2. MLflow Projects
 
### 📄 Descripción
**MLflow Projects** define un estándar para empaquetar proyectos de ML, haciéndolos:
 - Reproducibles (cualquiera puede correrlo igual).
 - Ejecutables localmente o en la nube.
 - Versionables junto al código.
 
<!-- Utiliza un archivo `MLproject` (YAML) para describir dependencias y comandos de entrada. -->


In [8]:
# Creamos un archivo MLproject para definir el proyecto

project_yaml = """
name: simple_linear_regression
conda_env: conda.yaml

entry_points:
  main:
    parameters:
      alpha: {type: float, default: 0.5}
    command: "python train.py --alpha {alpha}"
"""

with open("MLproject", "w") as f:
    f.write(project_yaml)

# Creamos un entorno conda de ejemplo
conda_yaml = """
name: simple-mlflow-env
dependencies:
  - python=3.8
  - scikit-learn
  - pip
  - pip:
      - mlflow
"""

with open("conda.yaml", "w") as f:
    f.write(conda_yaml)

print("Archivo MLproject y conda.yaml creados 🎯")


Archivo MLproject y conda.yaml creados 🎯


## 🧩 3. MLflow Models
 
### 📄 Descripción
**MLflow Models** permite:
 - Guardar modelos entrenados en formatos estándar.
 - Cargarlos fácilmente para predicción o despliegue.
 - Exportarlos a múltiples plataformas (Docker, REST API, mobile).

In [9]:
# Guardamos el modelo entrenado en disco
model_path = "outputs/linear_model"
mlflow.sklearn.save_model(model, model_path)

# Cargamos el modelo desde disco
loaded_model = mlflow.sklearn.load_model(model_path)

# Realizamos una predicción de prueba
prediction = loaded_model.predict(X_test[:2])
print("Predicción ejemplo:", prediction)


Predicción ejemplo: [-55.4786619   61.72358202]


## 🧩 4. MLflow Model Registry
 
### 📄 Descripción
**MLflow Model Registry** gestiona:
 - Versiones de modelos.
 - Etapas del ciclo de vida (Staging, Production, Archived).
 - Aprobaciones y revisiones de modelos.
 
 **Importante:** Para usarlo realmente se necesita un Tracking Server conectado a una base de datos.
 Aquí simulamos un ejemplo sencillo en local.

In [None]:
#import mlflow
#from mlflow.tracking import MlflowClient

# Paso 1: Especificar el run_id y la ruta del modelo dentro del run
#run_id = "TU_RUN_ID"
#model_path = "best_model"  # o como lo hayas llamado: "model", "sk_model", etc.

# Paso 2: Registrar el modelo
#model_uri = f"runs:/{run_id}/{model_path}"
#model_name = "ridge_model_v1"

#mlflow.register_model(model_uri=model_uri, name=model_name)


In [10]:
# Buscar si el experimento ya existe
experiment_name = "Model_Registry_Example"

experiment = mlflow.get_experiment_by_name(experiment_name)

if experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
    print(f"Experimento '{experiment_name}' creado 🎯")
else:
    experiment_id = experiment.experiment_id
    print(f"Experimento '{experiment_name}' ya existía, usando id {experiment_id} ✔️")

# Seteamos el experimento como activo
mlflow.set_experiment(experiment_name)

# Registramos el modelo
with mlflow.start_run(run_name="registry_test_run"):
    mlflow.sklearn.log_model(model, "model", registered_model_name="LinearRegressionModel")



Experimento 'Model_Registry_Example' ya existía, usando id 3 ✔️


Registered model 'LinearRegressionModel' already exists. Creating a new version of this model...
2025/05/11 19:43:46 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LinearRegressionModel, version 5
Created version '5' of model 'LinearRegressionModel'.


🏃 View run registry_test_run at: http://localhost:5000/#/experiments/3/runs/c11e00b1a53841a2bee9de21e3c6933f
🧪 View experiment at: http://localhost:5000/#/experiments/3


In [12]:
# Búsqueda de modelos con filtros
models = client.search_registered_models(filter_string="name LIKE '%'")
for model in models:
    print(model.name)

Ridge
modelo ganador


In [34]:
import mlflow


# Opción 4: Cargar desde una etapa específica (si usas ciclo de vida de MLflow)
loaded_model = mlflow.pyfunc.load_model("models:/Ridge/1")  # Otras opciones: staging, archived

# Usar el modelo para predicciones
data= X_train[[0]]
predictions = loaded_model.predict(data)

In [35]:
predictions

array([3.74572842])

In [None]:
X_train[[0]]

array([[0.09176078]])

In [None]:
%%writefile app.py

import uvicorn
import mlflow
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List

# Crear la aplicación FastAPI
app = FastAPI(title="Modelo ML API", description="API para servir predicciones del modelo de MLflow")

# Cargar el modelo al iniciar la aplicación
# Reemplaza "models:/nombre_modelo/version" con tu ruta real
model = mlflow.pyfunc.load_model("models:/Ridge/1")

# Definir el esquema de la solicitud
class PredictionRequest(BaseModel):
    features: List[List[float]]

# Definir el esquema de la respuesta
class PredictionResponse(BaseModel):
    predictions: List

@app.post("/predict", response_model=PredictionResponse)
def predict(request: PredictionRequest):
    try:
        # Convertir las características a un array numpy
        features = np.array(request.features)
        # Realizar la predicción
        predictions = model.predict(features).tolist()
        # Devolver las predicciones
        return PredictionResponse(predictions=predictions)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
def health():
    return {"status": "ok"}



In [None]:
import uvicorn
import mlflow
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List

uvicorn.run("app:app", host="localhost", port=7000, reload=True)
#curl -X POST "http://localhost:7000/predict"      -H "Content-Type: application/json"      -d '{"features": [[1.2], [3.5]]}'


INFO:     Will watch for changes in these directories: ['c:\\Users\\guill\\OneDrive\\Documentos\\simplegit\\ITBA']
INFO:     Uvicorn running on http://localhost:7000 (Press CTRL+C to quit)
INFO:     Started reloader process [13244] using WatchFiles
