# 🐧 Experimentos MLflow - Palmer Penguins

Este notebook implementa:
1. Verificación de servicios y bucket S3
2. Carga de datos en MySQL (penguins_raw)
3. Preprocesamiento y generación de datos limpios (penguins_clean)
4. ≥20 experimentos con diferentes modelos e hiperparámetros
5. Registro del mejor modelo en MLflow Model Registry

In [1]:
# Fix de compatibilidad para SQLAlchemy 2.0
import sqlalchemy
from sqlalchemy import text as sql_text

# Monkey patch: let Connection.execute() accept plain SQL strings by wrapping
# them in text() before delegating to the original implementation.
# The marker attribute keeps the cell idempotent: re-running it must not capture
# the already-patched execute() as "original" and stack wrappers on top of it.
if not getattr(sqlalchemy.engine.Connection.execute, "_accepts_raw_sql", False):
    original_execute = sqlalchemy.engine.Connection.execute

    def patched_execute(self, statement, *args, **kwargs):
        """Wrap a plain-string statement in text() and call the original execute().

        Non-string statements are passed through untouched. Positional and
        keyword arguments are forwarded unchanged.
        """
        if isinstance(statement, str):
            statement = sql_text(statement)
        return original_execute(self, statement, *args, **kwargs)

    patched_execute._accepts_raw_sql = True
    sqlalchemy.engine.Connection.execute = patched_execute

print("✅ Patch aplicado para compatibilidad con SQLAlchemy 2.0")

✅ Patch aplicado para compatibilidad con SQLAlchemy 2.0


In [2]:
# Importaciones necesarias
import os
import time
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
import boto3
from botocore.exceptions import NoCredentialsError, ClientError
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, classification_report
import xgboost as xgb
import lightgbm as lgb
import pymysql
from sqlalchemy import create_engine, text
import warnings
warnings.filterwarnings('ignore')

# Configure MLflow: MLFLOW_TRACKING_URI overrides the default
# http://mlflow:5000 when it is set in the environment.
tracking_uri = os.environ.get('MLFLOW_TRACKING_URI', 'http://mlflow:5000')
mlflow.set_tracking_uri(tracking_uri)
print(f"MLflow tracking URI: {mlflow.get_tracking_uri()}")

MLflow tracking URI: http://mlflow:5000


## 0. Verificar configuración y crear bucket si es necesario

In [3]:
# Verify the S3 bucket and create it when it is missing
def verify_s3_bucket(bucket_name='mlflows3', s3_client=None):
    """Check that an S3 bucket is reachable, creating it if it does not exist.

    Args:
        bucket_name: Name of the bucket to check. Defaults to 'mlflows3'.
        s3_client: Optional client object exposing ``head_bucket`` and
            ``create_bucket``. When omitted, a boto3 S3 client is built from
            MLFLOW_S3_ENDPOINT_URL, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
            and AWS_DEFAULT_REGION, using the fallbacks shown below.

    Returns:
        True when the bucket exists or was created; False on any failure.
        Failures are reported with print() and signalled by the return value.
    """
    try:
        if s3_client is None:
            # NOTE(review): the fallback credentials look like local development
            # defaults; confirm they are never relied on outside that setup.
            s3_client = boto3.client(
                's3',
                endpoint_url=os.getenv('MLFLOW_S3_ENDPOINT_URL', 'http://minio:9000'),
                aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID', 'admin'),
                aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY', 'supersecret'),
                region_name=os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
            )

        # Does the bucket exist?
        try:
            s3_client.head_bucket(Bucket=bucket_name)
            print(f"✅ Bucket '{bucket_name}' existe y está accesible")
            return True
        except ClientError as e:
            # Error codes are strings and not always numeric (e.g. 'NoSuchBucket',
            # 'AccessDenied'), so compare them as text instead of calling int(),
            # which would raise and be misreported as a connection error.
            error_code = str(e.response.get('Error', {}).get('Code', ''))
            if error_code not in ('404', 'NoSuchBucket'):
                print(f"❌ Error al verificar bucket: {e}")
                return False

            print(f"⚠️  Bucket '{bucket_name}' no existe. Intentando crear...")
            try:
                s3_client.create_bucket(Bucket=bucket_name)
                print(f"✅ Bucket '{bucket_name}' creado exitosamente")
                return True
            except Exception as create_error:
                print(f"❌ Error al crear bucket: {create_error}")
                return False
    except Exception as e:
        print(f"❌ Error de conexión S3: {e}")
        return False

# Check the bucket before going any further
bucket_ready = verify_s3_bucket()

if not bucket_ready:
    # Print the manual recovery commands, wait the announced 30 seconds,
    # then check once more.
    for message in (
        "\n⚠️  ADVERTENCIA: El bucket S3 no está disponible.",
        "Los experimentos pueden fallar al guardar artefactos.",
        "Intenta ejecutar en una terminal:",
        "docker exec mlflow-minio mc mb myminio/mlflows3",
        "docker exec mlflow-minio mc anonymous set download myminio/mlflows3",
        "\nEsperando 30 segundos antes de continuar...",
    ):
        print(message)
    time.sleep(30)
    bucket_ready = verify_s3_bucket()

⚠️  Bucket 'mlflows3' no existe. Intentando crear...
✅ Bucket 'mlflows3' creado exitosamente


In [4]:
# MySQL connection settings; each value can be overridden through the
# corresponding environment variable.
MYSQL_CONFIG = {
    'host': os.getenv('MYSQL_HOST', 'mysql'),
    'port': int(os.getenv('MYSQL_PORT', 3306)),
    'user': os.getenv('MYSQL_USER', 'penguins'),
    'password': os.getenv('MYSQL_PASSWORD', 'penguins123'),
    'database': os.getenv('MYSQL_DATABASE', 'penguins_db')
}

# Build the SQLAlchemy engine. URL.create() takes the components separately and
# escapes them itself, so a user or password containing '@', ':' or '/' no
# longer corrupts the connection URL the way string formatting would.
engine = create_engine(
    sqlalchemy.engine.URL.create(
        drivername="mysql+pymysql",
        username=MYSQL_CONFIG['user'],
        password=MYSQL_CONFIG['password'],
        host=MYSQL_CONFIG['host'],
        port=MYSQL_CONFIG['port'],
        database=MYSQL_CONFIG['database'],
    )
)

## 1. Cargar datos crudos en MySQL

In [5]:
# Load the penguins dataset, preferring the palmerpenguins package
try:
    from palmerpenguins import load_penguins
    df_raw = load_penguins()
    print("Dataset cargado desde palmerpenguins")
except Exception:
    # Best-effort fallback to the copy bundled with seaborn. Catching Exception
    # (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate.
    import seaborn as sns
    df_raw = sns.load_dataset('penguins')
    print("Dataset cargado desde seaborn")

print(f"Shape: {df_raw.shape}")
print(f"Columnas: {df_raw.columns.tolist()}")
df_raw.head()

Dataset cargado desde palmerpenguins
Shape: (344, 8)
Columnas: ['species', 'island', 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'sex', 'year']


Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007
3,Adelie,Torgersen,,,,,,2007
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007


In [6]:
# Empty penguins_raw before loading
with engine.connect() as connection:
    connection.execute(text("TRUNCATE TABLE penguins_raw"))
    connection.commit()
print("Tabla penguins_raw limpiada")

# Append the raw rows to the (now empty) table
inserted_raw = len(df_raw)
df_raw.to_sql(name='penguins_raw', con=engine, if_exists='append', index=False)
print(f"✓ {inserted_raw} registros insertados en penguins_raw")

Tabla penguins_raw limpiada
✓ 344 registros insertados en penguins_raw


## 2. Preprocesar datos y generar penguins_clean

In [7]:
# Read the raw rows back from MySQL
df_from_db = pd.read_sql("SELECT * FROM penguins_raw", engine)
print(f"Datos leídos: {df_from_db.shape}")

# Drop rows with nulls in the critical features. The explicit copy() makes
# df_clean an independent frame, so the column assignments below are plain
# writes rather than the chained-assignment pattern behind
# SettingWithCopyWarning (which the global warnings filter would hide).
critical_features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'species']
df_clean = df_from_db.dropna(subset=critical_features).copy()
print(f"Después de eliminar nulos: {df_clean.shape}")

# Encode the target variable and keep the code -> species lookup for later
label_encoder = LabelEncoder()
df_clean['species_encoded'] = label_encoder.fit_transform(df_clean['species'])
species_mapping = dict(enumerate(label_encoder.classes_))
print(f"Mapeo de especies: {species_mapping}")

# Fill the optional columns: missing sex becomes 'Unknown', missing year the median
df_clean['sex'] = df_clean['sex'].fillna('Unknown')
df_clean['year'] = df_clean['year'].fillna(df_clean['year'].median())

# Stamp every row with the processing time
df_clean['processed_at'] = pd.Timestamp.now()

Datos leídos: (344, 10)
Después de eliminar nulos: (342, 10)
Mapeo de especies: {0: 'Adelie', 1: 'Chinstrap', 2: 'Gentoo'}


In [8]:
# Empty penguins_clean, then append the preprocessed rows
with engine.connect() as connection:
    connection.execute(text("TRUNCATE TABLE penguins_clean"))
    connection.commit()

inserted_clean = len(df_clean)
df_clean.to_sql(name='penguins_clean', con=engine, if_exists='append', index=False)
print(f"✓ {inserted_clean} registros insertados en penguins_clean")

✓ 342 registros insertados en penguins_clean


## 3. Preparar datos para Machine Learning

In [9]:
# Feature matrix and target vector
feature_cols = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
X = df_clean[feature_cols].to_numpy()
y = df_clean['species_encoded'].to_numpy()

# Stratified 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"Train set: {X_train.shape}")
print(f"Test set: {X_test.shape}")
print(f"Distribución de clases en train: {np.bincount(y_train)}")

Train set: (273, 4)
Test set: (69, 4)
Distribución de clases en train: [121  54  98]


## 4. Experimentos con MLflow (≥20 runs)

In [10]:
# Select (creating it if needed) the MLflow experiment used by every run below
experiment_name = "penguins-classification"
mlflow.set_experiment(experiment_name=experiment_name)

# Look the experiment up again to obtain its ID
experiment = mlflow.get_experiment_by_name(name=experiment_name)
print(f"Experimento: {experiment_name} (ID: {experiment.experiment_id})")

Experimento: penguins-classification (ID: 1)


In [11]:
# Train a model and log it to MLflow, with error handling
def train_and_log_model(model, model_name, run_name, params, X_train, y_train, X_test, y_test,
                        max_retries=3, retry_delay=5):
    """Fit a model inside an MLflow run and log its params, metrics and artifact.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        model_name: Label logged as the ``model_type`` param.
        run_name: Name given to the MLflow run.
        params: Dict of hyper-parameters logged with ``mlflow.log_params``.
        X_train, y_train: Training data passed to ``model.fit``.
        X_test, y_test: Held-out data used for the metrics.
        max_retries: Maximum attempts at logging the model artifact.
        retry_delay: Seconds to wait between attempts after a "NoSuchBucket" error.

    Returns:
        ``(accuracy, f1)`` computed on the test set (macro-averaged F1), or
        ``(None, None)`` if anything other than artifact logging raised.
        A failure to log the artifact is printed but does not discard the metrics.
    """
    try:
        with mlflow.start_run(run_name=run_name):
            # Log parameters
            mlflow.log_params(params)
            mlflow.log_param("model_type", model_name)

            # Train and predict
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # Metrics
            accuracy = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred, average='macro')
            mlflow.log_metric("accuracy", accuracy)
            mlflow.log_metric("f1_score", f1)

            # Log the model artifact, retrying only on "NoSuchBucket" errors
            for retry in range(max_retries):
                try:
                    mlflow.sklearn.log_model(
                        model,
                        "model",
                        input_example=X_train[:1],
                        signature=mlflow.models.infer_signature(X_train, y_train)
                    )
                    break
                except Exception as e:
                    if "NoSuchBucket" in str(e) and retry < max_retries - 1:
                        print(f"⚠️  Error con bucket S3, reintentando en {retry_delay}s... ({retry+1}/{max_retries})")
                        time.sleep(retry_delay)
                    else:
                        print(f"❌ Error al guardar modelo: {e}")
                        print("   El modelo se entrenó pero no se pudo guardar en S3")
                        # Non-retryable error or attempts exhausted: stop here
                        # instead of repeating the same failing call and message.
                        break

            print(f"{run_name}: Accuracy={accuracy:.4f}, F1={f1:.4f}")
            return accuracy, f1

    except Exception as e:
        print(f"❌ Error en {run_name}: {e}")
        return None, None

### Experimentos 1-5: Random Forest con diferentes hiperparámetros

In [12]:
# Random Forest hyper-parameter sets (experiments 1-5)
rf_configs = [
    dict(n_estimators=50, max_depth=5, min_samples_split=5),
    dict(n_estimators=100, max_depth=10, min_samples_split=2),
    dict(n_estimators=200, max_depth=None, min_samples_split=2),
    dict(n_estimators=300, max_depth=15, min_samples_split=4),
    dict(n_estimators=500, max_depth=20, min_samples_split=3),
]

# One MLflow run per configuration, numbered from 1
for i, config in enumerate(rf_configs, start=1):
    rf = RandomForestClassifier(random_state=42, **config)
    train_and_log_model(
        model=rf,
        model_name="RandomForest",
        run_name=f"rf_experiment_{i}",
        params=config,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )



rf_experiment_1: Accuracy=1.0000, F1=1.0000
🏃 View run rf_experiment_1 at: http://mlflow:5000/#/experiments/1/runs/bd5a0c2810934b8da64029bb73e87ce6
🧪 View experiment at: http://mlflow:5000/#/experiments/1




rf_experiment_2: Accuracy=1.0000, F1=1.0000
🏃 View run rf_experiment_2 at: http://mlflow:5000/#/experiments/1/runs/21cd87ed6aab4f699769f7d48e2458d9
🧪 View experiment at: http://mlflow:5000/#/experiments/1




rf_experiment_3: Accuracy=1.0000, F1=1.0000
🏃 View run rf_experiment_3 at: http://mlflow:5000/#/experiments/1/runs/3f7b8534b72c4f83869867229215f362
🧪 View experiment at: http://mlflow:5000/#/experiments/1




rf_experiment_4: Accuracy=1.0000, F1=1.0000
🏃 View run rf_experiment_4 at: http://mlflow:5000/#/experiments/1/runs/9dc3cf3bf32b463582e903463ba74508
🧪 View experiment at: http://mlflow:5000/#/experiments/1




rf_experiment_5: Accuracy=1.0000, F1=1.0000
🏃 View run rf_experiment_5 at: http://mlflow:5000/#/experiments/1/runs/478fb460d6b742fc8c48042852c6a3ca
🧪 View experiment at: http://mlflow:5000/#/experiments/1


### Experimentos 6-10: KNN con diferentes valores de k

In [13]:
# KNN experiments (6-10)
k_values = [3, 5, 7, 10, 15]
weights_options = ['uniform', 'distance']

# Only the first weighting ('uniform') is used, giving five runs in total
selected_weights = weights_options[:1]
knn_grid = [(k, weights) for k in k_values for weights in selected_weights]

exp_num = 6
for k, weights in knn_grid:
    params = {'n_neighbors': k, 'weights': weights}

    # Scale the features before KNN
    knn_pipeline = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(**params)),
    ])

    train_and_log_model(
        model=knn_pipeline,
        model_name="KNN",
        run_name=f"knn_experiment_{exp_num}",
        params=params,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )
    exp_num += 1



knn_experiment_6: Accuracy=1.0000, F1=1.0000
🏃 View run knn_experiment_6 at: http://mlflow:5000/#/experiments/1/runs/2d7c4a0270c24fbca4b7b73b69f7dafa
🧪 View experiment at: http://mlflow:5000/#/experiments/1




knn_experiment_7: Accuracy=1.0000, F1=1.0000
🏃 View run knn_experiment_7 at: http://mlflow:5000/#/experiments/1/runs/b3d5015c47404e6bad85be0b233e4b20
🧪 View experiment at: http://mlflow:5000/#/experiments/1




knn_experiment_8: Accuracy=1.0000, F1=1.0000
🏃 View run knn_experiment_8 at: http://mlflow:5000/#/experiments/1/runs/5b4352bf77f24b6b80f4640f6702a34c
🧪 View experiment at: http://mlflow:5000/#/experiments/1




knn_experiment_9: Accuracy=0.9855, F1=0.9822
🏃 View run knn_experiment_9 at: http://mlflow:5000/#/experiments/1/runs/b6efd9e4c195479da39ae30d24167379
🧪 View experiment at: http://mlflow:5000/#/experiments/1




knn_experiment_10: Accuracy=0.9855, F1=0.9822
🏃 View run knn_experiment_10 at: http://mlflow:5000/#/experiments/1/runs/8deb01d08f5d4514854723c1e25ab13b
🧪 View experiment at: http://mlflow:5000/#/experiments/1


### Experimentos 11-15: SVM con diferentes kernels

In [14]:
# SVM experiments (11-15)
svm_configs = [
    dict(kernel='linear', C=0.1),
    dict(kernel='linear', C=1.0),
    dict(kernel='rbf', C=1.0, gamma='scale'),
    dict(kernel='rbf', C=10.0, gamma='auto'),
    dict(kernel='poly', C=1.0, degree=3),
]

for i, config in enumerate(svm_configs, start=11):
    # Scale the features before the SVM
    svm_classifier = SVC(probability=True, random_state=42, **config)
    svm_pipeline = Pipeline(steps=[
        ('scaler', StandardScaler()),
        ('svm', svm_classifier),
    ])

    train_and_log_model(
        model=svm_pipeline,
        model_name="SVM",
        run_name=f"svm_experiment_{i}",
        params=config,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )



svm_experiment_11: Accuracy=1.0000, F1=1.0000
🏃 View run svm_experiment_11 at: http://mlflow:5000/#/experiments/1/runs/38f2d771047241a5982cdf4df631cacd
🧪 View experiment at: http://mlflow:5000/#/experiments/1




svm_experiment_12: Accuracy=1.0000, F1=1.0000
🏃 View run svm_experiment_12 at: http://mlflow:5000/#/experiments/1/runs/a8132231562c4b13abf34ae0cc046a11
🧪 View experiment at: http://mlflow:5000/#/experiments/1




svm_experiment_13: Accuracy=1.0000, F1=1.0000
🏃 View run svm_experiment_13 at: http://mlflow:5000/#/experiments/1/runs/fe42d9f7ca964501b04ed5988ec360b9
🧪 View experiment at: http://mlflow:5000/#/experiments/1




svm_experiment_14: Accuracy=1.0000, F1=1.0000
🏃 View run svm_experiment_14 at: http://mlflow:5000/#/experiments/1/runs/3d82b24ca2d74710ad17a89161970687
🧪 View experiment at: http://mlflow:5000/#/experiments/1




svm_experiment_15: Accuracy=0.9565, F1=0.9441
🏃 View run svm_experiment_15 at: http://mlflow:5000/#/experiments/1/runs/0aefc43fe09e415abfd243e42208686c
🧪 View experiment at: http://mlflow:5000/#/experiments/1


### Experimentos 16-20: XGBoost con diferentes configuraciones

In [15]:
# XGBoost experiments (16-20)
xgb_configs = [
    dict(n_estimators=50, max_depth=3, learning_rate=0.1),
    dict(n_estimators=100, max_depth=5, learning_rate=0.05),
    dict(n_estimators=200, max_depth=7, learning_rate=0.01),
    dict(n_estimators=300, max_depth=4, learning_rate=0.1),
    dict(n_estimators=150, max_depth=6, learning_rate=0.3),
]

for i, config in enumerate(xgb_configs, start=16):
    # Multi-class soft-probability objective with a fixed seed
    xgb_model = xgb.XGBClassifier(objective='multi:softprob', random_state=42, **config)

    train_and_log_model(
        model=xgb_model,
        model_name="XGBoost",
        run_name=f"xgboost_experiment_{i}",
        params=config,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )



xgboost_experiment_16: Accuracy=0.9565, F1=0.9528
🏃 View run xgboost_experiment_16 at: http://mlflow:5000/#/experiments/1/runs/27dc3d1622224798b2562ee82d1b970b
🧪 View experiment at: http://mlflow:5000/#/experiments/1




xgboost_experiment_17: Accuracy=0.9565, F1=0.9528
🏃 View run xgboost_experiment_17 at: http://mlflow:5000/#/experiments/1/runs/e6f3340c25cb4c9eb6a906ea44630cd0
🧪 View experiment at: http://mlflow:5000/#/experiments/1




xgboost_experiment_18: Accuracy=0.9710, F1=0.9692
🏃 View run xgboost_experiment_18 at: http://mlflow:5000/#/experiments/1/runs/eb427ffbbace430882e5a5fb02f7c24c
🧪 View experiment at: http://mlflow:5000/#/experiments/1




xgboost_experiment_19: Accuracy=0.9710, F1=0.9653
🏃 View run xgboost_experiment_19 at: http://mlflow:5000/#/experiments/1/runs/b5820f4fe9aa41188471ef6077306c09
🧪 View experiment at: http://mlflow:5000/#/experiments/1




xgboost_experiment_20: Accuracy=0.9710, F1=0.9653
🏃 View run xgboost_experiment_20 at: http://mlflow:5000/#/experiments/1/runs/7a2ae89a3e1e4c15bc2afc12e2c58fad
🧪 View experiment at: http://mlflow:5000/#/experiments/1


### Experimentos adicionales 21-25: LightGBM

In [16]:
# Additional LightGBM experiments (21-25), on top of the required 20
lgb_configs = [
    dict(n_estimators=100, num_leaves=31, learning_rate=0.1),
    dict(n_estimators=200, num_leaves=50, learning_rate=0.05),
    dict(n_estimators=150, num_leaves=20, learning_rate=0.15),
    dict(n_estimators=300, num_leaves=40, learning_rate=0.01),
    dict(n_estimators=250, num_leaves=60, learning_rate=0.08),
]

for i, config in enumerate(lgb_configs, start=21):
    # verbose=-1 is passed to LightGBM alongside the multiclass objective
    lgb_model = lgb.LGBMClassifier(objective='multiclass', random_state=42, verbose=-1, **config)

    train_and_log_model(
        model=lgb_model,
        model_name="LightGBM",
        run_name=f"lightgbm_experiment_{i}",
        params=config,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )



lightgbm_experiment_21: Accuracy=0.9565, F1=0.9528
🏃 View run lightgbm_experiment_21 at: http://mlflow:5000/#/experiments/1/runs/4d1e2592fe724eaa923d58cd0eb2c340
🧪 View experiment at: http://mlflow:5000/#/experiments/1




lightgbm_experiment_22: Accuracy=0.9710, F1=0.9653
🏃 View run lightgbm_experiment_22 at: http://mlflow:5000/#/experiments/1/runs/393e345fb4d248ccbf0743ba96feaa9a
🧪 View experiment at: http://mlflow:5000/#/experiments/1




lightgbm_experiment_23: Accuracy=0.9565, F1=0.9528
🏃 View run lightgbm_experiment_23 at: http://mlflow:5000/#/experiments/1/runs/b59f1936c750451a9be0bf71ddd58a75
🧪 View experiment at: http://mlflow:5000/#/experiments/1




lightgbm_experiment_24: Accuracy=0.9855, F1=0.9817
🏃 View run lightgbm_experiment_24 at: http://mlflow:5000/#/experiments/1/runs/4fd27a4739a64f10b762e00f7433d631
🧪 View experiment at: http://mlflow:5000/#/experiments/1




lightgbm_experiment_25: Accuracy=0.9710, F1=0.9653
🏃 View run lightgbm_experiment_25 at: http://mlflow:5000/#/experiments/1/runs/0eb1030532c64a33b3db349f299c1f5d
🧪 View experiment at: http://mlflow:5000/#/experiments/1


## 5. Seleccionar mejor modelo y registrar en Model Registry

In [17]:
# Find the best run of the experiment, ranked by F1 score
from mlflow.entities import ViewType

runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="",
    order_by=["metrics.f1_score DESC"],
    max_results=1,
)

if len(runs) == 0:
    print("❌ No se encontraron runs exitosos")
else:
    # The first row is the top-ranked run
    best_run = runs.iloc[0]
    for summary_line in (
        f"Mejor run ID: {best_run['run_id']}",
        f"Modelo: {best_run['params.model_type']}",
        f"F1 Score: {best_run['metrics.f1_score']:.4f}",
        f"Accuracy: {best_run['metrics.accuracy']:.4f}",
    ):
        print(summary_line)

Mejor run ID: 3d82b24ca2d74710ad17a89161970687
Modelo: SVM
F1 Score: 1.0000
Accuracy: 1.0000


In [18]:
# Register the best model, if there is one.
# model_name is defined unconditionally so the later Production check can
# reference it even when nothing was registered in this cell.
model_name = "penguins-classifier"

if len(runs) > 0 and 'artifact_uri' in best_run:
    model_uri = f"runs:/{best_run['run_id']}/model"

    # Step 1: confirm the run actually has a loadable model artifact
    try:
        test_model = mlflow.pyfunc.load_model(model_uri)
    except Exception as e:
        print(f"❌ El modelo no tiene artefactos guardados o no se puede cargar: {e}")
        print("   Esto puede deberse a problemas con el bucket S3.")
        print("   Los experimentos se ejecutaron pero los modelos no se pudieron guardar.")
    else:
        print("✅ Modelo encontrado, procediendo a registrar...")

        # Step 2: register and promote. Registry errors are reported on their
        # own instead of being mislabelled as missing artifacts.
        try:
            client = mlflow.tracking.MlflowClient()

            # register_model() returns the ModelVersion it just created, so use
            # it directly rather than re-querying the registry (which could
            # return another version, or an empty list).
            registered_version = mlflow.register_model(model_uri, model_name)
            print(f"✅ Modelo registrado: {model_name}")
            print(f"Versión del modelo: {registered_version.version}")

            # Promote to Production, archiving whatever was there before
            client.transition_model_version_stage(
                name=model_name,
                version=registered_version.version,
                stage="Production",
                archive_existing_versions=True
            )

            # Attach a description to the model version
            client.update_model_version(
                name=model_name,
                version=registered_version.version,
                description=f"Mejor modelo para clasificación de pingüinos. F1={best_run['metrics.f1_score']:.4f}"
            )

            # Only expose model_version once promotion succeeded; the final
            # summary uses its presence to report a Production version.
            model_version = registered_version
            print(f"✅ Modelo {model_name} v{model_version.version} promovido a Production")
        except Exception as e:
            print(f"❌ Error al registrar o promover el modelo: {e}")
else:
    print("❌ No hay modelos disponibles para registrar")

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Successfully registered model 'penguins-classifier'.
2025/09/21 20:05:20 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: penguins-classifier, version 1


✅ Modelo encontrado, procediendo a registrar...
✅ Modelo registrado: penguins-classifier


Created version '1' of model 'penguins-classifier'.


Versión del modelo: 1
✅ Modelo penguins-classifier v1 promovido a Production


## 6. Verificar modelo en Production

In [19]:
# Try to load whichever model version is currently in Production
try:
    model_uri = f"models:/{model_name}/Production"
    loaded_model = mlflow.pyfunc.load_model(model_uri)

    # Single-row smoke-test input
    test_input = pd.DataFrame([{
        'bill_length_mm': 44.5,
        'bill_depth_mm': 17.1,
        'flipper_length_mm': 200,
        'body_mass_g': 4200,
    }])

    prediction = loaded_model.predict(test_input)
    predicted_code = prediction[0]
    # Translate the numeric class code back to the species name
    predicted_species = species_mapping[predicted_code]

    print("Predicción de prueba:")
    print(f"Input: {test_input.values[0]}")
    print(f"Predicción: {predicted_species} (código: {predicted_code})")
except Exception as e:
    print(f"❌ No se pudo cargar el modelo desde Production: {e}")

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Predicción de prueba:
Input: [  44.5   17.1  200.  4200. ]
Predicción: Adelie (código: 0)


In [20]:
# Final summary
total_runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
print(f"\n📊 RESUMEN FINAL:")
print(f"- Total de experimentos realizados: {len(total_runs)}")

if len(runs) > 0:
    print(f"- Mejor modelo: {best_run['params.model_type']}")
    print(f"- F1 Score: {best_run['metrics.f1_score']:.4f}")

    try:
        # model_version only exists if the registration cell created it
        if 'model_version' in globals():
            print(f"- Modelo registrado: {model_name}")
            print(f"- Versión en Production: {model_version.version}")
            print(f"\n✅ Pipeline MLflow completado exitosamente!")
        else:
            print(f"\n⚠️  Pipeline MLflow completado parcialmente")
            print("   Los experimentos se ejecutaron pero hubo problemas con el registro del modelo")
    except Exception as e:
        # Catch Exception rather than using a bare except, so interrupts still
        # propagate, and show what went wrong instead of hiding it.
        print(f"\n⚠️  Pipeline MLflow completado con advertencias")
        print(f"   Detalle: {e}")
else:
    print("\n❌ No se completaron experimentos exitosamente")

if not bucket_ready:
    print("\n⚠️  NOTA: Hubo problemas con el bucket S3.")
    print("   Los modelos pueden no haberse guardado correctamente.")
    print("   Verifica la configuración de MinIO y vuelve a ejecutar el notebook.")


📊 RESUMEN FINAL:
- Total de experimentos realizados: 30
- Mejor modelo: SVM
- F1 Score: 1.0000
- Modelo registrado: penguins-classifier
- Versión en Production: 1

✅ Pipeline MLflow completado exitosamente!
