In [6]:
# Training Pipeline Notebook

import pandas as pd
import numpy as np
import pickle
import joblib
from lazypredict.Supervised import LazyRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import HuberRegressor
import mlflow
import mlflow.sklearn

# 1. Load the preprocessed train/test splits.
# NOTE(review): pickle can execute arbitrary code while loading, so this file
# must come from a trusted source (presumably an earlier preprocessing
# step — confirm).
with open("processed_data.pkl", "rb") as data_file:
    splits = pickle.load(data_file)
X_train, X_test, y_train, y_test = splits

# 2. Benchmark many regressors at once with LazyPredict.
# `models` is the score table that gets printed below and is used by the next
# step to pick a winner.
# NOTE(review): `predictions` is not used anywhere in the visible code.
reg = LazyRegressor()
benchmark = reg.fit(X_train, X_test, y_train, y_test)
models, predictions = benchmark
print(models)

# 3. Pick the top-ranked model from the LazyPredict score table.
# NOTE(review): this takes the first row, so it relies on the table already
# being sorted best-first — confirm which metric drives that ordering.
ranked_names = models.index
best_model_name = ranked_names[0]
print(f"Mejor modelo: {best_model_name}")

# 4. Train the final model and track it in MLflow
mlflow.set_experiment("Gold Price Regression")

with mlflow.start_run():
    # The estimator is fixed to HuberRegressor (default hyperparameters)
    # rather than derived from best_model_name, so make any disagreement with
    # the LazyPredict ranking visible instead of silently ignoring it.
    model = HuberRegressor()
    trained_model_name = type(model).__name__
    if best_model_name != trained_model_name:
        print(
            f"Aviso: el mejor modelo según LazyPredict es {best_model_name}, "
            f"pero se entrena {trained_model_name}"
        )
    model.fit(X_train, y_train)

    # Record which estimator this run used and which one LazyPredict ranked
    # first, so the run is self-describing in the MLflow UI.
    mlflow.log_param("model_class", trained_model_name)
    mlflow.log_param("lazypredict_best_model", best_model_name)

    # Log the fitted model to MLflow
    mlflow.sklearn.log_model(model, "model")

    # Predict on the test split once and reuse the result for both metrics
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)

    # Log the evaluation metrics to MLflow
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("MAE", mae)

    # Echo the metrics in the notebook output
    print(f"MSE: {mse}, MAE: {mae}")

    # Also persist the trained model to a local file with joblib
    model_path = "huber_regressor_model.pkl"
    joblib.dump(model, model_path)
    print(f"Modelo guardado en {model_path}")

100%|██████████| 42/42 [00:08<00:00,  4.73it/s]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000062 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1020
[LightGBM] [Info] Number of data points in the train set: 4562, number of used features: 4
[LightGBM] [Info] Start training from score 1044.722052
                               Adjusted R-Squared  R-Squared    RMSE  \
Model                                                                  
HuberRegressor                               1.00       1.00    4.66   
LassoLarsCV                                  1.00       1.00    4.69   
Lars                                         1.00       1.00    4.69   
OrthogonalMatchingPursuitCV                  1.00       1.00    4.69   
LassoLarsIC                                  1.00       1.00    4.69   
LarsCV                                       1.00       1.00    4.69   
TransformedTargetRegressor                   1.00       1.00    4.69   
RANSACReg



MSE: 21.75748639120675, MAE: 2.9515764445855925
Modelo guardado en huber_regressor_model.pkl
