In [1]:
# Importar librerías
import pandas as pd
import json
import joblib
import os

In [2]:
# Model comparison: gather the saved test metrics of every trained model into one table.
models_dir = '../../models/'
model_comparison = {}

# Display name -> file prefix of the artifacts stored in models_dir.
model_files = {
    "Linear Regression": "linear_regression",
    "Random Forest": "random_forest",
    "Gradient Boosting": "gradient_boosting",
    "XGBoost": "xgboost",
    "Random Forest Tuned": "random_forest_tuned",
    "Gradient Boosting Tuned": "gradient_boosting_tuned",
    "XGBoost Tuned": "xgboost_tuned"
}

# Placeholder for timings absent from a metrics file. NaN (instead of the string
# 'N/A') keeps the timing columns numeric, so sorting by them cannot fail on a
# str/float comparison; sort_values places NaN rows last by default.
missing_value = float('nan')
skipped_models = []

for model_name, file_prefix in model_files.items():
    metrics_path = os.path.join(models_dir, f"{file_prefix}_metrics.json")
    pipeline_path = os.path.join(models_dir, f"{file_prefix}_pipeline.joblib")

    # Only models that have BOTH a saved pipeline and a metrics file are compared.
    if not (os.path.exists(pipeline_path) and os.path.exists(metrics_path)):
        skipped_models.append(model_name)
        continue

    with open(metrics_path, 'r', encoding='utf-8') as f:
        metrics = json.load(f)

    test_metrics = metrics['test_metrics']
    model_comparison[model_name] = {
        "R² (Prueba)": test_metrics['r2'],
        "RMSE (Prueba)": test_metrics['rmse'],
        "MAE (Prueba)": test_metrics['mae'],
        "Tiempo de entrenamiento (s)": metrics.get('training_time', missing_value),
        "Tiempo de predicción (s)": metrics.get('prediction_time', missing_value)
    }

# Make skipped models visible so the ranking is not silently computed on a subset.
if skipped_models:
    print(f"Modelos omitidos (faltan archivos en {models_dir}): {', '.join(skipped_models)}")

# Fail with a clear message instead of a KeyError from sorting an empty frame.
if not model_comparison:
    raise FileNotFoundError(f"No se encontraron modelos con métricas en {models_dir}")

# One row per model, one column per metric, best test R² first.
comparison_df = pd.DataFrame.from_dict(model_comparison, orient="index")
comparison_df = comparison_df.sort_values(by="R² (Prueba)", ascending=False)

print("Comparación de Modelos:")
display(comparison_df)

Comparación de Modelos:


Unnamed: 0,R² (Prueba),RMSE (Prueba),MAE (Prueba),Tiempo de entrenamiento (s),Tiempo de predicción (s)
Random Forest Tuned,0.805242,12799.066542,7239.970685,16.268839,0.027672
XGBoost Tuned,0.804433,12825.61392,7571.263868,4.515549,0.003255
XGBoost,0.797995,13035.013988,7959.820767,0.093971,0.002206
Gradient Boosting Tuned,0.79235,13215.911319,7760.551597,14.036204,0.006338
Random Forest,0.781103,13569.087777,8169.500503,0.500972,0.015496
Gradient Boosting,0.76972,13917.425139,8818.689028,1.003435,0.00291
Linear Regression,0.470845,21097.08459,14326.372073,0.011445,0.00149


In [3]:
# Choose the final model: take the label of the first row of comparison_df.
# This relies on comparison_df already being sorted by test R² in descending order.
best_model_name = comparison_df.iloc[0].name
print(f"\nEl modelo seleccionado por R² es: {best_model_name}")


El modelo seleccionado por R² es: Random Forest Tuned


In [4]:
# Rank the models by prediction time (ascending) and take the fastest one.
# NOTE(review): this reassigns best_model_name, replacing the R²-based choice made
# earlier — confirm that later cells really expect the fastest model here.
best_model_name = (
    comparison_df
    .sort_values(by='Tiempo de predicción (s)', ascending=True)
    .index[0]
)
print(f"\nEl modelo seleccionado por tiempo de predicción es: {best_model_name}")


El modelo seleccionado por tiempo de predicción es: Linear Regression


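Aunque ninguno de los dos criterios por separado lo selecciona (Random Forest Tuned gana por R² y Linear Regression por tiempo de predicción), XGBoost ofrece el mejor equilibrio entre precisión y velocidad, por lo que se guarda como el modelo final.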