In [6]:
# Importar librerías
import pandas as pd
import json
import joblib
import os

In [7]:
# Model comparison: collect the saved test metrics of every trained model
# into a single table, sorted from best to worst test R².
models_dir = '../../models/'
model_comparison = {}

# Display name -> file prefix of the artifacts looked up in `models_dir`
# ("<prefix>_metrics.json" and "<prefix>_pipeline.joblib").
model_files = {
    "Linear Regression": "linear_regression",
    "Random Forest": "random_forest",
    "Gradient Boosting": "gradient_boosting",
    "XGBoost": "xgboost",
    "Random Forest Tuned": "random_forest_tuned",
    "Gradient Boosting Tuned": "gradient_boosting_tuned",
    "XGBoost Tuned": "xgboost_tuned"
}

# Placeholder for timings that a metrics file does not provide. NaN (instead
# of a string such as 'N/A') keeps the timing columns numeric, so sorting the
# table by a timing column cannot fail on a str/float comparison.
missing_time = float('nan')
skipped_models = []

for model_name, file_prefix in model_files.items():
    metrics_path = os.path.join(models_dir, f"{file_prefix}_metrics.json")
    pipeline_path = os.path.join(models_dir, f"{file_prefix}_pipeline.joblib")

    # A model only takes part in the comparison when both artifacts exist.
    if not (os.path.exists(pipeline_path) and os.path.exists(metrics_path)):
        skipped_models.append(model_name)
        continue

    with open(metrics_path, 'r', encoding='utf-8') as f:
        metrics = json.load(f)

    test_metrics = metrics['test_metrics']
    model_comparison[model_name] = {
        "R² (Prueba)": test_metrics['r2'],
        "RMSE (Prueba)": test_metrics['rmse'],
        "MAE (Prueba)": test_metrics['mae'],
        "Tiempo de entrenamiento (s)": metrics.get('training_time', missing_time),
        "Tiempo de predicción (s)": metrics.get('prediction_time', missing_time)
    }

# Report skipped models instead of dropping them silently.
if skipped_models:
    print(f"Modelos omitidos (faltan archivos en {models_dir}): {', '.join(skipped_models)}")

# Without any model the table has no columns and sorting would raise an
# opaque KeyError; fail early with an actionable message instead.
if not model_comparison:
    raise FileNotFoundError(
        f"No se encontraron métricas ni pipelines de modelos en '{models_dir}'."
    )

# Build the table with one row per model; orient='index' infers the dtype per
# column, so the metric columns stay numeric.
comparison_df = pd.DataFrame.from_dict(model_comparison, orient='index')
comparison_df = comparison_df.sort_values(by="R² (Prueba)", ascending=False)

print("Comparación de Modelos:")
display(comparison_df)

Comparación de Modelos:


Unnamed: 0,R² (Prueba),RMSE (Prueba),MAE (Prueba),Tiempo de entrenamiento (s),Tiempo de predicción (s)
Random Forest Tuned,0.768061,14202.124292,8230.101983,6.794373,0.014704
Random Forest,0.765918,14267.589113,8574.837059,0.215348,0.014613
XGBoost,0.749952,14746.135525,8733.0285,0.130599,0.002763
XGBoost Tuned,0.748134,14799.632362,8938.402475,3.62357,0.002123
Gradient Boosting,0.738597,15077.243769,9177.843422,0.290319,0.001988
Gradient Boosting Tuned,0.738597,15077.243769,9177.843422,4.900291,0.001928
Linear Regression,0.522696,20373.42102,13038.105589,0.004533,0.000465


In [8]:
# Choose the final model: take the label of the first row of the comparison
# table (the table was sorted by test R², best first, when it was built).
top_row = comparison_df.iloc[0]
best_model_name = top_row.name
r2_message = f"\nEl modelo seleccionado por R² es: {best_model_name}"
print(r2_message)


El modelo seleccionado por R² es: Random Forest Tuned


In [9]:
# Alternative criterion: rank the models by prediction time (ascending) and
# take the fastest one.
# NOTE(review): this overwrites the `best_model_name` chosen by R² in the
# previous cell — confirm that is intended before later cells rely on it.
ranked_by_prediction_time = comparison_df.sort_values(by='Tiempo de predicción (s)')
best_model_name = ranked_by_prediction_time.index[0]
speed_message = f"\nEl modelo seleccionado por tiempo de predicción es: {best_model_name}"
print(speed_message)


El modelo seleccionado por tiempo de predicción es: Linear Regression


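Aunque Random Forest Tuned obtiene el mejor R² (0.768) y Linear Regression el menor tiempo de predicción, se elige XGBoost como modelo final por ofrecer el mejor equilibrio entre precisión y velocidad: su R² (0.750) es cercano al máximo, mientras que su tiempo de predicción (0.0028 s) es unas cinco veces menor que el de Random Forest Tuned (0.0147 s) y su entrenamiento es mucho más rápido (0.13 s frente a 6.79 s). Por ello, XGBoost se guarda como el modelo final.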