# Evaluación y diagnóstico del mejor modelo


Este cuaderno toma el mejor modelo guardado, calcula nuevamente las métricas sobre el conjunto de prueba y genera gráficos de diagnóstico.


In [1]:
import json
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Locate config.json: first in the current working directory, otherwise two
# levels up. Both candidates are resolved to absolute paths so that
# `project_root` (and every path derived from it) has the same form
# regardless of which candidate is found.
CONFIG_PATH = Path("config.json").resolve()
if not CONFIG_PATH.exists():
    CONFIG_PATH = Path("../../config.json").resolve()
if not CONFIG_PATH.exists():
    # Fail with an explicit message instead of an unexplained open() error.
    raise FileNotFoundError(f"No se encontró config.json (última ruta probada: {CONFIG_PATH}).")

with CONFIG_PATH.open(encoding="utf-8") as cfg_file:
    config = json.load(cfg_file)

# Every artifact path in the config is interpreted relative to the directory
# that contains config.json.
project_root = CONFIG_PATH.parent
metrics_path = project_root / config["paths"]["metrics_report"]
if not metrics_path.exists():
    raise FileNotFoundError("No se encontró el reporte de métricas. Ejecuta model_training.ipynb primero.")

metrics_raw = pd.read_csv(metrics_path)
# pandas labels a header-less CSV column "Unnamed: <n>"; presumably this is
# the DataFrame index written out when the report was saved. Drop such
# columns so only the real metric columns are displayed. Filtering by name
# (rather than index_col=0) stays correct if the CSV is later written
# without an index.
metrics_df = metrics_raw.loc[:, ~metrics_raw.columns.str.startswith("Unnamed:")]
print("Métricas por modelo registradas en entrenamiento:")
metrics_df


Métricas por modelo registradas en entrenamiento:


Unnamed: 0,model,r2,mae,rmse
0,linear_regression,0.880433,4.214763,5.393994
1,random_forest,0.854425,4.586037,5.951812
2,xgboost,0.849092,4.676387,6.059841
3,decision_tree,0.816951,5.301368,6.674042


In [2]:
# Load the persisted best model. The artifact may be either a dict payload
# (with the estimator under "model" and optionally a "model_name") or the
# bare estimator itself.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts produced by this project.
best_model_path = project_root / config["paths"]["best_model"]
best_payload = joblib.load(best_model_path)
if isinstance(best_payload, dict):
    best_model = best_payload["model"]
    model_name = best_payload.get("model_name", "desconocido")
else:
    # A bare estimator has no .get(); fall back to the same default label
    # instead of raising AttributeError.
    best_model = best_payload
    model_name = "desconocido"

print(f"Modelo cargado: {model_name}")

# The test artifact is read as a mapping with the features under "X" and the
# target under "y".
test_artifact = joblib.load(project_root / config["paths"]["test_set"])
X_test, y_test = test_artifact["X"], test_artifact["y"]
y_pred = best_model.predict(X_test)

# Recompute the regression metrics on the test set.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"R2: {r2:.4f} | MAE: {mae:.4f} | RMSE: {rmse:.4f}")

# Residual diagnostics: residuals vs. prediction (left) and the residual
# distribution (right).
residuals = y_test - y_pred
sns.set_theme(style="whitegrid")
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
sns.scatterplot(x=y_pred, y=residuals, ax=axes[0])
axes[0].axhline(0, color="red", linestyle="--")
axes[0].set_title("Residuos vs Predicción")
axes[0].set_xlabel("Predicción")
axes[0].set_ylabel("Residual")

sns.histplot(residuals, kde=True, ax=axes[1], color="#4C72B0")
axes[1].set_title("Distribución de residuos")
axes[1].set_xlabel("Residual")

fig.tight_layout()
res_plot_path = project_root / "reports" / "residuals_diagnostics.png"
# Create the output directory if needed so savefig does not fail on a fresh
# checkout.
res_plot_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(res_plot_path, dpi=200)
# The figure is only written to disk; close it to release its memory.
plt.close(fig)
print(f"Gráfico guardado en {res_plot_path}")


Modelo cargado: linear_regression
R2: 0.8804 | MAE: 4.2148 | RMSE: 5.3940
Gráfico guardado en C:\Users\juanp\OneDrive\Escritorio\ML\Proyecto\reports\residuals_diagnostics.png
