In [8]:
# %% 6. Hyperparameter Tuning con GridSearchCV en Notebook

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import (
    RandomForestRegressor,
    AdaBoostRegressor,
    GradientBoostingRegressor,
    BaggingRegressor
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Carga de de datos preprocesados

In [5]:
# %% 0. Asegurar cwd en la raíz del proyecto
import os

# opción 1: con magia de Jupyter
# %cd C:/Users/George/Documents/GitHub/ML-Models-UFM-2025-Airbnb-Pricing-competition/airbnb-price-determinant

# opción 2: con os.chdir
os.chdir(r"C:\Users\George\Documents\GitHub\ML-Models-UFM-2025-Airbnb-Pricing-competition\airbnb-price-determinant")

print("Nuevo cwd:", os.getcwd())

Nuevo cwd: C:\Users\George\Documents\GitHub\ML-Models-UFM-2025-Airbnb-Pricing-competition\airbnb-price-determinant


In [6]:
# 1. Carga preprocesados
df = pd.read_csv("data/02_intermediate/train_processed.csv")
X = df.drop(["id", "realSum"], axis=1)
y = df["realSum"].astype(float)

# 2. Split interno (otra vez) para HPO
X_tr, X_val, y_tr, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

 # Definimos Grids de búsqueda

In [10]:
# %% 3. Define grids de búsqueda
from sklearn.tree import DecisionTreeRegressor  # ya lo tienes importado

param_grids = {
    "RandomForest": {
        "model": RandomForestRegressor(random_state=42),
        "params": {
            "n_estimators": [100, 200],
            "max_depth": [None, 10, 20]
        }
    },
    "GradientBoosting": {
        "model": GradientBoostingRegressor(random_state=42),
        "params": {
            "n_estimators": [100, 200],
            "learning_rate": [0.05, 0.1]
        }
    },
    "AdaBoost": {
        "model": AdaBoostRegressor(
            estimator=DecisionTreeRegressor(max_depth=3),  # <- usa "estimator"
            random_state=42
        ),
        "params": {
            "n_estimators": [50, 100],
            "learning_rate": [0.5, 1.0]
        }
    },
    "Bagging": {
        "model": BaggingRegressor(
            estimator=DecisionTreeRegressor(max_depth=5),  # <- usa "estimator"
            random_state=42
        ),
        "params": {
            "n_estimators": [10, 50],
            "max_samples": [0.8, 1.0]
        }
    },
    "KNN": {
        "model": KNeighborsRegressor(),
        "params": {
            "n_neighbors": [5, 10, 20]
        }
    },
    "SGD": {
        "model": SGDRegressor(random_state=42),
        "params": {
            "alpha": [1e-4, 1e-3, 1e-2],
            "max_iter": [1000, 2000]
        }
    }
}


In [17]:
# %% Carga datasets preprocesados
import pandas as pd

# 1️⃣ Lee los CSVs que guardaste al final del EDA
train_df = pd.read_csv("data/02_intermediate/train_processed.csv")
valid_df = pd.read_csv("data/02_intermediate/valid_processed.csv")

# 2️⃣ Separa características y target
X_train_proc = train_df.drop(["id", "realSum"], axis=1)
y_train      = train_df["realSum"].astype(float)
X_valid_proc = valid_df.drop(["id", "realSum"], axis=1)
y_valid      = valid_df["realSum"].astype(float)

# 3️⃣ Comprueba que todo cargó correctamente
print("▶️ Shapes:")
print("   X_train_proc:", X_train_proc.shape)
print("   X_valid_proc:", X_valid_proc.shape)
print("   y_train     :", y_train.shape)
print("   y_valid     :", y_valid.shape)


▶️ Shapes:
   X_train_proc: (28956, 25)
   X_valid_proc: (7239, 25)
   y_train     : (28956,)
   y_valid     : (7239,)


# Verificación de Data

In [26]:
# %% 5.x+ Sanity‐check final e imputación de target

import numpy as np

# 1️⃣ Imputar sólo características con la media de X_train
train_means = X_train_proc.mean()
X_train_proc = X_train_proc.fillna(train_means)
X_valid_proc = X_valid_proc.fillna(train_means)

# 2️⃣ Filtrar filas donde target o features tengan NaNs
mask_train = (~X_train_proc.isna().any(axis=1)) & (~y_train.isna())
mask_valid = (~X_valid_proc.isna().any(axis=1)) & (~y_valid.isna())

X_train_proc = X_train_proc.loc[mask_train]
y_train      = y_train.loc[mask_train]
X_valid_proc = X_valid_proc.loc[mask_valid]
y_valid      = y_valid.loc[mask_valid]

# 3️⃣ Asegurémonos de que ya no queden NaNs
assert X_train_proc.isna().sum().sum() == 0
assert X_valid_proc.isna().sum().sum() == 0
assert y_train.isna().sum() == 0
assert y_valid.isna().sum() == 0

# 5.x.1) Comprueba cuántos NaNs hay en features y target
print("NaNs en X_train_proc:", X_train_proc.isna().sum().sum())
print("NaNs en X_valid_proc:", X_valid_proc.isna().sum().sum())
print("NaNs en y_train     :", y_train.isna().sum())
print("NaNs en y_valid     :", y_valid.isna().sum())

print("✅ Después de todo, no quedan NaNs:")
print(f"   X_train_proc: {X_train_proc.shape}, y_train: {y_train.shape}")
print(f"   X_valid_proc: {X_valid_proc.shape}, y_valid: {y_valid.shape}")


NaNs en X_train_proc: 0
NaNs en X_valid_proc: 0
NaNs en y_train     : 0
NaNs en y_valid     : 0
✅ Después de todo, no quedan NaNs:
   X_train_proc: (28956, 25), y_train: (28956,)
   X_valid_proc: (7238, 25), y_valid: (7238,)


# Random Forest

In [49]:
# %% 6.1 Hyperparameter Tuning Random Forest con cronómetro y append a resultados

import time
import threading
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Inicializa la lista de resultados (solo la primera vez)
try:
    results
except NameError:
    results = []

# 1️⃣ Asegúrate de tener en memoria:
#    X_train_proc, y_train, X_valid_proc, y_valid

# 2️⃣ Define tu modelo y grid
rf = RandomForestRegressor(random_state=42)
rf_param_grid = {
    "n_estimators": [100, 200],
    "max_depth":    [None, 10, 20]
}

gs_rf = GridSearchCV(
    estimator=rf,
    param_grid=rf_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1  # para ver progreso de cada fold
)

# 3️⃣ Cronómetro en un hilo aparte (imprime cada 60 s)
stop_timer = False
def _print_timer():
    mins = 0
    while not stop_timer:
        time.sleep(60)
        mins += 1
        print(f"⏱️ Elapsed: {mins} min")

timer_thread = threading.Thread(target=_print_timer, daemon=True)
timer_thread.start()

# 4️⃣ Fit + medición total
start_time = time.time()
print("🔎 Random Forest — tuning hiperparámetros…")
gs_rf.fit(X_train_proc, y_train)

# parar cronómetro
stop_timer = True
timer_thread.join()

total_min = (time.time() - start_time) / 60
print(f"\n✅ GridSearchCV completado en {total_min:.2f} min")

# 5️⃣ Evaluación sobre validación
best_rf = gs_rf.best_estimator_
preds_rf = best_rf.predict(X_valid_proc)

mse_rf  = mean_squared_error(y_valid, preds_rf)
rmse_rf = mse_rf ** 0.5
mae_rf  = mean_absolute_error(y_valid, preds_rf)
r2_rf   = r2_score(y_valid, preds_rf)

print(f"✔ Random Forest — Best params: {gs_rf.best_params_}")
print(f"    MAE:  {mae_rf:,.2f} | RMSE: {rmse_rf:,.2f} | R²: {r2_rf:.4f}")

# 6️⃣ Append a la lista de resultados
results.append({
    "model":       "RandomForest",
    "best_params": gs_rf.best_params_,
    "mae":         mae_rf,
    "rmse":        rmse_rf,
    "r2":          r2_rf
})


🔎 Random Forest — tuning hiperparámetros…
Fitting 5 folds for each of 6 candidates, totalling 30 fits
⏱️ Elapsed: 1 min
⏱️ Elapsed: 2 min
⏱️ Elapsed: 3 min

✅ GridSearchCV completado en 3.00 min
✔ Random Forest — Best params: {'max_depth': None, 'n_estimators': 100}
    MAE:  62.89 | RMSE: 185.66 | R²: 0.5778


# Gradient Boosting

In [50]:
# %% 6.2 Hyperparameter Tuning Gradient Boosting (Opción A) con cronómetro y append

import time
import threading
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Inicializa la lista de resultados si aún no existe
try:
    results
except NameError:
    results = []

# 1️⃣ Asegúrate de tener en memoria:
#    X_train_proc, y_train, X_valid_proc, y_valid

# 2️⃣ Pipeline y grid de búsqueda
pipe_gb = Pipeline([
    ("model", GradientBoostingRegressor(random_state=42))
])
gb_param_grid = {
    "model__n_estimators":    [100, 200],
    "model__learning_rate":   [0.05, 0.1]
}

gs_gb = GridSearchCV(
    estimator=pipe_gb,
    param_grid=gb_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1
)

# 3️⃣ Cronómetro en hilo aparte (imprime cada 60 s)
stop_timer_gb = False
def _print_timer_gb():
    mins = 0
    while not stop_timer_gb:
        time.sleep(60)
        mins += 1
        print(f"⏱️ Elapsed: {mins} min")

timer_thread_gb = threading.Thread(target=_print_timer_gb, daemon=True)
timer_thread_gb.start()

# 4️⃣ Fit + medición total
start_time = time.time()
print("🔎 Gradient Boosting — tuning hiperparámetros…")
gs_gb.fit(X_train_proc, y_train)
stop_timer_gb = True
timer_thread_gb.join()

total_min = (time.time() - start_time) / 60
print(f"\n✅ GridSearchCV completado en {total_min:.2f} min")

# 5️⃣ Evaluación sobre validación
best_gb   = gs_gb.best_estimator_
preds_gb  = best_gb.predict(X_valid_proc)

mse_gb    = mean_squared_error(y_valid, preds_gb)
rmse_gb   = np.sqrt(mse_gb)
mae_gb    = mean_absolute_error(y_valid, preds_gb)
r2_gb     = r2_score(y_valid, preds_gb)

print(f"✔ Gradient Boosting — Best params: {gs_gb.best_params_}")
print(f"    MAE:  {mae_gb:,.2f} | RMSE: {rmse_gb:,.2f} | R²: {r2_gb:.4f}")

# 6️⃣ Append a la lista de resultados
results.append({
    "model":       "GradientBoosting",
    "best_params": gs_gb.best_params_,
    "mae":         mae_gb,
    "rmse":        rmse_gb,
    "r2":          r2_gb
})


🔎 Gradient Boosting — tuning hiperparámetros…
Fitting 5 folds for each of 4 candidates, totalling 20 fits
⏱️ Elapsed: 1 min

✅ GridSearchCV completado en 1.00 min
✔ Gradient Boosting — Best params: {'model__learning_rate': 0.05, 'model__n_estimators': 200}
    MAE:  82.10 | RMSE: 236.67 | R²: 0.3139


# AdaBoost

In [51]:
# %% 6.3 Hyperparameter Tuning AdaBoost (Opción A) con cronómetro y append

import time
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Asegúrate de que la lista `results` exista
try:
    results
except NameError:
    results = []

# 1️⃣ Define el grid para AdaBoost directamente sobre X_train_proc
ab_param_grid = {
    "n_estimators":  [50, 100],
    "learning_rate": [0.5, 1.0]
}

# 2️⃣ Prepara y lanza el GridSearchCV
print("🔎 AdaBoost — tuning hiperparámetros sobre X_train_proc…")
t0 = time.time()
gs_ab = GridSearchCV(
    estimator=AdaBoostRegressor(
        estimator=DecisionTreeRegressor(max_depth=3),
        random_state=42
    ),
    param_grid=ab_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
    error_score="raise"
)
gs_ab.fit(X_train_proc, y_train)
elapsed_min = (time.time() - t0) / 60
print(f"\n✅ GridSearchCV AdaBoost completado en {elapsed_min:.2f} min")

# 3️⃣ Evaluación en validación
best_ab  = gs_ab.best_estimator_
preds_ab = best_ab.predict(X_valid_proc)

mse_ab   = mean_squared_error(y_valid, preds_ab)
rmse_ab  = np.sqrt(mse_ab)
mae_ab   = mean_absolute_error(y_valid, preds_ab)
r2_ab    = r2_score(y_valid, preds_ab)

print(f"✔ AdaBoost — Mejores parámetros: {gs_ab.best_params_}")
print(f"    MAE:  {mae_ab:,.2f} | RMSE: {rmse_ab:,.2f} | R²: {r2_ab:.4f}")

# 4️⃣ Append a la lista de resultados
results.append({
    "model":       "AdaBoost",
    "best_params": gs_ab.best_params_,
    "mae":         mae_ab,
    "rmse":        rmse_ab,
    "r2":          r2_ab
})


🔎 AdaBoost — tuning hiperparámetros sobre X_train_proc…
Fitting 5 folds for each of 4 candidates, totalling 20 fits

✅ GridSearchCV AdaBoost completado en 0.06 min
✔ AdaBoost — Mejores parámetros: {'learning_rate': 0.5, 'n_estimators': 50}
    MAE:  135.34 | RMSE: 267.04 | R²: 0.1265


# Bagging

In [52]:
# %% 6.4 Hyperparameter Tuning BaggingRegressor (Opción A) con cronómetro y append

import time
import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Asegúrate de que la lista `results` exista
try:
    results
except NameError:
    results = []

# 1️⃣ Define el grid para Bagging directamente sobre X_train_proc
bg_param_grid = {
    "n_estimators": [10, 50],
    "max_samples":  [0.8, 1.0]
}

# 2️⃣ Prepara y lanza el GridSearchCV
print("🔎 Bagging — tuning hiperparámetros sobre X_train_proc…")
t0 = time.time()
gs_bg = GridSearchCV(
    estimator=BaggingRegressor(
        estimator=DecisionTreeRegressor(max_depth=5),
        random_state=42
    ),
    param_grid=bg_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
    error_score="raise"
)
gs_bg.fit(X_train_proc, y_train)
elapsed_min = (time.time() - t0) / 60
print(f"\n✅ GridSearchCV Bagging completado en {elapsed_min:.2f} min")

# 3️⃣ Evaluación en validación
best_bg  = gs_bg.best_estimator_
preds_bg = best_bg.predict(X_valid_proc)

mse_bg   = mean_squared_error(y_valid, preds_bg)
rmse_bg  = np.sqrt(mse_bg)
mae_bg   = mean_absolute_error(y_valid, preds_bg)
r2_bg    = r2_score(y_valid, preds_bg)

print(f"✔ Bagging — Mejores parámetros: {gs_bg.best_params_}")
print(f"    MAE:  {mae_bg:,.2f} | RMSE: {rmse_bg:,.2f} | R²: {r2_bg:.4f}")

# 4️⃣ Append a la lista de resultados
results.append({
    "model":       "Bagging",
    "best_params": gs_bg.best_params_,
    "mae":         mae_bg,
    "rmse":        rmse_bg,
    "r2":          r2_bg
})

🔎 Bagging — tuning hiperparámetros sobre X_train_proc…
Fitting 5 folds for each of 4 candidates, totalling 20 fits

✅ GridSearchCV Bagging completado en 0.22 min
✔ Bagging — Mejores parámetros: {'max_samples': 1.0, 'n_estimators': 50}
    MAE:  90.91 | RMSE: 230.34 | R²: 0.3501


# K-Nearest Neighbors

In [53]:
# %% K-Nearest Neighbors tuning sobre datos preprocesados + timing

import time
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Asegúrate de que la lista `results` exista
try:
    results
except NameError:
    results = []

# 1️⃣ Definimos el parámetro grid para KNN
knn_param_grid = {
    "n_neighbors": [5, 10, 20]
}

# 2️⃣ Creamos y ejecutamos el GridSearchCV directamente sobre X_train_proc
print("🔎 KNN — tuning hiperparámetros sobre X_train_proc…")
t0 = time.time()
gs_knn = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=knn_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
    error_score="raise"
)
gs_knn.fit(X_train_proc, y_train)
elapsed_min = (time.time() - t0) / 60
print(f"\n✅ GridSearchCV KNN completado en {elapsed_min:.2f} min")

# 3️⃣ Evaluación en validación
best_knn  = gs_knn.best_estimator_
preds_knn = best_knn.predict(X_valid_proc)

mse_knn   = mean_squared_error(y_valid, preds_knn)
rmse_knn  = np.sqrt(mse_knn)
mae_knn   = mean_absolute_error(y_valid, preds_knn)
r2_knn    = r2_score(y_valid, preds_knn)

print(f"✔ KNN — Mejores parámetros: {gs_knn.best_params_}")
print(f"    MAE:  {mae_knn:,.2f} | RMSE: {rmse_knn:,.2f} | R²: {r2_knn:.4f}")

# 4️⃣ Append a la lista de resultados
results.append({
    "model":       "KNN",
    "best_params": gs_knn.best_params_,
    "mae":         mae_knn,
    "rmse":        rmse_knn,
    "r2":          r2_knn
})

🔎 KNN — tuning hiperparámetros sobre X_train_proc…
Fitting 5 folds for each of 3 candidates, totalling 15 fits

✅ GridSearchCV KNN completado en 0.04 min
✔ KNN — Mejores parámetros: {'n_neighbors': 20}
    MAE:  83.67 | RMSE: 234.20 | R²: 0.3282


# SGD Regressor

In [54]:
# %% SGDRegressor tuning sobre datos preprocesados + timing

import time
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Asegúrate de que la lista `results` exista
try:
    results
except NameError:
    results = []

# 1️⃣ Define el grid para SGDRegressor
sgd_param_grid = {
    "alpha":    [1e-4, 1e-3, 1e-2],
    "max_iter": [1000, 2000]
}

# 2️⃣ Lanza la búsqueda de hiperparámetros directamente sobre X_train_proc
print("🔎 SGDRegressor — tuning hiperparámetros sobre X_train_proc…")
t0 = time.time()
gs_sgd = GridSearchCV(
    estimator=SGDRegressor(random_state=42),
    param_grid=sgd_param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
    error_score="raise",
)
gs_sgd.fit(X_train_proc, y_train)
elapsed_min = (time.time() - t0) / 60
print(f"\n✅ GridSearchCV SGDRegressor completado en {elapsed_min:.2f} min")

# 3️⃣ Evalúa en validación
best_sgd  = gs_sgd.best_estimator_
preds_sgd = best_sgd.predict(X_valid_proc)

mse_sgd   = mean_squared_error(y_valid, preds_sgd)
rmse_sgd  = np.sqrt(mse_sgd)
mae_sgd   = mean_absolute_error(y_valid, preds_sgd)
r2_sgd    = r2_score(y_valid, preds_sgd)

print(f"✔ SGDRegressor — Mejores parámetros: {gs_sgd.best_params_}")
print(f"    MAE:  {mae_sgd:,.2f} | RMSE: {rmse_sgd:,.2f} | R²: {r2_sgd:.4f}")

# 4️⃣ Append a la lista de resultados
results.append({
    "model":       "SGDRegressor",
    "best_params": gs_sgd.best_params_,
    "mae":         mae_sgd,
    "rmse":        rmse_sgd,
    "r2":          r2_sgd
})


🔎 SGDRegressor — tuning hiperparámetros sobre X_train_proc…
Fitting 5 folds for each of 6 candidates, totalling 30 fits

✅ GridSearchCV SGDRegressor completado en 0.02 min
✔ SGDRegressor — Mejores parámetros: {'alpha': 0.0001, 'max_iter': 1000}
    MAE:  101.24 | RMSE: 244.21 | R²: 0.2695


# Stacking

In [55]:
# %% 7. Stacking (meta) — tuning hiperparámetros sobre datos preprocesados + timing

import time
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.ensemble import StackingRegressor, GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ——————————————————————————————————————————————
# 0️⃣ Asegúrate de que `results` exista
try:
    results
except NameError:
    results = []

# 1️⃣ Define el pipeline con el StackingRegressor                                             
pipe_stack = Pipeline([
    (
        "stack",
        StackingRegressor(
            estimators=[
                ("rf",  best_rf),
                ("gb",  best_gb),
                ("ab",  best_ab),
                ("bg",  best_bg),
                ("knn", best_knn),
                ("sgd", best_sgd),
            ],
            final_estimator=GradientBoostingRegressor(random_state=42),
            n_jobs=-1,
            passthrough=False,
        ),
    )
])

# 2️⃣ Grid de hiperparámetros para el meta‑estimador
param_grid_stack = {
    "stack__final_estimator__n_estimators":  [50, 100, 200],
    "stack__final_estimator__learning_rate": [0.01, 0.05, 0.1],
    "stack__final_estimator__max_depth":     [3, 5, 7],
}

# 3️⃣ Lanza el GridSearchCV
print("🔎 Stacking (meta) — tuning hiperparámetros…")
t0 = time.time()
gs_stack = GridSearchCV(
    estimator=pipe_stack,
    param_grid=param_grid_stack,
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
    error_score="raise"
)
gs_stack.fit(X_train_proc, y_train)
elapsed_min = (time.time() - t0) / 60
print(f"✅ GridSearchCV Stacking completado en {elapsed_min:.2f} min")

# 4️⃣ Evaluación sobre validación
best_stack = gs_stack.best_estimator_
preds_st   = best_stack.predict(X_valid_proc)

mse_st  = mean_squared_error(y_valid, preds_st)    # MSE
rmse_st = np.sqrt(mse_st)                          # RMSE manual
mae_st  = mean_absolute_error(y_valid, preds_st)
r2_st   = r2_score(y_valid, preds_st)

print(f"✔ Stacking — Best meta‑params: {gs_stack.best_params_}")
print(f"    MAE:  {mae_st:,.2f} | RMSE: {rmse_st:,.2f} | R²: {r2_st:.4f}")

# 5️⃣ Append a la lista de resultados
results.append({
    "model":       "Stacking",
    "best_params": gs_stack.best_params_,
    "mae":         mae_st,
    "rmse":        rmse_st,
    "r2":          r2_st
})

🔎 Stacking (meta) — tuning hiperparámetros…
Fitting 5 folds for each of 27 candidates, totalling 135 fits
✅ GridSearchCV Stacking completado en 70.58 min
✔ Stacking — Best meta‑params: {'stack__final_estimator__learning_rate': 0.05, 'stack__final_estimator__max_depth': 3, 'stack__final_estimator__n_estimators': 100}
    MAE:  65.73 | RMSE: 201.95 | R²: 0.5005


In [57]:
import pandas as pd
df_results = pd.DataFrame(results).sort_values("rmse").reset_index(drop=True)
df_results

Unnamed: 0,model,best_params,mae,rmse,r2
0,RandomForest,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
1,GradientBoosting,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
2,AdaBoost,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
3,Bagging,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
4,KNN,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
5,SGDRegressor,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
6,RandomForest,"{'max_depth': None, 'n_estimators': 100}",62.885952,185.658486,0.577802
7,Stacking,{'stack__final_estimator__learning_rate': 0.05...,65.727598,201.945329,0.500478
8,Bagging,"{'max_samples': 1.0, 'n_estimators': 50}",90.90642,230.339274,0.350136
9,KNN,{'n_neighbors': 20},83.67227,234.198086,0.328179
