In [2]:
from meta_model import *

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.utils import check_random_state
from sklearn.model_selection import KFold

In [4]:
# --- CARGA DE DATOS ---
house_prices_df = pd.read_csv("../data/house_prices.csv")
parkinsons_df = pd.read_csv("../data/parkinsons.csv")

In [4]:
# --- PREPROCESADO HOUSE_PRICES ---
X_house = house_prices_df.drop(columns="SalePrice").copy()
y_house = house_prices_df["SalePrice"].copy()

In [5]:
# Codificar variables categóricas con LabelEncoder
for col in X_house.select_dtypes(include=["object"]).columns:
    X_house[col] = LabelEncoder().fit_transform(X_house[col].astype(str))

In [6]:
# --- PREPROCESADO PARKINSONS ---
X_park = parkinsons_df.drop(columns="total_UPDRS").copy()
y_park = parkinsons_df["total_UPDRS"].copy()

In [7]:
# --- MOSTRAR DIMENSIONES RESULTANTES ---
print("HOUSE PRICES:", X_house.shape, y_house.shape)
print("PARKINSONS:", X_park.shape, y_park.shape)

HOUSE PRICES: (560, 37) (560,)
PARKINSONS: (2000, 19) (2000,)


In [8]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [9]:
# Entrenamiento y validación cruzada con dataset de Parkinson
model = SequentialEnsembleRegressor(
    base_estimator=DecisionTreeRegressor(max_depth=3),
    n_estimators=100,
    sample_size=0.8,
    lr=0.1,
    random_state=42,
)

scores = cross_val_score(model, X_park.values, y_park.values, cv=kf, scoring='r2')

print("R² medio (Parkinson):", np.round(scores.mean(), 4))

R² medio (Parkinson): 0.7607


In [11]:
# Valores a explorar manualmente
param_grid = {
    "n_estimators": [50, 100],
    "lr": [0.05, 0.1],
    "sample_size": [0.8],
    "max_depth": [3, 5, 10]
}

# Entrenamiento y validación cruzada con dataset de Parkinson y diferentes hiperparámetros
df_resultados = explorar_hiperparametros(
    estimator_class=DecisionTreeRegressor,
    param_grid=param_grid,
    X=X_park.values,
    y=y_park.values,
    cv=kf
)

# Mostrar resultados
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
8,100,0.05,0.8,10,0.9386
11,100,0.1,0.8,10,0.9356
5,50,0.1,0.8,10,0.9345
2,50,0.05,0.8,10,0.9215
10,100,0.1,0.8,5,0.8865
7,100,0.05,0.8,5,0.8521
4,50,0.1,0.8,5,0.8481
1,50,0.05,0.8,5,0.7765
9,100,0.1,0.8,3,0.76
3,50,0.1,0.8,3,0.6644


In [14]:
# Entrenamiento y validación cruzada con dataset de House Prices
model = SequentialEnsembleRegressor(
    base_estimator=DecisionTreeRegressor(max_depth=10),
    n_estimators=100,
    sample_size=0.8,
    lr=0.1,
    random_state=42,
)

scores = cross_val_score(model, X_house.values, y_house.values, cv=kf, scoring='r2')

print("R² medio (House Prices):", np.round(scores.mean(), 4))

R² medio (House Prices): 0.7079


In [21]:
# Valores a explorar manualmente
param_grid = {
    "n_estimators": [50, 100, 120],
    "lr": [0.05, 0.1],
    "sample_size": [0.6, 0.8, 1.0],
    "max_depth": [3, 5, 10]
}

# Entrenamiento y validación cruzada con dataset de Parkinson y diferentes hiperparámetros
df_resultados = explorar_hiperparametros(
    estimator_class=DecisionTreeRegressor,
    param_grid=param_grid,
    X=X_house.values,
    y=y_house.values,
    cv=kf
)

# Mostrar resultados
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
13,50,0.1,0.8,5,0.7688
40,120,0.05,0.8,5,0.7631
22,100,0.05,0.8,5,0.7625
28,100,0.1,0.6,5,0.7597
11,50,0.1,0.6,10,0.7556
31,100,0.1,0.8,5,0.7554
10,50,0.1,0.6,5,0.7516
49,120,0.1,0.8,5,0.7471
20,100,0.05,0.6,10,0.7462
46,120,0.1,0.6,5,0.7459


In [22]:
# Valores a explorar manualmente
param_grid = {
    "n_estimators": [100, 150, 200],
    "lr": [0.01, 0.02, 0.05],
    "sample_size": [0.7, 0.8, 0.85, 0.9],
    "max_depth": [5, 6, 7]
}

# Entrenamiento y validación cruzada con dataset de Parkinson y diferentes hiperparámetros
df_resultados = explorar_hiperparametros(
    estimator_class=DecisionTreeRegressor,
    param_grid=param_grid,
    X=X_house.values,
    y=y_house.values,
    cv=kf
)

# Mostrar resultados
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
66,150,0.05,0.85,5,0.7664
69,150,0.05,0.90,5,0.7651
63,150,0.05,0.80,5,0.7637
27,100,0.05,0.80,5,0.7618
99,200,0.05,0.80,5,0.7592
...,...,...,...,...,...
0,100,0.01,0.70,5,0.6223
10,100,0.01,0.90,6,0.6208
3,100,0.01,0.80,5,0.6201
6,100,0.01,0.85,5,0.6196


In [23]:
# Valores a explorar manualmente
param_grid = {
    "n_estimators": [50, 100, 150],
    "lr": [0.05, 0.1],
    "sample_size": [0.8, 0.85, 0.9],
    "max_depth": [5, 6, 7]
}

# Entrenamiento y validación cruzada con dataset de Parkinson y diferentes hiperparámetros
df_resultados = explorar_hiperparametros(
    estimator_class=DecisionTreeRegressor,
    param_grid=param_grid,
    X=X_house.values,
    y=y_house.values,
    cv=kf
)

# Mostrar resultados
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
30,100,0.1,0.85,5,0.7665
31,100,0.1,0.85,6,0.7652
12,50,0.1,0.85,5,0.762
33,100,0.1,0.9,5,0.7619
24,100,0.05,0.9,5,0.7571
27,100,0.1,0.8,5,0.7569
36,150,0.05,0.8,5,0.7565
15,50,0.1,0.9,5,0.7564
49,150,0.1,0.85,6,0.7563
34,100,0.1,0.9,6,0.7557


In [24]:
# Valores a explorar manualmente
param_grid = {
    "n_estimators": [50, 100, 150],
    "lr": [0.05, 0.75, 0.1],
    "sample_size": [0.8, 0.85, 0.9],
    "max_depth": [4, 5, 6]
}

# Entrenamiento y validación cruzada con dataset de Parkinson y diferentes hiperparámetros
df_resultados = explorar_hiperparametros(
    estimator_class=DecisionTreeRegressor,
    param_grid=param_grid,
    X=X_house.values,
    y=y_house.values,
    cv=kf
)

# Mostrar resultados
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
61,150,0.05,0.90,5,0.7661
57,150,0.05,0.85,4,0.7661
58,150,0.05,0.85,5,0.7627
30,100,0.05,0.85,4,0.7615
51,100,0.10,0.90,4,0.7614
...,...,...,...,...,...
70,150,0.75,0.90,5,0.5329
14,50,0.75,0.85,6,0.5327
13,50,0.75,0.85,5,0.5322
10,50,0.75,0.80,5,0.5055
