In [39]:
from meta_model import SequentialEnsembleRegressor

In [40]:
import itertools

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils import check_random_state

In [41]:
# --- DATA LOADING ---
# Read the two raw datasets from CSV files; the relative paths are resolved
# against the notebook's working directory.
house_prices_df = pd.read_csv(filepath_or_buffer="house_prices.csv")
parkinsons_df = pd.read_csv(filepath_or_buffer="parkinsons.csv")

In [42]:
# --- HOUSE_PRICES PREPROCESSING ---
# Split the raw frame into the target ("SalePrice") and the feature matrix
# (every other column). Both are explicit copies, so later edits to them
# leave house_prices_df untouched.
y_house = house_prices_df["SalePrice"].copy()
X_house = house_prices_df.drop(columns=["SalePrice"]).copy()

In [43]:
# Encode categorical variables with LabelEncoder.
# Every object-dtype column is cast to str first (so missing values become the
# text "nan" and receive their own code) and is then replaced, in place in
# X_house, by the integer codes of a fresh encoder fitted on that column alone.
object_columns = X_house.select_dtypes(include=["object"]).columns
for column_name in object_columns:
    encoder = LabelEncoder()
    column_as_text = X_house[column_name].astype(str)
    X_house[column_name] = encoder.fit_transform(column_as_text)

In [44]:
# --- PARKINSONS PREPROCESSING ---
# Split the raw frame into the target ("total_UPDRS") and the feature matrix
# (every other column). Both are explicit copies, so later edits to them
# leave parkinsons_df untouched.
y_park = parkinsons_df["total_UPDRS"].copy()
X_park = parkinsons_df.drop(columns=["total_UPDRS"]).copy()

In [45]:
# --- SHOW RESULTING DIMENSIONS ---
# For each dataset, print the shape of the feature matrix followed by the
# shape of its target vector.
house_shapes = (X_house.shape, y_house.shape)
print("HOUSE PRICES:", *house_shapes)

park_shapes = (X_park.shape, y_park.shape)
print("PARKINSONS:", *park_shapes)

HOUSE PRICES: (560, 37) (560,)
PARKINSONS: (2000, 19) (2000,)


In [28]:
# 5-fold splitter reused by every cross-validation call below. Rows are
# shuffled before splitting and the fixed seed keeps the fold assignment
# repeatable between runs.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [48]:
# Training and cross-validation on the Parkinson dataset.
# Baseline configuration of the project-local SequentialEnsembleRegressor
# (imported from meta_model), built on depth-3 decision trees.
model = SequentialEnsembleRegressor(
    base_estimator=DecisionTreeRegressor(max_depth=3),
    n_estimators=100,
    lr=0.1,
    sample_size=0.8,
    random_state=42,
)

# One R² score per fold of the `kf` splitter; the data are passed as plain
# NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_park.values,
    y=y_park.values,
    scoring="r2",
    cv=kf,
)

# Report the fold average rounded to four decimals.
print("R² medio (Parkinson):", np.round(np.mean(scores), 4))

R² medio (Parkinson): 0.758


In [49]:
# Manual grid search over the ensemble hyperparameters on the Parkinson data.
# Every (n_estimators, lr, sample_size) combination is scored by
# cross-validation with the `kf` splitter, using R² as the metric.

# Values to explore for each hyperparameter
n_estimators_list = [10, 50, 100]
lr_list = [0.05, 0.1, 0.2]
sample_size_list = [0.6, 0.8, 1.0]

# One record (dict) per combination; the DataFrame is built once at the end.
resultados = []

# itertools.product varies its last argument fastest, so the combinations are
# visited in exactly the order three nested loops over these lists would
# produce (n_estimators outermost, sample_size innermost). The row labels of
# the resulting frame therefore keep the same meaning.
for n_estimators, lr, sample_size in itertools.product(
    n_estimators_list, lr_list, sample_size_list
):
    # NOTE: this rebinds the notebook-level name `model` on every iteration.
    model = SequentialEnsembleRegressor(
        base_estimator=DecisionTreeRegressor(max_depth=3),
        n_estimators=n_estimators,
        lr=lr,
        sample_size=sample_size,
        random_state=42
    )
    r2_scores = cross_val_score(model, X_park.values, y_park.values, cv=kf, scoring='r2')
    resultados.append({
        "n_estimators": n_estimators,
        "lr": lr,
        "sample_size": sample_size,
        # Fold average, rounded to four decimals for display.
        "r2_mean": np.round(r2_scores.mean(), 4)
    })

# Collect the records and rank them from best to worst mean R².
# sort_values returns a new frame here instead of mutating one in place,
# which keeps the cell a single expression from inputs to result.
df_resultados = pd.DataFrame(resultados).sort_values(by="r2_mean", ascending=False)

In [50]:
# Display the grid-search results table (sorted by mean R², best first).
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,r2_mean
26,100,0.2,1.0,0.8164
25,100,0.2,0.8,0.8073
24,100,0.2,0.6,0.7971
22,100,0.1,0.8,0.7613
16,50,0.2,0.8,0.7577
17,50,0.2,1.0,0.756
23,100,0.1,1.0,0.7547
21,100,0.1,0.6,0.7453
15,50,0.2,0.6,0.7441
12,50,0.1,0.6,0.6705
