In [2]:
from meta_model import *

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.utils import check_random_state
from sklearn.model_selection import KFold

In [4]:
# --- DATA LOADING ---
# Read the two regression datasets from the data directory one level up.
house_prices_df = pd.read_csv(filepath_or_buffer="../data/house_prices.csv")
parkinsons_df = pd.read_csv(filepath_or_buffer="../data/parkinsons.csv")

In [5]:
# Discard every row that contains at least one missing value.
house_prices_df = house_prices_df.dropna(axis=0, how="any")
parkinsons_df = parkinsons_df.dropna(axis=0, how="any")

In [6]:
# --- HOUSE PRICES PREPROCESSING ---
# Separate the target column ("SalePrice") from the remaining attributes.
y_house = house_prices_df["SalePrice"].copy()
X_house = house_prices_df.drop(columns=["SalePrice"]).copy()

In [7]:
# Encode the object-typed attributes as numbers with an OrdinalEncoder.
cat_cols = X_house.select_dtypes(include=["object"]).columns
X_house[cat_cols] = (
    OrdinalEncoder()
    .fit(X_house[cat_cols])
    .transform(X_house[cat_cols])
)

In [8]:
# --- PARKINSONS PREPROCESSING ---
# Separate the target column ("total_UPDRS") from the remaining attributes.
y_park = parkinsons_df["total_UPDRS"].copy()
X_park = parkinsons_df.drop(columns=["total_UPDRS"]).copy()

In [9]:
# Scaler that maps each attribute to the interval [0, 1].
normalizador = MinMaxScaler(feature_range=(0, 1))

In [10]:
# Normalize the House Prices attributes.
# A new float DataFrame is built instead of assigning through `[:]`: slice
# assignment tries to write the scaled floats into the existing columns, and
# recent pandas versions deprecate that when a column has an incompatible
# (e.g. integer) dtype. Column labels and the row index are carried over.
# NOTE(review): the scaler is fitted on the whole dataset before the
# cross-validation below, so fold statistics leak into training — consider
# wrapping scaler + model in a Pipeline.
X_house_normalizados = pd.DataFrame(
    normalizador.fit_transform(X_house),
    columns=X_house.columns,
    index=X_house.index,
)

In [11]:
# Normalize the Parkinson attributes.
# A new float DataFrame is built instead of assigning through `[:]`: slice
# assignment tries to write the scaled floats into the existing columns, and
# recent pandas versions deprecate that when a column has an incompatible
# (e.g. integer) dtype. Column labels and the row index are carried over.
# `fit_transform` refits the shared scaler on this dataset.
X_park_normalizados = pd.DataFrame(
    normalizador.fit_transform(X_park),
    columns=X_park.columns,
    index=X_park.index,
)

In [12]:
# --- SHOW RESULTING DIMENSIONS ---
# For each dataset, print the shape of the attribute frame and of the target.
print("HOUSE PRICES:", *(part.shape for part in (X_house, y_house)))
print("PARKINSONS:", *(part.shape for part in (X_park, y_park)))

HOUSE PRICES: (560, 37) (560,)
PARKINSONS: (2000, 19) (2000,)


In [13]:
# 5-fold splitter used as `cv` by the experiments below; rows are shuffled
# with a fixed seed so the folds are reproducible.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [14]:
# Baseline model: a single DecisionTreeRegressor (depth 5, fixed seed).
modelo_base = DecisionTreeRegressor(
    max_depth=5,
    random_state=42,
)

In [15]:
# Baseline on the Parkinson dataset: per-fold R², then report the mean.
r2_base_park = cross_val_score(
    estimator=modelo_base,
    X=X_park,
    y=y_park,
    scoring="r2",
    cv=kf,
)
print("Modelo base (Parkinson) - R² medio:", np.round(np.mean(r2_base_park), 4))

Modelo base (Parkinson) - R² medio: 0.4939


In [16]:
# Cross-validate the sequential ensemble (decision-tree base) on Parkinson.
model = SequentialEnsembleRegressor(
    random_state=42,
    lr=0.1,
    sample_size=0.8,
    n_estimators=100,
    base_estimator=DecisionTreeRegressor(max_depth=5),
)

# The ensemble is fed plain NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_park.to_numpy(),
    y=y_park.to_numpy(),
    scoring="r2",
    cv=kf,
)

print("R² medio (Parkinson):", np.round(np.mean(scores), 4))

R² medio (Parkinson): 0.8865


In [65]:
# Hyperparameter values to explore manually.
param_grid = dict(
    n_estimators=[50, 100],
    lr=[0.05, 0.1],
    sample_size=[0.8],
    max_depth=[3, 5, 10],
)

# Cross-validate the decision-tree ensemble on the Parkinson dataset over the
# grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_park.to_numpy(),
    X=X_park.to_numpy(),
    param_grid=param_grid,
    estimator_class=DecisionTreeRegressor,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
8,100,0.05,0.8,10,0.9403
5,50,0.1,0.8,10,0.9346
11,100,0.1,0.8,10,0.9325
2,50,0.05,0.8,10,0.9195
10,100,0.1,0.8,5,0.8875
7,100,0.05,0.8,5,0.8526
4,50,0.1,0.8,5,0.846
1,50,0.05,0.8,5,0.7771
9,100,0.1,0.8,3,0.7576
3,50,0.1,0.8,3,0.6698


In [21]:
# Baseline on the House Prices dataset: per-fold R², then report the mean.
r2_base_house = cross_val_score(
    estimator=modelo_base,
    X=X_house,
    y=y_house,
    scoring="r2",
    cv=kf,
)
print("Modelo base (House Prices) - R² medio:", np.round(np.mean(r2_base_house), 4))

Modelo base (House Prices) - R² medio: 0.5256


In [22]:
# Cross-validate the sequential ensemble (decision-tree base) on House Prices.
model = SequentialEnsembleRegressor(
    random_state=42,
    lr=0.1,
    sample_size=0.8,
    n_estimators=100,
    base_estimator=DecisionTreeRegressor(max_depth=5),
)

# The ensemble is fed plain NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_house.to_numpy(),
    y=y_house.to_numpy(),
    scoring="r2",
    cv=kf,
)

print("R² medio (House Prices):", np.round(np.mean(scores), 4))

R² medio (House Prices): 0.7442


In [21]:
# Hyperparameter values to explore manually.
param_grid = dict(
    n_estimators=[50, 100, 120],
    lr=[0.05, 0.1],
    sample_size=[0.6, 0.8, 1.0],
    max_depth=[3, 5, 10],
)

# Cross-validate the decision-tree ensemble on the House Prices dataset over
# the grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house.to_numpy(),
    param_grid=param_grid,
    estimator_class=DecisionTreeRegressor,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
13,50,0.1,0.8,5,0.7688
40,120,0.05,0.8,5,0.7631
22,100,0.05,0.8,5,0.7625
28,100,0.1,0.6,5,0.7597
11,50,0.1,0.6,10,0.7556
31,100,0.1,0.8,5,0.7554
10,50,0.1,0.6,5,0.7516
49,120,0.1,0.8,5,0.7471
20,100,0.05,0.6,10,0.7462
46,120,0.1,0.6,5,0.7459


In [22]:
# Hyperparameter values to explore manually (smaller learning rates, more
# estimators, depths 5-7).
param_grid = dict(
    n_estimators=[100, 150, 200],
    lr=[0.01, 0.02, 0.05],
    sample_size=[0.7, 0.8, 0.85, 0.9],
    max_depth=[5, 6, 7],
)

# Cross-validate the decision-tree ensemble on the House Prices dataset over
# the grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house.to_numpy(),
    param_grid=param_grid,
    estimator_class=DecisionTreeRegressor,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
66,150,0.05,0.85,5,0.7664
69,150,0.05,0.90,5,0.7651
63,150,0.05,0.80,5,0.7637
27,100,0.05,0.80,5,0.7618
99,200,0.05,0.80,5,0.7592
...,...,...,...,...,...
0,100,0.01,0.70,5,0.6223
10,100,0.01,0.90,6,0.6208
3,100,0.01,0.80,5,0.6201
6,100,0.01,0.85,5,0.6196


In [23]:
# Hyperparameter values to explore manually.
param_grid = dict(
    n_estimators=[50, 100, 150],
    lr=[0.05, 0.1],
    sample_size=[0.8, 0.85, 0.9],
    max_depth=[5, 6, 7],
)

# Cross-validate the decision-tree ensemble on the House Prices dataset over
# the grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house.to_numpy(),
    param_grid=param_grid,
    estimator_class=DecisionTreeRegressor,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
30,100,0.1,0.85,5,0.7665
31,100,0.1,0.85,6,0.7652
12,50,0.1,0.85,5,0.762
33,100,0.1,0.9,5,0.7619
24,100,0.05,0.9,5,0.7571
27,100,0.1,0.8,5,0.7569
36,150,0.05,0.8,5,0.7565
15,50,0.1,0.9,5,0.7564
49,150,0.1,0.85,6,0.7563
34,100,0.1,0.9,6,0.7557


In [73]:
# Hyperparameter values to explore manually (adds lr=0.075 and depth 4).
param_grid = dict(
    n_estimators=[50, 100, 150],
    lr=[0.05, 0.075, 0.1],
    sample_size=[0.8, 0.85, 0.9],
    max_depth=[4, 5, 6],
)

# Cross-validate the decision-tree ensemble on the House Prices dataset over
# the grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house.to_numpy(),
    param_grid=param_grid,
    estimator_class=DecisionTreeRegressor,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
49,100,0.100,0.85,5,0.7738
76,150,0.100,0.85,5,0.7684
67,150,0.075,0.85,5,0.7683
69,150,0.075,0.90,4,0.7639
60,150,0.050,0.90,4,0.7637
...,...,...,...,...,...
29,100,0.050,0.80,6,0.7355
59,150,0.050,0.85,6,0.7339
0,50,0.050,0.80,4,0.7318
74,150,0.100,0.80,6,0.7317


In [23]:
# Baseline model: LinearRegression with its default settings.
modelo_base_lr = LinearRegression()

In [24]:
# Linear-regression baseline on the normalized Parkinson attributes:
# per-fold R², then report the mean.
r2_base_park = cross_val_score(
    estimator=modelo_base_lr,
    X=X_park_normalizados,
    y=y_park,
    scoring="r2",
    cv=kf,
)
print("Modelo base (LinearRegression - Parkinson) - R² medio:", np.round(np.mean(r2_base_park), 4))

Modelo base (LinearRegression - Parkinson) - R² medio: 0.168


In [25]:
# Cross-validate the sequential ensemble (linear-regression base) on the
# normalized Parkinson attributes.
model = SequentialEnsembleRegressor(
    random_state=42,
    lr=0.1,
    sample_size=0.8,
    n_estimators=100,
    base_estimator=LinearRegression(),
)

# The ensemble is fed plain NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_park_normalizados.to_numpy(),
    y=y_park.to_numpy(),
    scoring="r2",
    cv=kf,
)

print("R² medio (Parkinson):", np.round(np.mean(scores), 4))

R² medio (Parkinson): 0.1625


In [74]:
# Hyperparameter values to explore manually (ensemble-level settings only).
param_grid = dict(
    n_estimators=[50, 100, 150],
    lr=[0.05, 0.075, 0.1],
    sample_size=[0.8, 0.85, 0.9],
)

# Cross-validate the linear-regression ensemble on the normalized Parkinson
# dataset over the grid. The helper is defined outside this notebook
# (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_park.to_numpy(),
    X=X_park_normalizados.to_numpy(),
    param_grid=param_grid,
    estimator_class=LinearRegression,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
25,150,0.1,0.85,,0.1689
24,150,0.1,0.8,,0.1688
22,150,0.075,0.85,,0.1687
21,150,0.075,0.8,,0.1685
26,150,0.1,0.9,,0.1683
19,150,0.05,0.85,,0.1683
23,150,0.075,0.9,,0.1681
18,150,0.05,0.8,,0.168
20,150,0.05,0.9,,0.1679
2,50,0.05,0.9,,0.1677


In [45]:
# Cross-validate the sequential ensemble (linear-regression base) on the
# normalized House Prices attributes.
model = SequentialEnsembleRegressor(
    random_state=42,
    lr=0.1,
    sample_size=0.8,
    n_estimators=100,
    base_estimator=LinearRegression(),
)

# The ensemble is fed plain NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_house_normalizados.to_numpy(),
    y=y_house.to_numpy(),
    scoring="r2",
    cv=kf,
)

print("R² medio (House Prices):", np.round(np.mean(scores), 4))

R² medio (House Prices): 0.7058


In [75]:
# Hyperparameter values to explore manually (ensemble-level settings only).
param_grid = dict(
    n_estimators=[50, 100, 150],
    lr=[0.05, 0.075, 0.1],
    sample_size=[0.8, 0.85, 0.9],
)

# Cross-validate the linear-regression ensemble on the normalized House Prices
# dataset over the grid. The helper is defined outside this notebook
# (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house_normalizados.to_numpy(),
    param_grid=param_grid,
    estimator_class=LinearRegression,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
16,100,0.1,0.85,,0.7062
17,100,0.1,0.9,,0.7058
15,100,0.1,0.8,,0.7058
14,100,0.075,0.9,,0.7052
13,100,0.075,0.85,,0.7051
11,100,0.05,0.9,,0.7049
12,100,0.075,0.8,,0.7045
10,100,0.05,0.85,,0.7043
9,100,0.05,0.8,,0.7034
25,150,0.1,0.85,,0.7014


In [66]:
# Cross-validate the sequential ensemble (Ridge base) on the normalized
# Parkinson attributes.
model = SequentialEnsembleRegressor(
    # Pass an estimator *instance*, as every other experiment in this notebook
    # does; the original passed the Ridge class itself. Ridge() uses the
    # default regularization strength.
    base_estimator=Ridge(),
    n_estimators=20,
    sample_size=0.8,
    lr=0.1,
    random_state=42,
)

scores = cross_val_score(model, X_park_normalizados.values, y_park.values, cv=kf, scoring='r2')

print("R² medio (Parkinson):", np.round(scores.mean(), 4))

R² medio (Parkinson): 0.1706


In [76]:
# Hyperparameter values to explore manually (ensemble-level settings only).
param_grid = dict(
    n_estimators=[50, 100, 150],
    lr=[0.05, 0.075, 0.1],
    sample_size=[0.8, 0.85, 0.9],
)

# Cross-validate the Ridge ensemble on the normalized Parkinson dataset over
# the grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_park.to_numpy(),
    X=X_park_normalizados.to_numpy(),
    param_grid=param_grid,
    estimator_class=Ridge,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
5,50,0.075,0.9,,0.1727
2,50,0.05,0.9,,0.1725
3,50,0.075,0.8,,0.1725
4,50,0.075,0.85,,0.1725
0,50,0.05,0.8,,0.1723
1,50,0.05,0.85,,0.1723
8,50,0.1,0.9,,0.172
10,100,0.05,0.85,,0.1719
19,150,0.05,0.85,,0.1718
7,50,0.1,0.85,,0.1717


In [68]:
# Cross-validate the sequential ensemble (Ridge base, alpha=1.0) on the
# normalized House Prices attributes.
model = SequentialEnsembleRegressor(
    random_state=42,
    lr=0.1,
    sample_size=0.8,
    n_estimators=100,
    base_estimator=Ridge(alpha=1.0),
)

# The ensemble is fed plain NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_house_normalizados.to_numpy(),
    y=y_house.to_numpy(),
    scoring="r2",
    cv=kf,
)

print("R² medio (House Prices):", np.round(np.mean(scores), 4))

R² medio (House Prices): 0.7088


In [72]:
# Hyperparameter values to explore manually (fewer estimators this time).
param_grid = dict(
    n_estimators=[25, 50, 100],
    lr=[0.05, 0.075, 0.1],
    sample_size=[0.8, 0.85, 0.9],
)

# Cross-validate the Ridge ensemble on the normalized House Prices dataset
# over the grid. The helper is defined outside this notebook (presumably
# meta_model).
df_resultados = explorar_hiperparametros(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house_normalizados.to_numpy(),
    param_grid=param_grid,
    estimator_class=Ridge,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,max_depth,r2_mean
6,25,0.1,0.8,,0.7186
7,25,0.1,0.85,,0.718
9,50,0.05,0.8,,0.7177
10,50,0.05,0.85,,0.7176
11,50,0.05,0.9,,0.7176
8,25,0.1,0.9,,0.7175
3,25,0.075,0.8,,0.7153
4,25,0.075,0.85,,0.7148
5,25,0.075,0.9,,0.7147
14,50,0.075,0.9,,0.7127


In [15]:
# Baseline model: k-nearest-neighbours regressor with 7 neighbours.
modelo_base_knn = KNeighborsRegressor(
    n_neighbors=7,
)

In [16]:
# KNN baseline on the normalized Parkinson attributes: per-fold R², then
# report the mean.
r2_base_park = cross_val_score(
    estimator=modelo_base_knn,
    X=X_park_normalizados,
    y=y_park,
    scoring="r2",
    cv=kf,
)
print("Modelo base (KNN - Parkinson) - R² medio:", np.round(np.mean(r2_base_park), 4))

Modelo base (KNN - Parkinson) - R² medio: 0.5155


In [17]:
# Cross-validate the sequential ensemble (KNN base) on the normalized
# Parkinson attributes.
model = SequentialEnsembleRegressor(
    base_estimator=KNeighborsRegressor(n_neighbors=10),
    n_estimators=200,
    sample_size=0.8,
    lr=0.01,
    random_state=42,
)

scores = cross_val_score(model, X_park_normalizados.values, y_park.values, cv=kf, scoring='r2')

# The label now names the dataset actually evaluated above (Parkinson); the
# original printed "House Prices" by mistake.
print("R² medio (Parkinson):", np.round(scores.mean(), 4))

R² medio (House Prices): 0.4929


In [18]:
# Hyperparameter values to explore manually: ensemble settings plus the KNN
# neighbour count and distance metric.
param_grid = dict(
    n_estimators=[200, 400],
    lr=[0.005, 0.01],
    sample_size=[0.7, 0.8],
    n_neighbors=[5, 7],
    metric=["euclidean", "manhattan"],
)

# Cross-validate the KNN ensemble on the normalized Parkinson dataset over the
# grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros_knn(
    cv=kf,
    y=y_park.to_numpy(),
    X=X_park_normalizados.to_numpy(),
    param_grid=param_grid,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,n_neighbors,metric,r2_mean
9,200,0.01,0.7,5,manhattan,0.5763
17,400,0.005,0.7,5,manhattan,0.5754
21,400,0.005,0.8,5,manhattan,0.5716
13,200,0.01,0.8,5,manhattan,0.5714
11,200,0.01,0.7,7,manhattan,0.57
19,400,0.005,0.7,7,manhattan,0.5698
23,400,0.005,0.8,7,manhattan,0.5688
15,200,0.01,0.8,7,manhattan,0.5688
5,200,0.005,0.8,5,manhattan,0.5103
16,400,0.005,0.7,5,euclidean,0.507


In [19]:
# KNN baseline on the normalized House Prices attributes: per-fold R², then
# report the mean.
r2_base_house = cross_val_score(
    estimator=modelo_base_knn,
    X=X_house_normalizados,
    y=y_house,
    scoring="r2",
    cv=kf,
)
print("Modelo base (KNN - House Prices) - R² medio:", np.round(np.mean(r2_base_house), 4))

Modelo base (KNN - House Prices) - R² medio: 0.6707


In [20]:
# Cross-validate the sequential ensemble (KNN base, 7 neighbours) on the
# normalized House Prices attributes.
model = SequentialEnsembleRegressor(
    random_state=42,
    lr=0.005,
    sample_size=0.8,
    n_estimators=400,
    base_estimator=KNeighborsRegressor(n_neighbors=7),
)

# The ensemble is fed plain NumPy arrays rather than pandas objects.
scores = cross_val_score(
    estimator=model,
    X=X_house_normalizados.to_numpy(),
    y=y_house.to_numpy(),
    scoring="r2",
    cv=kf,
)

print("R² medio (House Prices):", np.round(np.mean(scores), 4))

R² medio (House Prices): 0.679


In [21]:
# Hyperparameter values to explore manually: ensemble settings plus the KNN
# neighbour count and distance metric.
param_grid = dict(
    n_estimators=[200, 400],
    lr=[0.005, 0.01],
    sample_size=[0.7, 0.8],
    n_neighbors=[5, 7],
    metric=["euclidean", "manhattan"],
)

# Cross-validate the KNN ensemble on the normalized House Prices dataset over
# the grid. The helper is defined outside this notebook (presumably meta_model).
df_resultados = explorar_hiperparametros_knn(
    cv=kf,
    y=y_house.to_numpy(),
    X=X_house_normalizados.to_numpy(),
    param_grid=param_grid,
)

# Display the results table.
df_resultados

Unnamed: 0,n_estimators,lr,sample_size,n_neighbors,metric,r2_mean
19,400,0.005,0.7,7,manhattan,0.6976
11,200,0.01,0.7,7,manhattan,0.6972
23,400,0.005,0.8,7,manhattan,0.6938
15,200,0.01,0.8,7,manhattan,0.693
10,200,0.01,0.7,7,euclidean,0.6863
18,400,0.005,0.7,7,euclidean,0.6862
17,400,0.005,0.7,5,manhattan,0.6856
9,200,0.01,0.7,5,manhattan,0.6849
27,400,0.01,0.7,7,manhattan,0.681
22,400,0.005,0.8,7,euclidean,0.679
