# Algoritmos de conjunto

sklearn.ensemble

In [1]:
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error

df = sns.load_dataset('mpg').dropna()

features = ['weight', 'cylinders', 'displacement', 'horsepower', 'acceleration', 'model_year']
X = df[features]
y = df['mpg']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



# 1. Bagging (Boostrap Aggregating)

In [2]:
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, ExtraTreesRegressor

models = {
    'BaggingRegressor': BaggingRegressor(random_state=42),
    'RandomForestRegressor': RandomForestRegressor(random_state=42),
    'ExtraTreesRegressor': ExtraTreesRegressor(random_state=42),
    
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name}: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

BaggingRegressor: R2 = 0.842, MAE = 2.049, RMSE = 2.838, MAPE = 0.091
RandomForestRegressor: R2 = 0.885, MAE = 1.761, RMSE = 2.423, MAPE = 0.080
ExtraTreesRegressor: R2 = 0.896, MAE = 1.693, RMSE = 2.305, MAPE = 0.076


# 2. Voting

In [8]:
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import  SVR
from sklearn.tree import DecisionTreeRegressor

model_1 = LinearRegression()
model_2 = KNeighborsRegressor()
model_3 = SVR()
model_4 = DecisionTreeRegressor()

model = VotingRegressor([
    ('linear_regression', model_1),
    ('knn', model_2),
    ('svr', model_3),
    ('cart', model_4),
])

model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"VotingRegressor: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

VotingRegressor: R2 = 0.826, MAE = 2.186, RMSE = 2.981, MAPE = 0.096


In [9]:
model_1 = RandomForestRegressor()
model_2 = ExtraTreesRegressor()

model = VotingRegressor([
    ('rf', model_1),
    ('et', model_2),
  
])

model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"VotingRegressor: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

VotingRegressor: R2 = 0.897, MAE = 1.665, RMSE = 2.297, MAPE = 0.075


# 3. Boosting

In [4]:
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor

models = {
    'AdaBoostRegressor': AdaBoostRegressor(random_state=42), # Utiliza por defecto 50 stimator
    'GradientBoostingRegressor': GradientBoostingRegressor() # 100 stimator por defecto
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name}: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

AdaBoostRegressor: R2 = 0.818, MAE = 2.240, RMSE = 3.048, MAPE = 0.103
GradientBoostingRegressor: R2 = 0.875, MAE = 1.810, RMSE = 2.526, MAPE = 0.081


# 4. Stacking (Stacked generalization)

In [12]:
from sklearn.ensemble import StackingRegressor


model_2 = KNeighborsRegressor()
model_3 = SVR()
model_4 = DecisionTreeRegressor()

model = StackingRegressor([
        ('linear_regression', model_1),
        ('knn', model_2),
        ('svr', model_3),
        ('cart', model_4),
    ],
    final_estimator = RandomForestRegressor(random_state=42)  # este es un meta estimator que se entrena usando como entrada los modelos base                     
)                    

model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"StackingRegressor: R2 = {r2_score(y_test, y_pred):.3f}, MAE = {mean_absolute_error(y_test, y_pred):.3f}, RMSE = {root_mean_squared_error(y_test, y_pred):.3f}, MAPE = {mean_absolute_percentage_error(y_test, y_pred):.3f}")

StackingRegressor: R2 = 0.873, MAE = 1.951, RMSE = 2.551, MAPE = 0.089
