In [51]:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
import numpy as np
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split


In [52]:
np.random.seed(60)
m = 1000
X = np.random.rand(m, 1) - 0.5
y = 3 * X[:, 0] ** 2 + 0.05 * np.random.randn(m)
cat_data = np.random.choice(['A', 'B', 'C'], size=(m, 1))
X_categorical = np.hstack([X, cat_data])
y = 0.7 * X[:, 0] ** 2 + 8 * X[:, 0] + np.random.randn(m) * 0.1
print("Dados contínuos e categóricos")
print("Shape de X_combined:", cat_data.shape)
print("Shape de y:", y.shape)

Dados contínuos e categóricos
Shape de X_combined: (1000, 1)
Shape de y: (1000,)


In [53]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=60)
X_train_cat, X_test_cat, y_train_cat, y_test_cat = train_test_split(X_categorical, y, test_size=0.2, random_state=60)

In [54]:
column_transformer = make_column_transformer(
    (OneHotEncoder(), [1]),
    remainder="passthrough"
)
hgb_reg_comb = make_pipeline(column_transformer, HistGradientBoostingRegressor(random_state=60))
hgb_reg_comb.fit(X_train_cat, y_train_cat)



In [55]:
y_pred_comb = hgb_reg_comb.predict(X_test_cat)
mae_comb = mean_absolute_error(y_test_cat, y_pred_comb)
r2_comb = r2_score(y_test_cat, y_pred_comb)

print("\nHistogram-Based Gradient Boosting Regressor (Contínuos e Categóricos):")
print(f"Mean Absolute Error (MAE): {mae_comb}")
print(f"R² Score: {r2_comb}")


Histogram-Based Gradient Boosting Regressor (Contínuos e Categóricos):
Mean Absolute Error (MAE): 0.08919953272434572
R² Score: 0.9976214780923666


In [56]:
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=100, learning_rate=0.1, random_state=60)
gbrt.fit(X_train, y_train)
y_pred_gbrt = gbrt.predict(X_test)


In [57]:
hgb_reg = HistGradientBoostingRegressor(max_iter=100, max_depth=2, random_state=60)
hgb_reg.fit(X_train, y_train)
y_pred_hgb = hgb_reg.predict(X_test)

In [58]:
xgb_reg = xgb.XGBRegressor(n_estimators=100, max_depth=2, learning_rate=0.1, random_state=60)
xgb_reg.fit(X_train, y_train)
y_pred_xgb = xgb_reg.predict(X_test)


In [59]:
lgb_reg = lgb.LGBMRegressor(n_estimators=100, max_depth=2, learning_rate=0.1, random_state=60, verbose=-1)
lgb_reg.fit(X_train, y_train)
y_pred_lgb = lgb_reg.predict(X_test)


In [60]:
cat_reg = CatBoostRegressor(iterations=100, depth=2, learning_rate=0.1, random_state=60, verbose=0)
cat_reg.fit(X_train, y_train)
y_pred_cat = cat_reg.predict(X_test)


In [61]:
mae_gbrt = mean_absolute_error(y_test, y_pred_gbrt)
r2_gbrt = r2_score(y_test, y_pred_gbrt)
mae_hgb = mean_absolute_error(y_test, y_pred_hgb)
r2_hgb = r2_score(y_test, y_pred_hgb)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)
mae_lgb = mean_absolute_error(y_test, y_pred_lgb)
r2_lgb = r2_score(y_test, y_pred_lgb)
mae_cat = mean_absolute_error(y_test, y_pred_cat)
r2_cat = r2_score(y_test, y_pred_cat)

In [62]:
print("Gradient Boosting Regressor:")
print(f"Mean Absolute Error (MAE): {mae_gbrt}")
print(f"R² Score: {r2_gbrt}")

print("\nHistogram-Based Gradient Boosting Regressor:")
print(f"Mean Absolute Error (MAE): {mae_hgb}")
print(f"R² Score: {r2_hgb}")

print("\nXGBoost Regressor:")
print(f"Mean Absolute Error (MAE): {mae_xgb}")
print(f"R² Score: {r2_xgb}")

print("\nLightGBM Regressor:")
print(f"Mean Absolute Error (MAE): {mae_lgb}")
print(f"R² Score: {r2_lgb}")

print("\nCatBoost Regressor:")
print(f"Mean Absolute Error (MAE): {mae_cat}")
print(f"R² Score: {r2_cat}")

Gradient Boosting Regressor:
Mean Absolute Error (MAE): 0.08986082545464574
R² Score: 0.9975794467894376

Histogram-Based Gradient Boosting Regressor:
Mean Absolute Error (MAE): 0.08930857787596094
R² Score: 0.9976581833625154

XGBoost Regressor:
Mean Absolute Error (MAE): 0.08741743360250476
R² Score: 0.9976771114998497

LightGBM Regressor:
Mean Absolute Error (MAE): 0.08983152927879097
R² Score: 0.9976106960248986

CatBoost Regressor:
Mean Absolute Error (MAE): 0.09054686561391845
R² Score: 0.9975940267199761
