In [1]:

from sklearn.datasets import make_regression, make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
# Gradient Boosting
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
# XGBoost
from xgboost import XGBRegressor, XGBClassifier
# LightGBM
from lightgbm import LGBMRegressor, LGBMClassifier
# CatBoost
from catboost import CatBoostRegressor, CatBoostClassifier

# Regression Comparison of All Algorithms

In [2]:
import warnings
warnings.filterwarnings('ignore')

# Dataset
X_reg, y_reg = make_regression(n_samples=2000, n_features=20, noise=0.2, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, max_depth=3, random_state=42)
gbr.fit(X_train_reg, y_train_reg)
y_pred_gbr = gbr.predict(X_test_reg)
print("Gradient Boosting Regressor: MSE =", mean_squared_error(y_test_reg, y_pred_gbr), "R2 =", r2_score(y_test_reg, y_pred_gbr))

# XGBoost Regressor
xgb = XGBRegressor(n_estimators=200, learning_rate=0.1, max_depth=3, random_state=42)
xgb.fit(X_train_reg, y_train_reg)
y_pred_xgb = xgb.predict(X_test_reg)
print("XGBoost Regressor: MSE =", mean_squared_error(y_test_reg, y_pred_xgb), "R2 =", r2_score(y_test_reg, y_pred_xgb))

# LightGBM Regressor
lgbm = LGBMRegressor(n_estimators=200, learning_rate=0.1, max_depth=-1, random_state=42)
lgbm.fit(X_train_reg, y_train_reg)
y_pred_lgbm = lgbm.predict(X_test_reg)
print("LightGBM Regressor: MSE =", mean_squared_error(y_test_reg, y_pred_lgbm), "R2 =", r2_score(y_test_reg, y_pred_lgbm))

# CatBoost Regressor
cat = CatBoostRegressor(n_estimators=200, learning_rate=0.1, depth=6, random_state=42, verbose=0)
cat.fit(X_train_reg, y_train_reg)
y_pred_cat = cat.predict(X_test_reg)
print("CatBoost Regressor: MSE =", mean_squared_error(y_test_reg, y_pred_cat), "R2 =", r2_score(y_test_reg, y_pred_cat))



Gradient Boosting Regressor: MSE = 1027.7920662466356 R2 = 0.9665940401150309
XGBoost Regressor: MSE = 1214.0679278931755 R2 = 0.9605395820528794
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000406 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 20
[LightGBM] [Info] Start training from score -5.915670
LightGBM Regressor: MSE = 1715.8620906361114 R2 = 0.944229944898043
CatBoost Regressor: MSE = 490.5659406264835 R2 = 0.9840553097540958


# Classification Comparison of All Algorithms

In [3]:
# Dataset
X_clf, y_clf = make_classification(n_samples=2000, n_features=20, n_classes=3, n_informative=10, random_state=42)
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_clf, y_clf, test_size=0.2, random_state=42
)

# Gradient Boosting Classifier
gbc = GradientBoostingClassifier(n_estimators=200, learning_rate=0.1, max_depth=3, random_state=42)
gbc.fit(X_train_clf, y_train_clf)
y_pred_gbc = gbc.predict(X_test_clf)
print("Gradient Boosting Classifier: Accuracy =", accuracy_score(y_test_clf, y_pred_gbc))

# XGBoost Classifier
xgb_c = XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=3, random_state=42, use_label_encoder=False, eval_metric='mlogloss')
xgb_c.fit(X_train_clf, y_train_clf)
y_pred_xgb_c = xgb_c.predict(X_test_clf)
print("XGBoost Classifier: Accuracy =", accuracy_score(y_test_clf, y_pred_xgb_c))

# LightGBM Classifier
lgbm_c = LGBMClassifier(n_estimators=200, learning_rate=0.1, max_depth=-1, random_state=42)
lgbm_c.fit(X_train_clf, y_train_clf)
y_pred_lgbm_c = lgbm_c.predict(X_test_clf)
print("LightGBM Classifier: Accuracy =", accuracy_score(y_test_clf, y_pred_lgbm_c))

# CatBoost Classifier
cat_c = CatBoostClassifier(n_estimators=200, learning_rate=0.1, depth=6, random_state=42, verbose=0)
cat_c.fit(X_train_clf, y_train_clf)
y_pred_cat_c = cat_c.predict(X_test_clf)
print("CatBoost Classifier: Accuracy =", accuracy_score(y_test_clf, y_pred_cat_c))


Gradient Boosting Classifier: Accuracy = 0.8475
XGBoost Classifier: Accuracy = 0.845
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000516 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 20
[LightGBM] [Info] Start training from score -1.088043
[LightGBM] [Info] Start training from score -1.102997
[LightGBM] [Info] Start training from score -1.104882
LightGBM Classifier: Accuracy = 0.855
CatBoost Classifier: Accuracy = 0.8375
