In [None]:
import lightgbm as lgb
import numpy as np
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import (
    GradientBoostingRegressor,
)
from sklearn.model_selection import (
    RepeatedKFold,
    cross_val_score,
)

In [9]:
x, y = make_regression(n_samples=1000, n_features=20, random_state=0)
model = GradientBoostingRegressor()
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=0)
n_scores = cross_val_score(model, x, y, cv=cv, n_jobs=-1)
print(np.mean(n_scores), np.std(n_scores))

0.8771558882220207 0.01796336435596255


In [10]:
x, y = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=0
)
model = xgb.XGBClassifier(objective="binary:logistic", random_state=0)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=0)
n_scores = cross_val_score(model, x, y, cv=cv, n_jobs=-1)
print(np.mean(n_scores), np.std(n_scores))

0.8889999999999999 0.031973947728319914


In [11]:
x, y = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=0
)
model = xgb.XGBRFClassifier(objective="binary:logistic", random_state=0)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=0)
n_scores = cross_val_score(model, x, y, cv=cv, n_jobs=-1)
print(np.mean(n_scores), np.std(n_scores))

0.8430000000000001 0.041565209811411596


In [14]:
x, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    n_classes=10,
    random_state=0,
)
train_data = lgb.Dataset(x, label=y)
param = {
    "num_leaves": 31,
    "objective": "multiclass",
    "num_class": 10,
    "metric": ["multi_logloss"],
}
bst = lgb.train(param, train_data, num_boost_round=30)
(bst.predict(x).argmax(axis=-1) == y).mean()

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000722 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 10000, number of used features: 20
[LightGBM] [Info] Start training from score -2.299590
[LightGBM] [Info] Start training from score -2.299590
[LightGBM] [Info] Start training from score -2.306593
[LightGBM] [Info] Start training from score -2.298593
[LightGBM] [Info] Start training from score -2.311626
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.302585
[LightGBM] [Info] Start training from score -2.296603
[LightGBM] [Info] Start training from score -2.305590


0.9038

In [23]:
x, y = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=0
)
model = CatBoostClassifier(
    iterations=4,
    depth=3,
    learning_rate=1,
    loss_function="Logloss",
    verbose=True,
    task_type="CPU",
)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=0)
n_scores = cross_val_score(model, x, y, cv=cv, n_jobs=-1)
print(np.mean(n_scores), np.std(n_scores))

0.735 0.043493294502332976
