In [1]:
import numpy as np
import pandas as pd
from time import time
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold

In [3]:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=10000, n_features=20, n_informative=15, n_redundant=5, random_state=0)
X

array([[ 5.28003572,  0.07866376, -0.06964284, ..., -3.46287848,
        -0.03862101,  3.39993039],
       [ 2.16926235, -2.33555961, -0.39400421, ..., -0.86901332,
        -0.82929365, -5.54782464],
       [ 0.50841459, -2.64924693,  2.11503792, ..., -0.7373016 ,
        -2.11204173,  1.53660878],
       ...,
       [-2.0093232 , -1.21052136, -0.89558391, ...,  5.14633699,
        -0.04689061, -2.59038402],
       [-5.48082318,  1.69027971,  1.55338889, ..., -0.0579665 ,
         0.98007555, -1.9660304 ],
       [ 2.57435489, -0.13578773,  2.6852563 , ..., -0.66826083,
        -1.22760575,  0.39196471]])

In [4]:
X.shape


(10000, 20)

In [5]:
accuracy = {}
speed = {}


In [6]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold

In [7]:
model = GradientBoostingClassifier()

start = time()
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
score = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

speed['GradientBoosting'] = np.round(time() - start, 3)
accuracy['GradientBoosting'] = np.mean(score).round(3)

print(f"Mean Accuracy: {accuracy['GradientBoosting']}\nStd: {np.std(score):.3f}\nRun time: {speed['GradientBoosting']}s")

Mean Accuracy: 0.878
Std: 0.007
Run time: 46.796s


In [8]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

In [9]:
model = HistGradientBoostingClassifier()

start = time()
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
score = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

speed['HistGradientBoosting'] = np.round(time() - start, 3)
accuracy['HistGradientBoosting'] = np.mean(score).round(3)

print(f"Mean Accuracy: {accuracy['HistGradientBoosting']}\nStd: {np.std(score):.3f}\nRun time: {speed['HistGradientBoosting']}s")

Mean Accuracy: 0.948
Std: 0.005
Run time: 7.161s


In [10]:
from xgboost import XGBClassifier


In [11]:
model = XGBClassifier()

start = time()
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
score = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

speed['XGB'] = np.round(time() - start, 3)
accuracy['XGB'] = np.mean(score).round(3)

print(f"Mean Accuracy: {accuracy['XGB']}\nStd: {np.std(score):.3f}\nRun time: {speed['XGB']}s")

Mean Accuracy: 0.873
Std: 0.009
Run time: 11.102s


In [12]:
from lightgbm import LGBMClassifier


In [13]:
model = LGBMClassifier()

start = time()
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
score = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

speed['LGBM'] = np.round(time() - start, 3)
accuracy['LGBM'] = np.mean(score).round(3)

print(f"Mean Accuracy: {accuracy['LGBM']}\nStd: {np.std(score):.3f}\nRun time: {speed['LGBM']}s")

Mean Accuracy: 0.949
Std: 0.006
Run time: 5.718s


In [14]:
!pip install catboost


Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/47/80/8e9c57ec32dfed6ba2922bc5c96462cbf8596ce1a6f5de532ad1e43e53fe/catboost-0.25.1-cp37-none-manylinux1_x86_64.whl (67.3MB)
[K     |████████████████████████████████| 67.3MB 57kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.25.1


In [15]:
from catboost import CatBoostClassifier


In [16]:
model = CatBoostClassifier()

start = time()
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=0)
score = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

speed['CatBoost'] = np.round(time() - start, 3)
accuracy['CatBoost'] = np.mean(score).round(3)

print(f"Mean Accuracy: {accuracy['CatBoost']}\nStd: {np.std(score):.3f}\nRun time: {speed['CatBoost']}s")

Mean Accuracy: 0.964
Std: 0.004
Run time: 135.432s


In [17]:
print("Accuracy: ")
{k: v for k, v in sorted(accuracy.items(), key=lambda i: i[1], reverse=True)}

Accuracy: 


{'CatBoost': 0.964,
 'GradientBoosting': 0.878,
 'HistGradientBoosting': 0.948,
 'LGBM': 0.949,
 'XGB': 0.873}

In [18]:
print("Speed:")
{ k: v for k, v in sorted(speed.items(), key=lambda i: i[1], reverse=False)}

Speed:


{'CatBoost': 135.432,
 'GradientBoosting': 46.796,
 'HistGradientBoosting': 7.161,
 'LGBM': 5.718,
 'XGB': 11.102}