In [None]:
!pip install hummingbird_ml

In [1]:
import numpy as np
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from hummingbird.ml import convert


In [2]:
# Load the scikit-learn breast cancer dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Keep at most `nrows` samples and cast the features to 32-bit floats. Slices
# are clamped to the array length, so if the dataset has fewer rows than
# `nrows`, every row is kept.
nrows = 15000
X, y = X[:nrows].astype(np.float32), y[:nrows]

In [3]:
# Fit a small random forest: 10 trees, each limited to a depth of 10.
# NOTE: no random_state is passed, so the fitted trees can differ between runs.
model = RandomForestClassifier(max_depth=10, n_estimators=10).fit(X, y)

# Show the fitted estimator as the cell output.
model

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=10, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [4]:
%%timeit -r 3

# Baseline: time prediction on the full feature matrix X with the scikit-learn
# random forest trained above. `-r 3` makes timeit repeat the measurement 3 times.
model.predict(X)

1.5 ms ± 5.5 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)


In [5]:
# Convert the trained model to a PyTorch-backed model with Hummingbird, asking
# for the "gemm" tree implementation.
# NOTE: this rebinds `model`, so the scikit-learn estimator is no longer
# reachable under that name, and re-running this cell would pass the already
# converted model back into convert().
model = convert(model, 'pytorch', extra_config={"tree_implementation":"gemm"})

In [6]:
%%timeit -r 3

# Time prediction with the Hummingbird (PyTorch) model on the same input X,
# before the model is moved to the GPU in a later cell.
model.predict(X)

4.24 ms ± 184 µs per loop (mean ± std. dev. of 3 runs, 100 loops each)


In [7]:
# Move the converted model to the 'cuda' device; the call is the cell's last
# expression, so its result is shown as the output.
# NOTE(review): presumably fails on machines without a CUDA-enabled PyTorch
# install -- confirm before running on CPU-only hosts.
model.to('cuda')

PyTorchBackendModel(
  (operator_map): ModuleDict(
    (SklearnRandomForestClassifier): GEMMDecisionTreeImpl()
  )
)

In [8]:
%%timeit -r 3

# Time prediction with the Hummingbird model after it was moved to 'cuda'.
# X is the same host-side array used in the earlier timing cells.
model.predict(X)

467 µs ± 2.29 µs per loop (mean ± std. dev. of 3 runs, 1000 loops each)
