In [1]:
from sklearn.ensemble import RandomForestClassifier
from hummingbird import convert_sklearn
from sklearn.model_selection import train_test_split
import numpy as np
import torch

#### Parameters for the model and the synthetic dataset

In [2]:
# Benchmark configuration. The later cells read these names directly.

# Forest shape
n_estimators = 500  # number of trees in the random forest
max_depth = 7       # depth limit passed to the forest

# Synthetic dataset
num_classes = 2     # labels are drawn as integers in [0, num_classes)
n_features = 200    # columns of the random feature matrix
data_sz = 200000    # total rows generated before the train/test split

# Echo the settings that identify this benchmark run.
print(
    "For #classes:{}, depth:{}, n_estimators/trees:{}".format(
        num_classes, max_depth, n_estimators
    )
)

For #classes:2, depth:7, n_estimators/trees:500


#### Create and fit the model

In [3]:
# Seed NumPy's global RNG so that the synthetic data, the train/test split and
# the forest fitted in the next cell are identical on every Restart & Run All.
# scikit-learn draws from this global RNG whenever random_state is left unset,
# so a single seed here covers train_test_split and RandomForestClassifier too.
# Re-run this cell before re-fitting if identical trees are required.
np.random.seed(0)

# Unfitted forest; training happens in the next cell.
skl_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)

# Synthetic benchmark data: uniform random float32 features and random integer
# labels in [0, num_classes). The labels carry no signal; only speed is measured.
X = np.random.rand(data_sz, n_features).astype('float32')
y = np.random.randint(num_classes, size=data_sz)

# larger than normal test_size for benchmarking: 80% of the rows are held out
# so that prediction is timed on the bulk of the data. The held-out labels are
# not needed and are discarded.
X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.80)

# Torch tensor over the same memory as X_test, used as input for the PyTorch model.
X_test_torch = torch.from_numpy(X_test)

In [4]:
# Train the forest on the 20% training split (40,000 rows x 200 features,
# 500 trees). This will take a bit. The call is left as the cell's last
# expression, so the fitted estimator's repr is shown as the cell output.
skl_model.fit(X_train, y_train)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=7, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

#### Time scikit-learn

In [5]:
%%timeit 
# Baseline measurement: scikit-learn's own predict on the held-out NumPy array.
skl_model.predict(X_test)

7.67 s ± 29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### Convert the scikit-learn model to PyTorch with Hummingbird

In [6]:
# Translate the fitted forest with Hummingbird's convert_sklearn. The cells
# below call the result directly on torch tensors and move it between devices
# with .to(), so it is presumably a torch.nn.Module -- confirm against the
# installed hummingbird version.
pytorch_model = convert_sklearn(skl_model)

#### Time PyTorch - CPU

In [7]:
%%timeit 
# Same held-out rows as the scikit-learn run, passed as a CPU torch tensor to
# the converted model (the model has not been moved to the GPU yet).
pytorch_model(X_test_torch)

3.86 s ± 15.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### Switch PyTorch from CPU to GPU

In [8]:
# Move the converted model to the GPU; requires a CUDA-capable device.
# The return value is discarded, so this relies on .to() updating the model
# in place (as torch.nn.Module.to does) -- confirm for the converted model type.
pytorch_model.to('cuda')
# Tensor.to returns a new tensor on the target device, so the GPU copy gets its
# own name and the CPU tensor X_test_torch stays available.
X_test_torch_cuda = X_test_torch.to('cuda')

#### Time PyTorch - GPU

In [9]:
%%timeit 
pytorch_model(X_test_torch_cuda)

126 ms ± 4.81 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
