In [1]:
import numpy as np
from sklearn.datasets import make_regression
import catboost

import pandas as pd

In [2]:
import os
import time
from os import listdir
from os import path
from os.path import isfile, join

import numpy as np
import onnxmltools
from joblib import dump, load
from skl2onnx.common.data_types import FloatTensorType
from sklearn.datasets import make_regression
# import random forest regressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor


In [3]:
# Build the synthetic regression dataset that every model is trained on.
n_samples, n_features = 1000, 40
X, y = make_regression(
    n_samples=n_samples,
    n_features=n_features,
    random_state=0,
)


In [4]:
# Train a linear regression, a random forest and a CatBoost regressor on
# (X, y) and persist each fitted estimator to disk with joblib.

models_path = 'models'
# joblib.dump does not create missing parent directories, so make sure the
# target directory exists before the first model is written.
os.makedirs(models_path, exist_ok=True)

# linear regression
lr = LinearRegression()
lr.fit(X, y)
dump(lr, join(models_path, 'lr.joblib'))

# random forest -- seeded so that re-running the notebook rebuilds the same
# forest (the default random_state=None gives a different model every run)
rf = RandomForestRegressor(random_state=0)
rf.fit(X, y)
dump(rf, join(models_path, 'rf.joblib'))

# catboost
catb = catboost.CatBoostRegressor()
catb.fit(X, y, verbose=False)

# Save the CatBoost model to disk. This is the last expression of the cell,
# so the list of written file names returned by dump() is displayed.
dump(catb, join(models_path, 'catb.joblib'))




['models/catb.joblib']

In [7]:
# Convert the fitted scikit-learn models to ONNX.
# The converted graphs take a single float tensor named 'float_input' with a
# free batch dimension and n_features columns.
input_spec = FloatTensorType([None, n_features])
initial_type = [('float_input', input_spec)]

# linear regression: convert, then write the ONNX file next to the joblib one
lr_onnx_file = join(models_path, 'lr.onnx')
lr_onnx = onnxmltools.convert_sklearn(lr, 'lr', initial_types=initial_type)
onnxmltools.utils.save_model(lr_onnx, lr_onnx_file)


In [8]:

# random forest: convert with the shared input signature, then write to disk
rf_onnx_file = join(models_path, 'rf.onnx')
rf_onnx = onnxmltools.convert_sklearn(rf, 'rf', initial_types=initial_type)
onnxmltools.utils.save_model(rf_onnx, rf_onnx_file)


In [9]:

# Export the CatBoost model to ONNX-ML through CatBoost's own save_model.
catb_onnx_file = join(models_path, "catb.onnx")
# Metadata passed to the exporter through export_parameters.
onnx_export_options = dict(
    onnx_domain='ai.catboost',
    onnx_model_version=1,
    onnx_doc_string='test model for Regression',
    onnx_graph_name='CatBoostModel_for_Regression',
)
catb.save_model(
    catb_onnx_file,
    format="onnx",
    export_parameters=onnx_export_options,
)

Bad pipe message: %s [b'\x83\\,\x0b\xa3\xb3y\xbc\xa6\x93\x8f\xbf8\xc2\x19\x9cp\x1a\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc']
Bad pipe message: %s [b"\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x05\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17"]
Bad pipe message: %s [b'2n\xf6\xc2~-\xe1\x03?\xf2\x97\xcf:\xdd\xc3\xae,\xa1\x00\x00>\xc0\x14\xc0\n\x009\x008\x007\x006\xc0\x0f\xc0\x05\x005\xc0\x13\xc0\t\x003\x002\x001\x000\xc0\x0e\xc0\x04\x00/\x00\x9a\x00\x99\x00\x98\x00\x97\x00\x96\x00\x07\xc0\x11\xc0\x07\xc0\x0c\xc0\x02\x00\x05\x00\x04\x00\xff\

In [25]:
## Benchmark setup: reload every persisted model from disk.

# joblib copies of the fitted estimators (files written earlier by this notebook)
lr, rf, catb = (
    load(join(models_path, filename))
    for filename in ('lr.joblib', 'rf.joblib', 'catb.joblib')
)

# ONNX copies of the same three models
lr_onnx, rf_onnx, catb_onnx = (
    onnxmltools.utils.load_model(join(models_path, filename))
    for filename in ('lr.onnx', 'rf.onnx', 'catb.onnx')
)

# make predictions with the models and save the times 


In [33]:
import onnxruntime as rt

In [82]:
# Generate the (much larger) dataset used for the prediction benchmark.
# Only the number of rows changes: n_features is deliberately reused, not
# redefined, because the estimators were fitted with that many columns and
# the ONNX input was declared as [None, n_features]. A different width makes
# every predict / sess.run call below fail on a fresh Restart & Run All.
n_samples = 1000000
X, y = make_regression(n_samples=n_samples, n_features=n_features, random_state=0)

In [83]:
%time
pred = lr.predict(X)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.96 µs


In [84]:
# Time the scikit-learn random forest on the benchmark data.
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
tic = time.perf_counter()
pred = rf.predict(X)
toc = time.perf_counter()
# Elapsed time is reported in milliseconds.
print('Random Forest: ' + str(1000*(toc - tic)) + 'ms')

Random Forest: 9690.6418800354ms


In [85]:
# Time the native CatBoost model on the benchmark data.
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
tic = time.perf_counter()
pred = catb.predict(X)
toc = time.perf_counter()

# Elapsed time is reported in milliseconds.
print('Catboost: ' + str(1000*(toc - tic)) + 'ms')

Catboost: 3740.0848865509033ms


In [86]:
# Open an ONNX Runtime session on the exported linear-regression model.
lr_session_file = join(models_path, 'lr.onnx')
sess = rt.InferenceSession(lr_session_file)



In [87]:
%time 
pred = sess.run(None, {'float_input': X.astype(np.float32)})[0]

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.25 µs


In [88]:

# Point the shared `sess` variable at the exported random-forest model.
rf_session_file = join(models_path, 'rf.onnx')
sess = rt.InferenceSession(rf_session_file)


In [89]:
# Time ONNX Runtime inference for the random-forest model.
# The float32 cast happens before the clock starts so that the measurement
# covers inference only, not the array copy.
X_float32 = X.astype(np.float32)
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
tic = time.perf_counter()
pred = sess.run(None, {'float_input': X_float32})[0]
toc = time.perf_counter()
# Elapsed time is reported in milliseconds.
print('Random Forest: ' + str(1000*(toc - tic)) + 'ms')


Random Forest: 12618.792295455933ms


In [90]:
# Point the shared `sess` variable at the exported CatBoost model.
catb_session_file = join(models_path, 'catb.onnx')
sess = rt.InferenceSession(catb_session_file)

In [91]:
%time 
predictions = sess.run(['predictions'],
                       {'features':  X.astype(np.float32)})

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.72 µs


2023-02-14 04:50:07.685783003 [W:onnxruntime:, execution_frame.cc:835 VerifyOutputSizes] Expected shape from model of {-1} does not match actual shape of {1000000,1} for output predictions


In [92]:
# Time ONNX Runtime inference for the CatBoost model with an explicit timer.
# The float32 cast happens before the clock starts so that the measurement
# covers inference only, not the array copy.
X_float32 = X.astype(np.float32)
# perf_counter() is the monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
tic = time.perf_counter()
predictions = sess.run(['predictions'],
                       {'features': X_float32})
toc = time.perf_counter()
# NOTE: this figure is in seconds, unlike the millisecond figures printed by
# the other explicit-timer cells.
print('time: ', toc-tic)

2023-02-14 04:50:41.721601210 [W:onnxruntime:, execution_frame.cc:835 VerifyOutputSizes] Expected shape from model of {-1} does not match actual shape of {1000000,1} for output predictions


time:  33.581913471221924
