# Training XGBOOST Model and Converting to ONNX Format!

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import xgboost as xgb
try:
    import re2 as re
except:
    import re

# For ONNX
import onnx
import onnxruntime
from onnxmltools.convert.common import data_types
from onnxmltools.convert import convert_xgboost

### The code below is a sample code used to train an xgboost model with digits data.

In [2]:
digits = load_digits()
X, y = digits.data, digits.target  # Our train data shape is (x, 64) where x is total samples
X_train, X_test, y_train, y_test = train_test_split(X, y)

booster = xgb.XGBClassifier(max_depth=3,
                            booster='dart',
                            eta=0.3,
                            silent=1,
                            n_estimators=100,
                            num_class=10)

booster.fit(X_train, y_train)

Parameters: { "silent" } are not used.



In [3]:
pd.DataFrame(X_test)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.0,0.0,10.0,13.0,2.0,0.0,0.0,0.0,0.0,0.0,...,11.0,0.0,0.0,0.0,8.0,14.0,8.0,11.0,14.0,1.0
1,0.0,0.0,0.0,16.0,11.0,1.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,1.0,11.0,16.0,13.0,1.0,0.0
2,0.0,0.0,0.0,0.0,11.0,15.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,11.0,16.0,12.0,0.0
3,0.0,0.0,1.0,13.0,16.0,10.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,15.0,7.0,0.0,0.0,0.0
4,0.0,0.0,3.0,16.0,12.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,13.0,9.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,0.0,0.0,0.0,6.0,16.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,7.0,16.0,3.0,0.0,0.0
446,0.0,0.0,0.0,6.0,15.0,11.0,2.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,5.0,12.0,16.0,2.0,0.0
447,0.0,0.0,15.0,15.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,0.0,0.0
448,0.0,0.0,0.0,7.0,14.0,16.0,6.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,9.0,6.0,0.0,0.0,0.0


In [4]:
pd.DataFrame(y_test)

Unnamed: 0,0
0,2
1,0
2,2
3,7
4,1
...,...
445,4
446,8
447,7
448,7


### Convert model to ONNX format
After we get the model from xgboost, we can convert the model to onnx with the onnxmltools.

First, we define the input from the model, this model use float input with shape (1, 64), so we define initial_type as follows.

In [5]:
initial_type = [('float_input', data_types.FloatTensorType([1, 64]))]

After that we can immediately change xgboost to onnx using convert_xgboost from onnxmltools and save the model to xgboost.onnx

In [6]:
booster_onnx = convert_xgboost(booster, initial_types=initial_type)
onnx.save(booster_onnx, 'xgboost.onnx')