In [13]:
pip install onnx onnxmltools xgboost

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
import pickle
import xgboost as xgb
import onnxmltools
from skl2onnx.common.data_types import FloatTensorType

# Load the XGBoost model
model_path = 'models/xgb_model.pkl'  # Update this path
with open(model_path, 'rb') as model_file:
    xgb_model = pickle.load(model_file)

# Get the booster from the model
booster = xgb_model.get_booster()

# Extract feature names from the booster (if available) or generate default ones
if hasattr(booster, 'feature_names') and booster.feature_names:
    feature_names = booster.feature_names
else:
    feature_names = ['f' + str(i) for i in range(len(booster.get_dump()[0].split('\n')[0].split(',')))]  # default names

# Print the feature names and their indices
print("Feature names and their indices:")
for i, feature in enumerate(feature_names):
    print(f"f{i}: {feature}")

# Define the number of features
num_features = len(feature_names)

# Ensure feature names follow ONNX's expected format (f0, f1, f2, ...)
modified_feature_names = ['f' + str(i) for i in range(num_features)]

# Update the booster with the correct feature names
booster.feature_names = modified_feature_names

# Define the input types for the ONNX conversion (based on number of features)
initial_types = [('input', FloatTensorType([None, num_features]))]

# Convert the XGBoost model to ONNX format
onnx_model = onnxmltools.convert.convert_xgboost(xgb_model, initial_types=initial_types)

# Save the ONNX model
onnx_model_path = 'models/xgb_model.onnx'  # Update this path
onnxmltools.utils.save_model(onnx_model, onnx_model_path)

print(f"ONNX model saved to {onnx_model_path}")


Feature names and their indices:
f0: Category
f1: Sub Category
f2: City
f3: Region
f4: Profit
f5: Discount
ONNX model saved to models/xgb_model.onnx


In [25]:
import joblib
import skl2onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Load the scaler model
scaler_path = 'models/scaler.pkl'  # Update this path
scaler = joblib.load(scaler_path)

# Define the input type for the scaler (assuming it was trained on 6 features)
initial_type = [('input', FloatTensorType([None, 6]))]  # Update 6 based on your actual number of features

# Convert the scaler to ONNX format
onnx_scaler = convert_sklearn(scaler, initial_types=initial_type)

# Save the ONNX model
onnx_model_path = 'models/scaler.onnx'  # Update this path
onnx.save_model(onnx_scaler, onnx_model_path)

print(f"ONNX model saved to {onnx_model_path}")


ONNX model saved to models/scaler.onnx


In [27]:
import joblib
import numpy as np
import skl2onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import LabelEncoder

# Custom transformer to wrap LabelEncoder
class LabelEncoderWrapper(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.encoder = LabelEncoder()

    def fit(self, X, y=None):
        # Fit the label encoder on the data
        self.encoder.fit(X)
        return self

    def transform(self, X):
        # Transform the labels using the fitted encoder
        return self.encoder.transform(X).reshape(-1, 1)  # Reshaping for ONNX compatibility

# Load the LabelEncoder model
label_encoder_path = 'models/label_encoders.pkl'  # Update this path
label_encoder = joblib.load(label_encoder_path)

# Wrap the loaded LabelEncoder in the custom transformer
wrapper = LabelEncoderWrapper()
wrapper.encoder = label_encoder

# Define the input type for the LabelEncoder (assuming it accepts a single string input)
initial_type = [('input', StringTensorType([None, 1]))]  # Assuming it accepts one string feature

# Convert the custom LabelEncoder wrapper to ONNX format
onnx_label_encoder = convert_sklearn(wrapper, initial_types=initial_type)

# Save the ONNX model
onnx_model_path = 'models/label_encoders.onnx'  # Update this path
onnx.save_model(onnx_label_encoder, onnx_model_path)

print(f"ONNX LabelEncoder model saved to {onnx_model_path}")


MissingShapeCalculator: Unable to find a shape calculator for type '<class '__main__.LabelEncoderWrapper'>'.
It usually means the pipeline being converted contains a
transformer or a predictor with no corresponding converter
implemented in sklearn-onnx. If the converted is implemented
in another library, you need to register
the converted so that it can be used by sklearn-onnx (function
update_registered_converter). If the model is not yet covered
by sklearn-onnx, you may raise an issue to
https://github.com/onnx/sklearn-onnx/issues
to get the converter implemented or even contribute to the
project. If the model is a custom model, a new converter must
be implemented. Examples can be found in the gallery.
