In [3]:
%pip install scikit-learn skl2onnx onnxruntime

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp39-cp39-macosx_12_0_arm64.whl (11.1 MB)
[K     |████████████████████████████████| 11.1 MB 3.2 MB/s eta 0:00:01
[?25hCollecting skl2onnx
  Downloading skl2onnx-1.19.1-py3-none-any.whl (315 kB)
[K     |████████████████████████████████| 315 kB 16.5 MB/s eta 0:00:01
[?25hCollecting onnxruntime
  Downloading onnxruntime-1.19.2-cp39-cp39-macosx_11_0_universal2.whl (16.8 MB)
[K     |████████████████████████████████| 16.8 MB 14.4 MB/s eta 0:00:01
[?25hCollecting numpy>=1.19.5
  Downloading numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl (5.3 MB)
[K     |████████████████████████████████| 5.3 MB 17.5 MB/s eta 0:00:01
[?25hCollecting threadpoolctl>=3.1.0
  Downloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Collecting scipy>=1.6.0
  Downloading scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl (30.3 MB)
[K     |████████████████████████████████| 30.3 MB 22.9

In [4]:
import numpy as np
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, mean_squared_error

# ONNX conversion libraries
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as ort
import json

## Classification Example

In [5]:
# Generate a synthetic binary classification problem (1000 samples, 10 features)
X_class, y_class = make_classification(
    n_samples=1000, n_features=10, n_classes=2, random_state=42
)
# Hold out 20% of the samples for evaluation
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42
)

# Standardize features. The scaler is fitted on the training split only and
# then re-used, unchanged, on the test split.
scaler_class = StandardScaler()
X_train_scaled_class = scaler_class.fit_transform(X_train_class)
X_test_scaled_class = scaler_class.transform(X_test_class)

# Train Random Forest classifier on the standardized features
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_scaled_class, y_train_class)

# Evaluate on the held-out split
y_pred_class = rf_classifier.predict(X_test_scaled_class)
accuracy = accuracy_score(y_test_class, y_pred_class)
print(f"Random Forest accuracy: {accuracy:.4f}")

# Convert to ONNX.
# The input width is read from the training matrix instead of being typed in a
# second time, so the exported signature cannot drift from the data the model
# was fitted on. `None` leaves the row (batch) dimension unspecified.
# Only the classifier is converted here: the scaler is NOT part of the ONNX
# graph, so consumers must standardize inputs themselves before inference.
initial_type_class = [
    ('float_input', FloatTensorType([None, X_train_scaled_class.shape[1]]))
]
onnx_model_class = convert_sklearn(
    rf_classifier,
    initial_types=initial_type_class,
    target_opset=11
)

Random Forest accuracy: 0.8800


In [7]:
# Save ONNX model (serialized protobuf bytes)
with open("sklearn_classifier.onnx", "wb") as f:
    f.write(onnx_model_class.SerializeToString())

# Test ONNX model
# The execution provider is named explicitly: onnxruntime builds that bundle
# additional providers (e.g. CUDA) raise an error when `providers` is omitted,
# while CPU-only builds behave exactly as before.
ort_session_class = ort.InferenceSession(
    "sklearn_classifier.onnx",
    providers=["CPUExecutionProvider"],
)
# Take a single (already standardized) test row and cast it to float32 to match
# the float tensor input declared at conversion time.
test_input_class = X_test_scaled_class[:1].astype(np.float32)
# Look the input name up from the session rather than hard-coding it.
ort_inputs_class = {ort_session_class.get_inputs()[0].name: test_input_class}
# Passing None as the output list requests every output of the graph.
ort_output_class = ort_session_class.run(None, ort_inputs_class)

# Both models are fed the same float32 row so the comparison is like-for-like;
# the first ONNX output holds the predicted labels.
print(f"Scikit-learn prediction: {rf_classifier.predict(test_input_class)[0]}")
print(f"ONNX prediction: {ort_output_class[0][0]}")


Scikit-learn prediction: 0
ONNX prediction: 0


In [8]:
# Save preprocessing parameters for all models
# The ONNX graph contains only the classifier, so the scaler statistics are
# exported alongside it for consumers to reproduce the standardization.
# Metadata is read from the fitted objects rather than typed in by hand, so it
# cannot disagree with the model that was actually trained and exported.
preprocessing_params = {
    'classifier': {
        'scaler_mean': scaler_class.mean_.tolist(),
        'scaler_scale': scaler_class.scale_.tolist(),
        # int(...) guarantees a plain Python int for JSON serialization
        'input_features': int(scaler_class.n_features_in_),
        'output_classes': len(rf_classifier.classes_),
        'model_type': type(rf_classifier).__name__
    },
    'framework': 'scikit-learn'
}

with open('sklearn_preprocessing.json', 'w') as f:
    json.dump(preprocessing_params, f)