## ONNX Model Pipeline: Preprocessor + Linear Regression

This notebook demonstrates how to:
- Create a preprocessor ONNX model that expands a single input `x` into the 4-value arithmetic progression `[x, x+1, x+2, x+3]`.
- Train a scikit-learn Linear Regression model and export it to ONNX.
- Merge both models into a single ONNX file for end-to-end inference.


📋 Requirements
```bash
pip install numpy scikit-learn onnx onnxruntime skl2onnx
```

In [17]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [18]:
# Synthetic regression problem: 100 samples with 4 features each, where the
# target is a fixed linear combination of the features plus Gaussian noise.
np.random.seed(42)
X = np.random.rand(100, 4)
noise = np.random.normal(0, 0.1, 100)
y = 2 * X[:, 0] + 3 * X[:, 1] - 1.5 * X[:, 2] + 0.5 * X[:, 3] + noise

# Hold out 20% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained
model = LinearRegression().fit(X_train, y_train)

# Score the fitted model on the held-out samples
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"Coefficients: {model.coef_}")       # Learned coefficients for each feature
print(f"Intercept: {model.intercept_:.4f}") # Learned intercept

# Predict for one unseen sample; predict() expects a 2D (n_samples, n_features) array
new_input = np.array([[0.5, 0.3, 0.2, 0.4]])
predicted_output = model.predict(new_input)
print(f"Predicted output for {new_input[0]}: {predicted_output[0]:.4f}")

Mean Squared Error: 0.0084
Coefficients: [ 2.05313757  2.93184642 -1.48006291  0.47695218]
Intercept: 0.0115
Predicted output for [0.5 0.3 0.2 0.4]: 1.8123


## Convert the scikit-learn Model to ONNX

In [19]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Declare the ONNX graph input: a float tensor named 'float_input' with
# 4 features per row; None leaves the batch dimension unspecified.
initial_type = [("float_input", FloatTensorType([None, 4]))]

# Translate the fitted scikit-learn estimator into an ONNX model
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Serialize the model and write the bytes to disk
serialized = onnx_model.SerializeToString()
with open("linear_regression.onnx", "wb") as onnx_file:
    onnx_file.write(serialized)

print("Model converted to ONNX and saved as 'linear_regression.onnx'")

Model converted to ONNX and saved as 'linear_regression.onnx'


## Preprocessor ONNX Model

In [20]:
import numpy as np
import onnx
from onnx import helper, TensorProto

# Build a small ONNX graph that expands one float into an arithmetic progression:
#     output = [input, input + 1, input + 2, input + 3]

# Graph input: a single float (shape: [1])
input_value = helper.make_tensor_value_info(
    "input", TensorProto.FLOAT, [1]
)

# Graph output: 4 floats (shape: [4])
output_value = helper.make_tensor_value_info(
    "output", TensorProto.FLOAT, [4]
)

# Nodes are listed in dependency order: constants, then the additions that
# consume them, then the concatenation that consumes the sums.
nodes = [
    # Constants 1, 2, 3 as single-element float tensors
    helper.make_node(
        "Constant", [], ["one"], value=helper.make_tensor("one", TensorProto.FLOAT, [1], [1.0])
    ),
    helper.make_node(
        "Constant", [], ["two"], value=helper.make_tensor("two", TensorProto.FLOAT, [1], [2.0])
    ),
    helper.make_node(
        "Constant", [], ["three"], value=helper.make_tensor("three", TensorProto.FLOAT, [1], [3.0])
    ),

    # input + 1, input + 2, input + 3
    helper.make_node("Add", ["input", "one"], ["input_plus_1"]),
    helper.make_node("Add", ["input", "two"], ["input_plus_2"]),
    helper.make_node("Add", ["input", "three"], ["input_plus_3"]),

    # Join the four single-element tensors along axis 0 into one tensor
    helper.make_node(
        "Concat",
        ["input", "input_plus_1", "input_plus_2", "input_plus_3"],
        ["output"],
        axis=0
    )
]

# Assemble the graph from its nodes, input and output
graph = helper.make_graph(
    nodes,
    "AP_Expander_Graph",
    [input_value],
    [output_value]
)

# Use a dedicated variable name here: binding this to `model` would overwrite
# the fitted scikit-learn estimator that the conversion cell passes to
# convert_sklearn, breaking any re-run of that cell.
preprocessor_model = helper.make_model(graph, producer_name="AP-Expander")

# Validate the hand-built model before writing it, so structural mistakes
# surface here rather than at inference time.
onnx.checker.check_model(preprocessor_model)

output_path = "./ap_expander.onnx"
onnx.save(preprocessor_model, output_path)
print(f"Saved ONNX model to {output_path}")


Saved ONNX model to ./ap_expander.onnx


In [21]:
import onnxruntime as rt
import numpy as np

# Open the saved preprocessor with the CPU execution provider
sess = rt.InferenceSession("ap_expander.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name

# Single-element float32 tensor, matching the graph's declared FLOAT input
test_input = np.array([2.0], dtype=np.float32)

# Passing None as the output list requests every graph output; keep the first
feeds = {input_name: test_input}
output = sess.run(None, feeds)[0]

print(f"Input: {test_input[0]}")
print(f"Output: {output}")  # Expected: [2.0, 3.0, 4.0, 5.0]

Input: 2.0
Output: [2. 3. 4. 5.]


## Combine the Models

In [22]:
import onnx
from onnx import version_converter
from onnx.compose import merge_models

# Load both models from disk
preprocessor = onnx.load("ap_expander.onnx")
lr_model = onnx.load("linear_regression.onnx")

# merge_models refuses to combine models whose opset versions differ for the
# same domain, so convert the preprocessor to the default-domain opset that the
# regression model actually declares instead of a hard-coded number.
# If the regression model declares no default-domain opset, fall back to 21.
FALLBACK_OPSET = 21
target_opset = next(
    (opset.version for opset in lr_model.opset_import if opset.domain in ("", "ai.onnx")),
    FALLBACK_OPSET,
)
preprocessor_aligned = version_converter.convert_version(preprocessor, target_opset)

# merge_models also requires both models to carry the same IR version
preprocessor_aligned.ir_version = lr_model.ir_version

# Chain the models: the preprocessor's 'output' feeds the regressor's 'float_input'
combined_model = merge_models(
    preprocessor_aligned,  # First model
    lr_model,              # Second model
    io_map=[("output", "float_input")]
)

# Save the combined model
onnx.save(combined_model, "combined_model.onnx")
print("Combined model saved as 'combined_model.onnx'")

Combined model saved as 'combined_model.onnx'


In [23]:
import onnxruntime as rt
import numpy as np

# Open the merged preprocessor + regressor model with the CPU execution provider
sess = rt.InferenceSession("combined_model.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name

# The merged graph takes the preprocessor's single float32 value as input
test_input = np.array([2.0], dtype=np.float32)

# Request every output (None) and keep the first one
feeds = {input_name: test_input}
output = sess.run(None, feeds)[0]

print(f"Input: {test_input[0]}")
print(f"Prediction: {output[0]}")

Input: 2.0
Prediction: [9.37778]
