# ONNX Conversion

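ONNX Runtime is a cross-platform machine-learning model accelerator with a flexible interface for integrating hardware-specific libraries. It can be used with models from PyTorch, TensorFlow/Keras, TFLite, scikit-learn, and other frameworks.

This notebook trains a small Keras digit classifier on MNIST, converts it to ONNX with `tf2onnx`, and runs inference on the converted model with ONNX Runtime.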

In [1]:
%%capture
!pip install tf2onnx onnxruntime

In [2]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tf2onnx
import onnx
import onnxruntime as ort

# Train a Model

In [3]:
# Fetch the MNIST digits as (images, labels) pairs for the train and test splits.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Rescale pixel intensities to the 0..1 range and store them as float32.
train_images = (train_images / 255.0).astype(np.float32)
test_images = (test_images / 255.0).astype(np.float32)

# Build the network layer by layer: 28x28 input -> add a channel axis ->
# 3x3 convolution (12 filters, ReLU) -> 2x2 max-pool -> flatten -> 10 logits.
model = keras.Sequential()
model.add(keras.layers.InputLayer(input_shape=(28, 28)))
model.add(keras.layers.Reshape(target_shape=(28, 28, 1)))
model.add(keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(10))

# The final Dense layer emits raw logits, hence from_logits=True in the loss.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train the digit classifier, evaluating on the test split after each epoch.
model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_data=(test_images, test_labels),
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f5cf18d9c10>

# Convert to ONNX

In [4]:
# Describe the model input for tf2onnx. The leading None keeps the batch
# dimension dynamic, so the exported model accepts any batch size rather
# than being locked to exactly one image per call.
input_signature = [tf.TensorSpec([None, 28, 28], tf.float32, name='x')]

# from_keras returns the ONNX model proto plus external tensor storage;
# only the proto is needed here.
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=13)

# Write the converted model to disk so ONNX Runtime can load it by path.
onnx.save(onnx_model, "model.onnx")

# Inference

In [5]:
# Dummy input: a single all-zero 28x28 image, matching the model's float32 input.
input_ = np.zeros((1, 28, 28), np.float32)

# Starting with ORT 1.10, ORT requires explicitly setting the providers parameter if you want to use
# execution providers other than the default CPU provider (as opposed to the previous behavior of
# providers getting set/registered by default based on the build flags) when instantiating
# InferenceSession.
# This cell runs on the CPU. Naming the CPU provider explicitly also works on GPU-enabled builds,
# which refuse to create a session when `providers` is omitted.
# To run on an NVIDIA GPU instead (requires a CUDA-enabled ONNX Runtime build), pass
# providers=["CUDAExecutionProvider", "CPUExecutionProvider"].
sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

# Set the first argument of sess.run to None to fetch all model outputs in their default order.
# When converting with the tf2onnx CLI, input/output names are printed and can be set with
# --rename-inputs and --rename-outputs. With the Python API (as used in this notebook), names are
# determined from function arg names or TensorSpec names. Here the names are read back from the
# session, so the call does not depend on what the converter chose.
results_ort = sess.run([sess.get_outputs()[0].name], {sess.get_inputs()[0].name: input_})
print(results_ort)


[array([[-1.7204155 ,  5.6252193 , -0.12981272, -2.193227  , -1.0933723 ,
         1.5632545 , -1.8942062 ,  2.3993464 , -6.854724  , -2.7378263 ]],
      dtype=float32)]
