# Running an ONNX Model with ONNX Runtime!
Now that we have our model in the ONNX format, we can use ONNX Runtime to perform inference with our model!<br>
[ONNX Runtime](https://onnxruntime.ai/)<br>
ONNX Runtime is not only available as a Python library, but also has versions in:
* C++
* C
* C#
* Java
* JavaScript
* Objective-C
* Julia and Ruby APIs 
<br>
<br>
All of which can use the same ONNX file!

In [None]:
import numpy as np
import onnxruntime
from PIL import Image
import json
import time

## You will need to Install onnxruntime

# If you don't have a GPU install cpu version
# pip install onnxruntime

# If you have a GPU install gpu version
# pip install onnxruntime-gpu

# Make sure you install the correct version for your version of CUDA!
# Also check dependencies!
# https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
# EG for CUDA version 12.2 use 
# pip install onnxruntime-gpu==1.17

# NOTE: PyTorch has its own cuDNN that gets installed with torch
# If you want to use other applications that need cuDNN, like onnxruntime-gpu (without having to import torch)
# You need to install cuDNN separately (it doesn't come with NVIDIA Toolkit)
# NOTE: at time of writing only cuDNN 8.X versions are supported!!
# https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-890/install-guide/index.html

In [None]:
# Create an ONNX Runtime inference session.
# Providers are listed in priority order: prefer the GPU, but explicitly fall
# back to the CPU so this cell also works with the CPU-only `onnxruntime`
# package (or when the CUDA provider cannot be used).
ort_session = onnxruntime.InferenceSession(
    "./efficientnet_b1.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)

# Load the image classification labels from the JSON file.
# JSON is UTF-8 by specification, so don't rely on the platform default encoding.
with open("../../data/imagenet_classes.json", "r", encoding="utf-8") as file:
    img_net_classes = json.load(file)

## Create helper functions

In [None]:
def crop_resize(image, new_size):
    """Centre-crop ``image`` to a square and resize it to ``new_size`` x ``new_size``.

    Args:
        image: PIL-style image exposing ``size`` (width, height), ``crop``
            and ``resize``.
        new_size: Edge length, in pixels, of the returned square image.

    Returns:
        The cropped and resized image.
    """
    img_w, img_h = image.size

    # The largest centred square has the shorter image edge as its side
    side = min(img_w, img_h)

    # Split the surplus along the longer edge evenly between both ends
    x0 = (img_w - side) // 2
    y0 = (img_h - side) // 2
    crop_box = (x0, y0, x0 + side, y0 + side)

    # Crop to the square, then scale it to the requested size
    return image.crop(crop_box).resize((new_size, new_size))

In [None]:
def image_normalise_reshape(image, mean, std):
    """Turn a (height, width, channels) image array into a normalised model input.

    Args:
        image: Numpy array laid out as (height, width, channels); pixel values
            are divided by 255 before normalisation.
        mean: Per-channel means, one value per channel.
        std: Per-channel standard deviations, one value per channel.

    Returns:
        A float32 array of shape (1, channels, height, width).
    """
    # Only the channel count is needed; unpacking also enforces a 3-D input
    _, _, channels = image.shape

    # Shape the statistics as (channels, 1, 1) so they broadcast over H and W
    stats_shape = (channels, 1, 1)
    mean_arr = np.array(mean).reshape(stats_shape)
    std_arr = np.array(std).reshape(stats_shape)

    # Channels-first layout, with pixel values divided by 255
    chw = image.transpose((2, 0, 1)) / 255.0

    # Standardise per channel; the small epsilon guards against division by zero
    standardised = (chw - mean_arr) / (std_arr + 1e-6)

    # Prepend a batch dimension of size 1 and cast to float32
    return np.expand_dims(standardised, 0).astype(np.float32)

## Load and test ONNX model

In [None]:
# Load the test image, centre-crop it to a square and resize it to 224x224.
# Force 3-channel RGB first: greyscale, palette or RGBA files would otherwise
# produce a channel count that does not match the 3-value mean/std used when
# normalising the image.
test_image = crop_resize(Image.open("../../data/dog.jpg").convert("RGB"), 224)

In [None]:
# PIL image -> numpy array
np_image = np.array(test_image)

# Per-channel mean and standard deviation used to normalise the image
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Normalise the image and reshape it into the layout used for inference
norm_image = image_normalise_reshape(np_image, mean, std)

# Should also work with a batch of images, e.g.:
# norm_image_batch = np.concatenate((norm_image, norm_image), 0)

# ONNX Runtime takes its inputs as an {input name: array} mapping
input_name = ort_session.get_inputs()[0].name
onnxruntime_input = {input_name: norm_image}

## How fast is inference?

In [None]:
# Per-run inference durations, in seconds
inference_time = []

# Perform multiple inference runs (10 in this case), timing each one
for _ in range(10):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can be coarse and may jump if the system
    # clock is adjusted.
    start_time = time.perf_counter()

    # Run inference using the ONNX Runtime session (None -> return all outputs)
    onnxruntime_outputs = ort_session.run(None, onnxruntime_input)

    end_time = time.perf_counter()

    # Store the duration of this run
    inference_time.append(end_time - start_time)

# Report the fastest run, which is the least affected by slower outlier runs
print(f"Minimum inference time {min(inference_time):.4f}s")

In [None]:
# Print the predicted class for every item in every model output
print("ONNX Runtime outputs:")
for output in onnxruntime_outputs:
    # Take the argmax over the last (class) axis rather than over the flattened
    # array, so a batch of images yields one class index per image instead of
    # a single index into the flattened batch.
    class_indices = np.atleast_1d(np.argmax(output, axis=-1)).ravel()

    for class_index in class_indices:
        print("Class index:", class_index)

        # Look up the label for the predicted index; this assumes
        # 'img_net_classes' is a dict keyed by the index as a string
        # (an unknown index prints None).
        print("Class Label:", img_net_classes.get(str(class_index)))