<a href="https://colab.research.google.com/github/TheRadDani/ONNX_inference_for_multiple_targets/blob/main/ONNX_Inference_different_targets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
# Install the notebook's dependencies with the same interpreter that runs this
# kernel (sys.executable), so the packages land in the environment the notebook
# actually imports from. onnxconverter_common is pinned to 1.8.1.
!{sys.executable} -m pip install onnxruntime-gpu onnx onnxconverter_common==1.8.1 pillow --quiet

In [None]:
from torchvision import models, datasets, transforms as T
import torch
from PIL import Image
import numpy as np

In [None]:
resnet50 = models.resnet50(pretrained=True)

# Download ImageNet Labels
!curl -o imagenet_classes.txt https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

# Read the categories
with open("imagenet_classes.txt", "r") as f:
  categories = [s.strip() for s in f.readlines()]

# Export the model to ONNX
image_height = 224
image_width = 224
x = torch.randn(1, 3, image_height, image_width, requires_grad=True)
torch_out = resnet50(x)
torch.onnx.export(resnet50,                     # model being run
                  x,                            # model input (or a tuple for multiple inputs)
                  "resnet50.onnx",              # where to save the model (can be a file or file-like object)
                  export_params=True,           # store the trained parameter weights inside the model file
                  opset_version=12,             # the ONNX version to export the model to
                  do_constant_folding=True,     # whether to execute constant folding for optimization
                  input_names = ['input'],      # the model's input names
                  output_names = ['output'])    # the model's output names

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 10472  100 10472    0     0   161k      0 --:--:-- --:--:-- --:--:--  162k


In [None]:
# Make sure the PyTorch model is in inference mode.
resnet50.eval()
filename = '/content/cat.jpg'

# Force three RGB channels: the Normalize step below is configured with
# three-channel mean/std, so grayscale, palette or RGBA files would
# otherwise fail during preprocessing.
input_image = Image.open(filename).convert("RGB")
preprocess = T.Compose([
  T.Resize(256),
  T.CenterCrop(224),
  T.ToTensor(),
  T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])
input_tensor = preprocess(input_image)
# Add the leading batch dimension expected by the model
input_batch = input_tensor.unsqueeze(0)

print("GPU Availability: ", torch.cuda.is_available())
if torch.cuda.is_available():
  # Move both the input batch and the model to the GPU when one is present
  input_batch = input_batch.to('cuda')
  resnet50.to('cuda')

GPU Availability:  True


## ResNet-50 ONNX Model Inference with ONNX Runtime

In [None]:
import onnxruntime
from onnx import numpy_helper
import time

# Request execution providers in order of preference, but only those that the
# installed onnxruntime build reports as available. The notebook installs
# onnxruntime-gpu, so asking for OpenVINOExecutionProvider alone would name a
# provider that this build may not contain.
_preferred_providers = [
    "OpenVINOExecutionProvider",
    "CUDAExecutionProvider",
    "CPUExecutionProvider",
]
_available_providers = onnxruntime.get_available_providers()
session_fp32 = onnxruntime.InferenceSession(
    "resnet50.onnx",
    providers=[p for p in _preferred_providers if p in _available_providers],
)

def softmax(x):
  """
    Return the softmax of the scores in x, normalised over all elements.

    The maximum score is subtracted before exponentiating so that large
    scores do not overflow; the result has the same shape as x.
  """
  shifted = x - np.max(x)
  exponentials = np.exp(shifted)
  return exponentials / np.sum(exponentials)

# Duration in seconds of every `session.run` call made through run_sample.
latency = []
def run_sample(session, image_file, categories, inputs):
  """
    Run one inference with an ONNX Runtime session and print the top-5 classes.

    Args:
      session: object exposing `get_inputs()` and `run()` (an
        onnxruntime.InferenceSession in this notebook).
      image_file: unused; kept so existing calls keep working.
      categories: sequence of class names indexed by class id.
      inputs: torch tensor holding the preprocessed input batch.

    Returns:
      The first output of `session.run`, unmodified.

    Side effects:
      Appends the duration of the `session.run` call to the module-level
      `latency` list and prints the five most probable categories.
  """
  # Convert the tensor before starting the timer so the recorded latency
  # covers the runtime call only, not the tensor-to-NumPy copy.
  input_arr = inputs.cpu().detach().numpy()
  # Ask the session for its input name instead of hard-coding it.
  input_name = session.get_inputs()[0].name

  start = time.perf_counter()
  ort_outputs = session.run(None, {input_name: input_arr})[0]
  latency.append(time.perf_counter() - start)

  output = softmax(ort_outputs.flatten())
  top5_catid = np.argsort(-output)[:5]
  for catid in top5_catid:
    print(categories[catid], output[catid])
  return ort_outputs


ort_output = run_sample(session_fp32, filename, categories, input_batch)
# Mean of all recorded ONNX Runtime latencies, converted to milliseconds.
# The format spec is ".2f" (two decimals); the previous "2f" only set a
# minimum width and printed six decimals.
print(f"ONNX Runtime CPU_GPU/OpenVINO Inference Time = \
{(sum(latency) * 1000)/ len(latency):.2f} ms")

tabby 0.20865938
Egyptian cat 0.2022892
lynx 0.19073758
tiger cat 0.15811343
hamper 0.02069059
ONNX Runtime CPU_GPU/OpenVINO Inference Time = 69.701195 ms


## Comparison with OpenVINO


In [None]:
!pip install openvino-dev --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m82.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m69.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# inference with OpenVINO
from openvino.runtime import Core

# Read the exported ONNX model and compile it for the CPU device.
ei = Core()
onnx_model_path = "./resnet50.onnx"
model_onnx = ei.read_model(onnx_model_path)
compiled_model = ei.compile_model(model=model_onnx, device_name="CPU")

# Inference
output_layer = next(iter(compiled_model.outputs))

# Keep the OpenVINO timings in their own list. Appending to the shared
# `latency` list mixed them with the ONNX Runtime timings, so the figure
# printed below was an average over both runtimes.
openvino_latency = []
input_arr = input_batch.cpu().detach().numpy()
inputs = {'input':input_arr}
request = compiled_model.create_infer_request()

# Time only the inference call, not request creation or post-processing.
start = time.perf_counter()
output = request.infer(inputs=inputs)
openvino_latency.append(time.perf_counter() - start)

outputs = request.get_output_tensor(output_layer.index).data
output = softmax(outputs.flatten())
top5_catid = np.argsort(-output)[:5]
for catid in top5_catid:
  print(categories[catid], output[catid])

print("OpenVINO CPU Inference time = {} ms".format(format(sum(openvino_latency) * 1000 / len(openvino_latency), '.2f')))

print("***** Verifying correctness *****")
# The model has a single output, so one comparison is enough; the previous
# loop printed the same comparison twice.
print('OpenVINO and ONNX Runtime output {} are close:'.format(0), np.allclose(ort_output, outputs, rtol=1e-05, atol=1e-04))

tabby 0.20865975
Egyptian cat 0.20229013
lynx 0.19073662
tiger cat 0.1581131
hamper 0.020690616
OpenVINO CPU Inference time = 74.33 ms
***** Verifying correctness *****
OpenVINO and ONNX Runtime output 0 are close: True
OpenVINO and ONNX Runtime output 1 are close: True


array([1, 2, 2, 2])