# Using ONNX Runtime for inference with Konduit Serving 


## Download the model file

In [1]:
import os 
from urllib.request import urlretrieve 
# Local destination of the ONNX model file and the remote location it is fetched from.
dl_path = os.path.abspath("../data/facedetector/facedetector.onnx")
DOWNLOAD_URL = "https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/master/models/onnx/version-RFB-320.onnx"

# Download only when the file is not already present, so re-running the cell is cheap.
if not os.path.isfile(dl_path):
    # urlretrieve does not create missing parent directories; make sure the
    # target folder exists so a fresh checkout does not fail here.
    os.makedirs(os.path.dirname(dl_path), exist_ok=True)
    urlretrieve(DOWNLOAD_URL, filename=dl_path)

## Optimize the model

In [2]:
import onnx
from onnx import optimizer

# Read the downloaded model back from disk.
onnx_model = onnx.load(dl_path)

# Names of the optimizer passes to run on the model.
passes = [
    "extract_constant_to_initializer",
    "eliminate_unused_initializer",
]

# Run the passes, then write the result to the same path, replacing the
# downloaded file with its optimized version.
optimized_model = optimizer.optimize(onnx_model, passes)
onnx.save(optimized_model, dl_path)

In [4]:
# Source code of the Python pipeline step, kept as a string so it can be passed to
# PythonConfig(python_code=...) in a later cell. That step is declared with one
# NDARRAY input named `image` and one NDARRAY output named `img_out_y`.
#
# What the script does with `image`:
#   1. casts it to uint8, takes element 0 along the first axis and wraps it as an
#      RGB PIL image;
#   2. resizes it with transforms.Resize([240, 320]), converts it to a tensor and
#      adds a leading dimension with unsqueeze_(0);
#   3. runs the ONNX model at `dl_path` through onnxruntime and stores the first
#      model output in `img_out_y`.
#
# NOTE(review): `io`, `base64`, `base64_to_ndarray` and `input_img` are not used
# anywhere in the script; they look like leftovers — confirm before removing.
# NOTE(review): an InferenceSession is constructed every time the script runs —
# presumably once per request; confirm whether it can be created once and reused.
# NOTE(review): `dl_path` is resolved with os.path.abspath, i.e. relative to the
# working directory of whichever process executes this script — confirm that it
# matches the notebook's working directory.
python_code = """

from PIL import Image 
import torchvision.transforms as transforms
import onnxruntime
import io 
import base64
import os 
from utils import base64_to_ndarray
dl_path = os.path.abspath("../data/facedetector/facedetector.onnx")

image = Image.fromarray(image.astype('uint8')[0], 'RGB')
resize = transforms.Resize([240, 320])
img_y = resize(image)
to_tensor = transforms.ToTensor()
img_y = to_tensor(img_y)
img_y.unsqueeze_(0)

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

input_img = to_numpy(img_y)

ort_session = onnxruntime.InferenceSession(dl_path)
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img_y)}
ort_outs = ort_session.run(None, ort_inputs)
img_out_y = ort_outs[0]
"""

In [5]:
import os 
from konduit import PythonConfig, ServingConfig, InferenceConfiguration
from konduit import PythonStep
from konduit.server import Server
from konduit.client import Client 
from konduit.utils import default_python_path

import sys 
import numpy as np 
import time 
from PIL import Image 

import socket  # imported here because only this cell needs it (free-port lookup)


def _find_free_port():
    """Return a TCP port number that the OS reports as currently unused.

    Binding to port 0 lets the operating system choose an available port.
    This avoids two problems of drawing a random number: landing on a
    privileged port (< 1024) and landing on a port that is already taken.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(("", 0))
        return sock.getsockname()[1]


# Fixed wait after server.start(), in seconds. Presumably this gives the server
# time to come up before the client cell runs — adjust if startup is slower.
SERVER_STARTUP_SECONDS = 30

work_dir = os.path.abspath('.')

# Pipeline step that runs `python_code`: one NDARRAY input named `image`,
# one NDARRAY output named `img_out_y`.
python_config = PythonConfig(
    python_code=python_code,
    python_inputs={"image": "NDARRAY"},
    python_outputs={"img_out_y": "NDARRAY"},
    python_path=default_python_path(work_dir),
)

onnx_step = PythonStep().step(python_config)

# HTTP port for the server; the client cell builds its URL from this value.
port = _find_free_port()

server = Server(
    steps=onnx_step,
    serving_config=ServingConfig(http_port=port),
)

server.start()
time.sleep(SERVER_STARTUP_SECONDS)

In [6]:
# Display the configuration held by the server object as a dictionary
# (the pipeline steps and the Python step's code appear in the output).
server.config.as_dict()

{'@type': 'InferenceConfiguration',
 'pipelineSteps': [{'@type': 'PythonStep',
   'inputSchemas': {'default': ['NDArray']},
   'outputSchemas': {'default': ['NDArray']},
   'inputNames': ['default'],
   'outputNames': ['default'],
   'inputColumnNames': {'default': ['image']},
   'outputColumnNames': {'default': ['img_out_y']},
   'pythonConfigs': {'default': {'@type': 'PythonConfig',
     'pythonCode': '\n\nfrom PIL import Image \nimport torchvision.transforms as transforms\nimport onnxruntime\nimport io \nimport base64\nimport os \nfrom utils import base64_to_ndarray\ndl_path = os.path.abspath("../data/facedetector/facedetector.onnx")\n\nimage = Image.fromarray(image.astype(\'uint8\')[0], \'RGB\')\nresize = transforms.Resize([240, 320])\nimg_y = resize(image)\nto_tensor = transforms.ToTensor()\nimg_y = to_tensor(img_y)\nimg_y.unsqueeze_(0)\n\ndef to_numpy(tensor):\n    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()\n\ninput_img = to_numpy(img_y

In [7]:
# Client that talks to the server on the port it was started with; arrays are
# sent and received as NumPy data.
client = Client(
    input_type='NUMPY',
    return_output_type='NUMPY',
    endpoint_output_type="RAW",
    url='http://localhost:' + str(port)
)

# Open the test image in a context manager so the file handle is released
# deterministically, and force RGB because the server-side script builds its
# PIL image in 'RGB' mode (a grayscale or CMYK JPEG would otherwise break it).
with Image.open("../data/facedetector/1.jpg") as image_file:
    im = np.array(image_file.convert("RGB")).astype("int")

# Send the image under the pipeline's "default" input name and print the result.
a = client.predict(
    {"default": im}
)
print(a)

[[[0.9309567  0.06904326]
  [0.9341099  0.06589006]
  [0.935605   0.06439508]
  ...
  [0.93740726 0.06259278]
  [0.94315267 0.05684736]
  [0.95290583 0.04709423]]]


In [8]:
# Stop the server that was started earlier in the notebook.
server.stop()