In [None]:
# Create the InferenceServer handle for the endpoint at localhost:8001 and keep
# it in `trtis` for the cells below.
# NOTE(review): display_output() is defined outside this section; presumably it
# routes any output produced during construction into the notebook - confirm.
with display_output():
    trtis = infer.InferenceServer(url="localhost:8001")

In [None]:
# Ask the server handle which models it reports; the value is shown as the cell output.
trtis.available_models()

In [None]:
# Obtain a runner for the model named "mnist"; later cells use it to inspect
# bindings and to submit inference requests.
mnist = trtis.infer_runner("mnist")

## 4. Inspect Model

Query the `InferenceRunner` to see what it expects for inputs and what it will return for outputs.

In [None]:
# Show what the runner expects as inputs (the timing cell reads each entry's 'shape').
mnist.input_bindings()

In [None]:
# Show what the runner will return as outputs.
mnist.output_bindings()

## 5. Submit Infer Requests

`InferenceRunner.infer` accepts numpy arrays that match the input description (passed as keyword arguments, one per input binding), submits this inference request to the backend compute engine, and returns a future to a dict of numpy arrays.

That means this method should return almost immediately; however, that does not mean the inference is complete.  Use `get()` on the returned future to wait for the result.  This is a blocking call.

In [None]:
# Submit a single random 1x28x28 sample under the "Input3" input name.
# The call hands back a future rather than the finished outputs.
result = mnist.infer(Input3=np.random.random_sample(size=(1, 28, 28)))
result  # still a future here; call .get() on it to obtain the outputs

In [None]:
# Block until the future resolves and replace it with its value.
# NOTE: this rebinds `result` from the future to its value, so this cell cannot
# simply be re-run on its own; re-run the preceding infer cell first to get a
# fresh future.
result = result.get()
result # result is the value of the future - dict of np arrays

In [None]:
# Build one random array per input binding *before* starting the clock, so that
# array generation is not counted as queue time.
random_inputs = {
    name: np.random.random_sample(binding['shape'])
    for name, binding in mnist.input_bindings().items()
}

# Use a wall-clock timer. time.process_time() only counts CPU time consumed by
# this process, so time spent blocked in .get() waiting for the result would
# not be measured.
start = time.perf_counter()
result = mnist.infer(**random_inputs)
print("Queue Time: {}".format(time.perf_counter() - start))

result = result.get()
# Measured from `start`, so this figure includes the queue time printed above.
print("Compute Time: {}".format(time.perf_counter() - start))

## 6. Test for Correctness

Load test image and results.  [Thanks to the ONNX Model Zoo](https://github.com/onnx/models/tree/master/mnist) for this example.

In [None]:
# The sample inputs and their reference outputs are read from the same
# test-data directory.
test_data_dir = "/work/models/onnx/mnist-v1.3/test_data_set_0"
inputs = utils.load_inputs(test_data_dir)
expected = utils.load_outputs(test_data_dir)

In [None]:
# Render the first test input as an image, then display its reference output
# as the cell result.
first_image = utils.mnist_image(inputs[0])
first_image.show()
expected[0]


Submit the images to the inference queue, then wait for each result to be returned.

In [None]:
# Queue every test input first so the requests are all in flight ...
pending = [mnist.infer(Input3=image) for image in inputs]
# ... then block on each future in submission order to collect the outputs.
results = [future.get() for future in pending]

Check results.

**TODO:** update the utils to return dictionaries instead of arrays.

In [None]:
# zip() stops at the shorter sequence, so without this guard a missing result
# (or missing reference output) would be skipped silently instead of failing.
assert len(results) == len(expected), "results and expected outputs differ in length"

for outputs, reference in zip(results, expected):
    # `outputs` maps each output binding name to the array returned for it.
    for key, val in outputs.items():
        # Reshape to (1, 10) before comparing against the reference output.
        # A separate name is used so the dict being iterated is not rebound.
        scores = val.reshape((1,10))
        np.testing.assert_almost_equal(scores, reference, decimal=3)
        print("Output Binding Name: {}; shape: {}".format(key, val.shape))
        # The predicted class is the index with the highest softmax score.
        print("Result: {}".format(np.argmax(utils.softmax(scores))))
        # inspect `scores` here to see the raw tensor

In [None]:
# Show again what the runner will return as outputs.
mnist.output_bindings()

In [None]:
# Submit another random 1x28x28 sample under the "Input3" input name.
# What comes back is a future, not the finished outputs.
result = mnist.infer(Input3=np.random.random_sample(size=(1, 28, 28)))
result  # still a future here; call .get() on it to obtain the outputs

In [None]:
# Block until the future resolves and replace it with its value.
# NOTE: this rebinds `result` from the future to its value, so re-run the
# preceding infer cell first if this cell needs to be executed again.
result = result.get()
result # result is the value of the future - dict of np arrays

In [None]:
# Build one random array per input binding *before* starting the clock, so that
# array generation is not counted as queue time.
random_inputs = {
    name: np.random.random_sample(binding['shape'])
    for name, binding in mnist.input_bindings().items()
}

# Use a wall-clock timer. time.process_time() only counts CPU time consumed by
# this process, so time spent blocked in .get() waiting for the result would
# not be measured.
start = time.perf_counter()
result = mnist.infer(**random_inputs)
print("Queue Time: {}".format(time.perf_counter() - start))

result = result.get()
# Measured from `start`, so this figure includes the queue time printed above.
print("Compute Time: {}".format(time.perf_counter() - start))