This notebook measures the performance of PySDK batch prediction. It reports two average per-frame processing times: the core inference time of the AI accelerator and the total frame inference time. To exclude any pre-processing overhead, the input frame is resized to the model input size and converted to a binary array before the measurement starts.

In [None]:
# import DeGirum PySDK
import degirum as dg
import time

In [None]:
# Connect to the model zoo of the AI server.
# Set ai_server_address to the IP address of the AI server before running this cell;
# use localhost if the server runs on this machine.
# NOTE(review): what connect_model_zoo does when the address is left as None is not
# visible in this notebook - confirm against the PySDK documentation.
ai_server_address = None # fill in the IP address of AI server. Use localhost if running locally
zoo = dg.connect_model_zoo(ai_server_address)

# List all AI models available for inference (displayed as the cell output).
zoo.list_models()

In [None]:
# Model to benchmark: MobileNetV2 SSD (COCO, 300x300, quantized) built for the
# DeGirum Orca AI accelerator.
# To run on the CPU instead, switch to the model name ending in "...n2x_cpu_1".
# Any other model from the zoo can be benchmarked the same way.
model_name = "mobilenet_v2_ssd_coco--300x300_quant_n2x_orca_1"
model = zoo.load_model(model_name)

In [None]:
# Run a sample image through the model's own pre-processor and keep the first
# element of the result as the benchmark input. Per this notebook's description,
# that is a binary array already resized to the model input size, so the timed
# loop pays no pre-processing cost.
# NOTE(review): _preprocessor is a private attribute of the model object - confirm
# it is still available in the PySDK version in use.
preprocessed = model._preprocessor.forward("./images/TwoCats.jpg")
blob = preprocessed[0]

# To include pre-processing overhead in the measurement, uncomment this line
# so that the image path is passed instead of the prepared array:
# blob = "./images/TwoCats.jpg"

In [None]:
nframes = 200 # number of frames to measure time for

model.measure_time = True # enable time statistics collection
model.reset_time_stats() # reset time statistics so the averages cover this run only

# Perform AI model batch inference on a list that repeats the same frame
# nframes times, and measure the total duration of the loop.
# perf_counter_ns() is used instead of time_ns(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the result cannot be
# skewed by system clock adjustments that happen during the run.
tstart = time.perf_counter_ns()
for res in model.predict_batch([blob] * nframes):
    pass # results are discarded; only the elapsed time matters
# nanoseconds -> milliseconds, averaged over all frames
frame_time_ms = (time.perf_counter_ns() - tstart) * 1e-6 / nframes

# average of the 'CoreInferenceDuration_ms' statistic collected by the model
ai_time_ms = model.time_stats()['CoreInferenceDuration_ms'].avg

print("Average per-frame times ---------------------------------")
print(f"Pure AI inference time: {ai_time_ms:.1f} ms")
print(f"Total inference time:   {frame_time_ms:.1f} ms")
# overhead = measured per-frame loop time minus the reported core inference time
print(f"Non-pipelined overhead: {(frame_time_ms - ai_time_ms):.2f} ms")