This notebook provides an example of putting a DeGirum AI server under load by running many inferences on it.

The script runs multiple batch inferences in multiple threads and measures the average frame processing time.

You specify the following parameters:
 - the AI server hostname to use
 - the model name to use
 - the number of models (threads) running in parallel
 - the number of frames to process
 - the image to run inferences on
 

In [13]:
#
# Script-wide settings: adjust these values before running the cells below
#

# IP address of the AI server; fill it in (use localhost if running locally)
ai_server_address = None

# name of the model to use
model_name = "mobilenet_v2_ssd_coco--300x300_quant_n2x_orca_1"

# number of models to run in parallel (one thread per model)
nmodels = 2

# number of frames each model processes
nbatch = 100

# path to the image file used as the inference input
img = "./images/TwoCats.jpg"

In [14]:
import degirum as dg
import time
import threading

In [15]:
# connect to the AI server given by `ai_server_address` (set in the parameters cell);
# the resulting `zoo` object is what the models are loaded from further below
zoo = dg.connect_model_zoo(ai_server_address)

In [16]:
# define thread function which runs inferences in a batch, measures total inference duration, 
# and prints average frame inference time
def one_batch(model, data, nbatch):
    """Run `nbatch` inferences of `data` on `model` and print the average frame time.

    The same `data` object is fed to `model.predict_batch()` `nbatch` times. The
    inference results are discarded; only the total elapsed time is measured and
    divided by `nbatch` to obtain the average time per frame in milliseconds.

    Args:
        model: model object providing `predict_batch(iterable)`; its `n` attribute
            is printed as the model number.
        data: frame passed unchanged to the model on every iteration.
        nbatch: number of frames to process; must be positive.

    Raises:
        ValueError: if `nbatch` is not positive (the average would be undefined).
    """
    if nbatch <= 0:
        raise ValueError(f"nbatch must be a positive number of frames, got {nbatch}")

    def source():
        # feed the very same frame to the model nbatch times
        for _ in range(nbatch):
            yield data

    # perf_counter_ns() is the clock intended for measuring durations; unlike the
    # wall-clock time_ns() it is not affected by system clock adjustments
    tstart = time.perf_counter_ns()
    for _ in model.predict_batch(source()):
        pass  # results are not needed, only the elapsed time matters
    # nanoseconds -> milliseconds, averaged over the number of frames
    frame_time_ms = (time.perf_counter_ns() - tstart) * 1e-6 / nbatch
    print(f"Model #{model.n} average frame time: {frame_time_ms}")

In [17]:
# instantiate one model object per parallel worker
models = []
for index in range(nmodels):
    model = zoo.load_model(model_name)
    # remember the ordinal number of the model so it can be reported later
    model.n = index
    # send frames to the AI server in JPEG format to save on network traffic
    model._model_parameters.InputImgFmt = ["JPEG"]
    models.append(model)

# pre-process the image a single time with the first model's pre-processor and keep
# the raw bytes: batch inference then skips the pre-processing stage, which minimizes
# time variations in the measurement
first_model = models[0]
data = first_model._preprocessor.forward(img)[0]

In [18]:
# build one worker thread per loaded model
threads = [
    threading.Thread(target=one_batch, args=(m, data, nbatch))
    for m in models
]

# launch all workers
for worker in threads:
    worker.start()

# block until every worker has finished
for worker in threads:
    worker.join()

Model #0 average frame time: 22.590663
Model #1 average frame time: 22.665187999999997
