## Performance Test for Single-Model Inference
This notebook contains performance measurements for all Orca-based image detection AI models from the DeGirum
public model zoo.

This script works with the following inference options:

1. Run inference on DeGirum Cloud Platform;
2. Run inference on DeGirum AI Server deployed on localhost or on another computer in your LAN or VPN;
3. Run inference on DeGirum ORCA accelerator directly installed on your computer.

To try different options, you just need to uncomment **one** of the lines in the code below.

You also need to specify your cloud API access token, cloud zoo URLs, and AI server hostname in the [env.ini](env.ini) file, located in the same directory as this notebook.

#### Specify test options here

In [None]:
# Number of timed inferences to run for each model.
iterations = 100

# Model input format: True selects JPEG, False selects bitmap input.
use_jpeg = True

# When True, the preprocessing step is excluded from the timing measurements.
exclude_preprocessing = True

# Eager batch size to test; None leaves the model's default untouched.
batch_size = None


#### Specify where you want to run your inferences

In [None]:
import degirum as dg, mytools

cloud_token = mytools.get_token()  # cloud API access token, read from the env.ini file
cloud_zoo_url = mytools.get_cloud_zoo_url()  # cloud zoo URL, read from the env.ini file

#
# Keep exactly ONE of the `zoo = ...` lines below uncommented to choose where AI inference runs.
# The resulting `zoo` object is what the cells below use to list and load models.
#

# 1. Inference on the DeGirum Cloud Platform (the option that is active as shipped)
zoo = dg.connect(dg.CLOUD, cloud_zoo_url, cloud_token)

# 2. Inference on DeGirum AI Server deployed on localhost or on another computer in your LAN or VPN
# zoo = dg.connect(mytools.get_ai_server_hostname(), cloud_zoo_url, cloud_token)

# 3. Inference on DeGirum ORCA accelerator installed on your computer
# zoo = dg.connect(dg.LOCAL, cloud_zoo_url, cloud_token)


#### The rest of the cells below should run without any modifications

In [None]:
# list of models to test: names the connected zoo returns for the "ORCA" device filter
model_names = zoo.list_models(device="ORCA")

# define function to run a single model batch prediction
def do_test(model_name):
    """Benchmark one model from the zoo with a timed batch prediction run.

    Loads `model_name` from the notebook-level `zoo`, configures it from the notebook-level
    options (`use_jpeg`, `batch_size`, `exclude_preprocessing`), runs one warm-up inference,
    and then times `iterations` inferences of the same frame through `predict_batch`.

    Args:
        model_name: name of the model to load from `zoo`.

    Returns:
        None when the model's first input type is not "Image"; otherwise a dictionary with
            "postprocess_type": the model's `output_postprocess_type`,
            "elapsed": the `mytools.Timer` reading taken right after the batch run,
            "time_stats": the value returned by `model.time_stats()`.
    """

    # load model
    with zoo.load_model(model_name) as model:

        # skip non-image type models
        if model.model_info.InputType[0] != "Image":
            return None

        # configure model
        model.image_backend = "opencv"  # select OpenCV backend
        model.input_numpy_colorspace = "BGR"
        model._model_parameters.InputImgFmt = ["JPEG" if use_jpeg else "RAW"]
        model.measure_time = True
        if batch_size is not None:
            model.eager_batch_size = batch_size
            model.frame_queue_depth = batch_size

        # prepare input frame
        frame = "Images/TwoCats.jpg"
        if exclude_preprocessing:
            # run the preprocessor once up front so the timed loop receives its output
            frame = model._preprocessor.forward(frame)[0]

        # define source of frames: the same frame, repeated `iterations` times
        def source():
            for _ in range(iterations):
                yield frame

        # run model once to warm up the system
        # NOTE(review): measure_time is already enabled here, so this warm-up frame may be
        # included in time_stats() - confirm against the PySDK behavior.
        model(frame)

        # run batch prediction; results are drained and discarded, only timing matters
        t = mytools.Timer()
        for _ in model.predict_batch(source()):
            pass
        # read the timer immediately so no bookkeeping below is counted as inference time
        elapsed = t()

        return {
            "postprocess_type": model.output_postprocess_type,
            "elapsed": elapsed,
            "time_stats": model.time_stats(),
        }


In [None]:
# Benchmark every listed model in turn and keep each outcome under the model's name
# (do_test returns None for the models it skips), advancing the progress indicator as we go.
results = {}
prog = mytools.Progress(len(model_names), speed_units="models/s")
for model_name in model_names:
    model_result = do_test(model_name)
    results[model_name] = model_result
    prog.step()


In [None]:
# print results: two header rows (column titles, then units/sub-columns) and a separator line
header_titles = (
    f"{'Model name':62}| {'Postprocess type':20} | {'Throughput, FPS':15} | {'Latency, ms':13} | {'Temperature':11}"
)
# the degree sign was previously mis-encoded as 'Â°C'
header_units = f"{' ':62}| {' ':20} | {'(inference, ms)':15} | {'(frames)':>13} | {'°C':11}"
print(header_titles)
print(header_units)
print("-" * 130)


def print_result(res, name=None):
    """Print one formatted table row with the benchmark figures of a single model.

    Args:
        res: result dictionary as returned by do_test(), with the keys "postprocess_type",
            "elapsed" and "time_stats". "elapsed" is treated as seconds for the whole run of
            `iterations` frames.
        name: model name shown in the first column. When omitted, the notebook-level
            `model_name` variable is used, which is how this function originally obtained it.
    """
    if name is None:
        # backward-compatible fallback to the loop variable of the calling cell
        name = model_name

    stats = res["time_stats"]
    latency_ms = stats["FrameTotalDuration_ms"].avg
    inference_ms = stats["CoreInferenceDuration_ms"].avg
    temp_C = stats["DeviceTemperature_C"].avg
    # average wall-clock time per frame over the whole batch run
    frame_duration_ms = 1e3 * res["elapsed"] / iterations

    print(
        f"{name:62}|"
        + f" {res['postprocess_type']:20} |"
        + f" {1e3 / frame_duration_ms:7.1f}"
        + f" ({inference_ms:5.1f}) |"
        + f"  {latency_ms:6.1f}"
        # latency expressed as a number of per-frame durations
        + f" ({round(latency_ms / frame_duration_ms):3d}) |"
        + f" {temp_C:3.0f}"
    )


# print one row per benchmarked model; skipped (non-image) models have a None result
for model_name, res in results.items():
    if res is not None:
        print_result(res, model_name)
