In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch
from app.util.timer import Timer
from app.util.util import Differ
from main import YoloRuntimeTest
from functools import partial

Check CUDA available

In [3]:
torch.cuda.is_available()

True

In [4]:
torch.cuda.device_count()

1

GPU input

In [5]:
args_pytorch = {
    "weights": "./app/weights/yolov9c.pt", 
    "source": "./app/assets/sample_image.jpeg", 
    "classes": "./app/weights/metadata.yaml", 
    "type": "image",
    "show": False, 
    "score_threshold": 0.1, 
    "conf_threshold": 0.2, 
    "iou_threshold": 0.6, 
    "device": "cuda:1"
}

args_onnx = {
    "weights": "./app/weights/yolov9c.onnx", 
    "source": "./app/assets/sample_image.jpeg", 
    "classes": "./app/weights/metadata.yaml",
    "type": "image", 
    "show": False, 
    "score_threshold": 0.1, 
    "conf_threshold": 0.2, 
    "iou_threshold": 0.6, 
    "device": "cuda:1"
}

args_openvino = {
    "weights": "./app/weights/yolov9c_openvino_model", 
    "source": "./app/assets/sample_image.jpeg", 
    "classes": "./app/weights/metadata.yaml",
    "type": "image", 
    "show": False, 
    "score_threshold": 0.1, 
    "conf_threshold": 0.2, 
    "iou_threshold": 0.6, 
    "device": "cuda:1"
}

Initilize YOLO runtime test class

In [6]:
yolo_runtime_test = YoloRuntimeTest()

In [7]:
gpu_pytorch_image = yolo_runtime_test.ultralytics_run_image(args_pytorch)
print(gpu_pytorch_image)

[INFO] Initialize Model
[INFO] Inference Image

image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image.jpeg: 448x640 1 person, 1 bicycle, 6 cars, 3 trucks, 96.5ms
Speed: 3.4ms preprocess, 96.5ms inference, 507.1ms postprocess per image at shape (1, 3, 448, 640)
Class: car, Confidence: 0.93, Box: [558, 206, 808, 359]
Class: car, Confidence: 0.92, Box: [286, 210, 458, 352]
Class: car, Confidence: 0.91, Box: [465, 217, 596, 339]
Class: person, Confidence: 0.87, Box: [159, 143, 301, 403]
Class: truck, Confidence: 0.87, Box: [103, 90, 255, 316]
Class: truck, Confidence: 0.78, Box: [722, 170, 871, 346]
Class: truck, Confidence: 0.75, Box: [0, 154, 94, 354]
Class: bicycle, Confidence: 0.65, Box: [210, 321, 266, 443]
Class: car, Confidence: 0.52, Box: [78, 212, 113, 300]
Class: car, Confidence: 0.36, Box: [420, 226, 474, 319]
Class: car, Confidence: 0.30, Box: [420, 227, 464, 278]
Elapsed time: 1.1042 seconds
([['car', 0.9298826456069946, 558, 206, 808, 359],

In [8]:
gpu_openvino_image = yolo_runtime_test.ultralytics_run_image(args_openvino)
print(gpu_openvino_image)

[INFO] Initialize Model
[INFO] Inference Image
Loading app\weights\yolov9c_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...

image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image.jpeg: 640x640 1 person, 1 bicycle, 6 cars, 3 trucks, 1 traffic light, 128.4ms
Speed: 2.0ms preprocess, 128.4ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 640)
Class: car, Confidence: 0.93, Box: [558, 206, 808, 359]
Class: car, Confidence: 0.92, Box: [286, 210, 458, 352]
Class: car, Confidence: 0.91, Box: [465, 217, 596, 339]
Class: person, Confidence: 0.87, Box: [159, 143, 301, 403]
Class: truck, Confidence: 0.87, Box: [103, 89, 255, 316]
Class: truck, Confidence: 0.79, Box: [722, 170, 871, 346]
Class: truck, Confidence: 0.75, Box: [0, 154, 94, 354]
Class: bicycle, Confidence: 0.65, Box: [210, 321, 266, 443]
Class: car, Confidence: 0.53, Box: [78, 212, 113, 300]
Class: car, Confidence: 0.36, Box: [420, 226, 474, 3

In [9]:
gpu_onnx_image = yolo_runtime_test.ultralytics_run_image(args_onnx)
print(gpu_onnx_image)

[INFO] Initialize Model
[INFO] Inference Image
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...

image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image.jpeg: 640x640 1 person, 1 bicycle, 6 cars, 3 trucks, 1 traffic light, 20.0ms
Speed: 2.0ms preprocess, 20.0ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)
Class: car, Confidence: 0.93, Box: [558, 206, 808, 359]
Class: car, Confidence: 0.92, Box: [286, 210, 458, 352]
Class: car, Confidence: 0.91, Box: [465, 217, 596, 339]
Class: person, Confidence: 0.87, Box: [159, 143, 301, 403]
Class: truck, Confidence: 0.87, Box: [103, 89, 255, 316]
Class: truck, Confidence: 0.79, Box: [722, 170, 871, 346]
Class: truck, Confidence: 0.75, Box: [0, 154, 94, 354]
Class: bicycle, Confidence: 0.65, Box: [210, 321, 266, 443]
Class: car, Confidence: 0.53, Box: [78, 212, 113, 300]
Class: car, Confidence: 0.36, Box: [420, 226, 474, 319]
Class: car, Confidence: 0.30, Box: [420, 227, 464, 278]
C

In [10]:
gpu_onnx_runtime_image = yolo_runtime_test.onnxruntime_run_image(args_onnx)
print(gpu_onnx_runtime_image)

[INFO] Initialize Model
[INFO] Inference Image
['CUDAExecutionProvider', 'CPUExecutionProvider']
Class: car, Confidence: 0.92, Box: [556, 206, 810, 359]
Class: car, Confidence: 0.92, Box: [463, 217, 595, 338]
Class: car, Confidence: 0.91, Box: [286, 210, 459, 351]
Class: person, Confidence: 0.90, Box: [159, 143, 299, 403]
Class: truck, Confidence: 0.87, Box: [723, 171, 871, 345]
Class: truck, Confidence: 0.84, Box: [102, 89, 257, 314]
Class: bicycle, Confidence: 0.72, Box: [209, 322, 269, 441]
Class: truck, Confidence: 0.68, Box: [0, 154, 93, 354]
Class: car, Confidence: 0.40, Box: [78, 225, 113, 300]
Class: car, Confidence: 0.23, Box: [421, 225, 483, 268]
Class: car, Confidence: 0.22, Box: [421, 229, 470, 320]
Elapsed time: 8.5365 seconds
([['car', 0.9232661724090576, 556, 206, 810, 359], ['car', 0.9168801307678223, 463, 217, 595, 338], ['car', 0.9103737473487854, 286, 210, 459, 351], ['person', 0.9022603631019592, 159, 143, 299, 403], ['truck', 0.8684412837028503, 723, 171, 871, 345]

Difference GPU

In [11]:
pd.set_option('display.expand_frame_repr', False)

def generate_difference_df(image1, image2, label):
    differ = Differ(np.array(image1), np.array(image2))
    result = differ.find_difference()
    return pd.DataFrame(result, columns=[label, "gpu conf_diff", "gpu box_diff (px)"])

df_pt_openvino = generate_difference_df(gpu_pytorch_image[0], gpu_openvino_image[0], "pt vs openvino+ultralytics")
df_pt_onnx = generate_difference_df(gpu_pytorch_image[0], gpu_onnx_image[0], "pt vs onnx+ultralytics")
df_pt_onnxruntime = generate_difference_df(gpu_pytorch_image[0], gpu_onnx_runtime_image[0], "pt vs onnxruntime")

df_combined = pd.concat([df_pt_openvino, df_pt_onnx, df_pt_onnxruntime], axis=1)

print(df_combined)

   pt vs openvino+ultralytics gpu conf_diff     gpu box_diff (px) pt vs onnx+ultralytics gpu conf_diff     gpu box_diff (px) pt vs onnxruntime gpu conf_diff       gpu box_diff (px)
0                         car        0.0006  [0.0, 0.0, 0.0, 0.0]                    car        0.0006  [0.0, 0.0, 0.0, 0.0]               car        0.0066    [2.0, 0.0, 2.0, 0.0]
1                         car        0.0004  [0.0, 0.0, 0.0, 0.0]                    car        0.0004  [0.0, 0.0, 0.0, 0.0]               car        0.0101    [0.0, 0.0, 1.0, 1.0]
2                         car        0.0003  [0.0, 0.0, 0.0, 0.0]                    car        0.0003  [0.0, 0.0, 0.0, 0.0]               car        0.0052    [2.0, 0.0, 1.0, 1.0]
3                      person        0.0004  [0.0, 0.0, 0.0, 0.0]                 person        0.0005  [0.0, 0.0, 0.0, 0.0]            person        0.0295    [0.0, 0.0, 2.0, 0.0]
4                       truck        0.0003  [0.0, 1.0, 0.0, 0.0]                  truck       

Average GPU Time (10)

In [12]:
result_time = []

In [13]:
def collect_execution_times(run_inference_func, iterations=10):
    execution_times = []
    for _ in range(iterations):
        execution_time = run_inference_func()
        execution_times.append(execution_time[1] * 100)
    return execution_times

pytorch_func = partial(yolo_runtime_test.ultralytics_run_image, args=args_pytorch)
openvino_func = partial(yolo_runtime_test.ultralytics_run_image, args=args_openvino)
onnx_func = partial(yolo_runtime_test.ultralytics_run_image, args=args_onnx)
onnx_runtime_func = partial(yolo_runtime_test.onnxruntime_run_image, args=args_onnx)

result_time.append(collect_execution_times(pytorch_func))
result_time.append(collect_execution_times(openvino_func))
result_time.append(collect_execution_times(onnx_func))
result_time.append(collect_execution_times(onnx_runtime_func))

[INFO] Initialize Model
[INFO] Inference Image

image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image.jpeg: 448x640 1 person, 1 bicycle, 6 cars, 3 trucks, 7.0ms
Speed: 1.5ms preprocess, 7.0ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)
Class: car, Confidence: 0.93, Box: [558, 206, 808, 359]
Class: car, Confidence: 0.92, Box: [286, 210, 458, 352]
Class: car, Confidence: 0.91, Box: [465, 217, 596, 339]
Class: person, Confidence: 0.87, Box: [159, 143, 301, 403]
Class: truck, Confidence: 0.87, Box: [103, 90, 255, 316]
Class: truck, Confidence: 0.78, Box: [722, 170, 871, 346]
Class: truck, Confidence: 0.75, Box: [0, 154, 94, 354]
Class: bicycle, Confidence: 0.65, Box: [210, 321, 266, 443]
Class: car, Confidence: 0.52, Box: [78, 212, 113, 300]
Class: car, Confidence: 0.36, Box: [420, 226, 474, 319]
Class: car, Confidence: 0.30, Box: [420, 227, 464, 278]
Elapsed time: 0.1720 seconds
[INFO] Initialize Model
[INFO] Inference Image

image 

In [14]:
result_time = np.array(result_time)
df = pd.DataFrame(np.transpose(result_time), 
                  columns=["pytorch time gpu (ms)",
                           "openvino+ultralytics time gpu (ms)",
                           "onnx​+ultralytics time gpu (ms)", 
                           "onnx runtime time gpu (ms)"])
df.describe(percentiles=[.9, .95])

Unnamed: 0,pytorch time gpu (ms),openvino+ultralytics time gpu (ms),onnx​+ultralytics time gpu (ms),onnx runtime time gpu (ms)
count,10.0,10.0,10.0,10.0
mean,19.512315,42.650696,723.781164,844.477048
std,1.231816,4.672341,124.174836,1.352624
min,17.19764,39.64082,607.81818,841.86384
50%,19.72287,41.111,633.26964,844.88434
90%,20.9777,45.224911,868.183654,845.542096
95%,21.085475,50.362251,872.569777,845.744803
max,21.19325,55.49959,876.9559,845.94751
