In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch
import pickle
from app.util.timer import Timer
from app.util.Differ import Differ
from main import YoloRuntimeTest
from functools import partial

Check CUDA availability

In [2]:
# True when PyTorch can reach at least one CUDA device.
torch.cuda.is_available()

True

In [3]:
# Number of CUDA devices visible to PyTorch.
torch.cuda.device_count()

1

GPU input

In [4]:
# Settings common to every runtime under test; only the weights file differs.
# NOTE(review): every runtime, including OpenVINO, is given device "cuda:0" --
# confirm that the OpenVINO backend actually honours this setting.
_SHARED_ARGS = {
    "source": "./app/assets/sample_image_3.jpg",
    "classes": "./app/weights/metadata.yaml",
    "type": "image",
    "show": False,
    "conf_threshold": 0.25,
    "iou_threshold": 0.45,
    "device": "cuda:0",
}

# "weights" is listed first so each dict keeps the same key order as before;
# unpacking creates an independent dict per runtime.
args_pytorch = {"weights": "./app/weights/yolov9c.pt", **_SHARED_ARGS}

args_onnx = {"weights": "./app/weights/yolov9c.onnx", **_SHARED_ARGS}

args_openvino = {"weights": "./app/weights/yolov9c_openvino_model", **_SHARED_ARGS}

Initialize YOLO runtime test class

In [5]:
# Single test-harness instance; every inference call below goes through it.
yolo_runtime_test = YoloRuntimeTest()

In [6]:
# PyTorch weights through ultralytics_run_image. The result is indexable: later
# cells read [0] for the difference tables and [1] for the timing benchmark.
gpu_pytorch_image = yolo_runtime_test.ultralytics_run_image(args_pytorch)
print(gpu_pytorch_image)

[INFO] Initialize Model
[INFO] Inference Image


infer time: 0.1843855000000012 s
infer time: 0.0957111000000026 s
image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image_3.jpg: 480x640 6 persons, 8 cars, 1 truck, 9 traffic lights, 96.2ms
image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image_3.jpg: 480x640 6 persons, 8 cars, 1 truck, 9 traffic lights, 96.2ms
Speed: 3.5ms preprocess, 96.2ms inference, 529.0ms postprocess per image at shape (1, 3, 480, 640)
Speed: 3.5ms preprocess, 96.2ms inference, 529.0ms postprocess per image at shape (1, 3, 480, 640)
Elapsed time: 0.0962 seconds
Class: car, Confidence: 0.93, Box: [1, 2018, 664, 2355]
Class: car, Confidence: 0.91, Box: [815, 2097, 1315, 2295]
Class: traffic light, Confidence: 0.82, Box: [1346, 1263, 1408, 1428]
Class: traffic light, Confidence: 0.82, Box: [1907, 1279, 1966, 1447]
Class: person, Confidence: 0.82, Box: [1487, 2043, 1569, 2324]
Class: traffic light, Co

In [7]:
# OpenVINO model directory through the same ultralytics_run_image entry point.
gpu_openvino_image = yolo_runtime_test.ultralytics_run_image(args_openvino)
print(gpu_openvino_image)

[INFO] Initialize Model
[INFO] Inference Image
Loading app\weights\yolov9c_openvino_model for OpenVINO inference...
Loading app\weights\yolov9c_openvino_model for OpenVINO inference...
Using OpenVINO LATENCY mode for batch=1 inference...
Using OpenVINO LATENCY mode for batch=1 inference...


image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image_3.jpg: 640x640 6 persons, 8 cars, 1 truck, 10 traffic lights, 122.8ms
image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image_3.jpg: 640x640 6 persons, 8 cars, 1 truck, 10 traffic lights, 122.8ms
Speed: 1.0ms preprocess, 122.8ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 640)
Speed: 1.0ms preprocess, 122.8ms inference, 8.5ms postprocess per image at shape (1, 3, 640, 640)
Elapsed time: 0.1228 seconds
Class: car, Confidence: 0.93, Box: [1, 2018, 663, 2354]
Class: car, Confidence: 0.91, Box: [815, 2097, 1315, 2295]
Class: person, Confidence: 0.82, Box: [1487, 20

In [8]:
# ONNX weights through the same ultralytics_run_image entry point.
gpu_onnx_image = yolo_runtime_test.ultralytics_run_image(args_onnx)
print(gpu_onnx_image)

[INFO] Initialize Model
[INFO] Inference Image
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...


infer time: 8.8292966 s
infer time: 0.0243259000000009 s
image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image_3.jpg: 640x640 6 persons, 8 cars, 1 truck, 10 traffic lights, 26.7ms
image 1/1 c:\Users\User\OneDrive\Desktop\pomper\Internship\yolov9\app\assets\sample_image_3.jpg: 640x640 6 persons, 8 cars, 1 truck, 10 traffic lights, 26.7ms
Speed: 3.0ms preprocess, 26.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Speed: 3.0ms preprocess, 26.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Elapsed time: 0.0267 seconds
Class: car, Confidence: 0.93, Box: [1, 2018, 663, 2354]
Class: car, Confidence: 0.91, Box: [815, 2097, 1315, 2295]
Class: person, Confidence: 0.82, Box: [1487, 2043, 1569, 2323]
Class: traffic light, Confidence: 0.82, Box: [134

In [9]:
# Same ONNX weights, but run through onnxruntime_run_image instead of the
# Ultralytics entry point used in the previous cells.
gpu_onnx_runtime_image = yolo_runtime_test.onnxruntime_run_image(args_onnx)
print(gpu_onnx_runtime_image)

[INFO] Initialize Model
[INFO] Inference Image
run infer time: 8.477141500000002
Elapsed time: 8.5435 seconds
Class: car, Confidence: 0.94, Box: [0, 2016, 659, 2356]
Class: car, Confidence: 0.90, Box: [815, 2100, 1316, 2297]
Class: person, Confidence: 0.84, Box: [1488, 2047, 1568, 2320]
Class: traffic light, Confidence: 0.84, Box: [1346, 1261, 1408, 1429]
Class: traffic light, Confidence: 0.84, Box: [1907, 1281, 1967, 1447]
Class: traffic light, Confidence: 0.82, Box: [1160, 1203, 1228, 1368]
Class: car, Confidence: 0.78, Box: [1795, 2214, 1917, 2288]
Class: traffic light, Confidence: 0.76, Box: [3015, 773, 3161, 1041]
Class: traffic light, Confidence: 0.71, Box: [628, 1188, 699, 1355]
Class: traffic light, Confidence: 0.68, Box: [539, 1578, 602, 1692]
Class: traffic light, Confidence: 0.68, Box: [2751, 1027, 2872, 1242]
Class: car, Confidence: 0.66, Box: [3643, 2222, 4029, 2373]
Class: traffic light, Confidence: 0.63, Box: [411, 1513, 487, 1630]
Class: person, Confidence: 0.61, Box: [

: 

GPU result differences (each runtime vs. PyTorch)

In [None]:
# Print wide DataFrames on one line per row instead of wrapping the columns.
pd.set_option('display.expand_frame_repr', False)

def generate_difference_df(image1, image2, label):
    """Tabulate the differences that ``Differ`` finds between two results.

    Both inputs are converted to NumPy arrays and passed to ``Differ``; whatever
    ``find_difference()`` returns becomes the rows of a three-column DataFrame.

    Args:
        image1: First result to compare (array-like).
        image2: Second result to compare (array-like).
        label: Header used for the first column, naming the comparison.

    Returns:
        pandas.DataFrame with columns
        ``[label, "gpu conf_diff", "gpu box_diff (px)"]``.
    """
    column_names = [label, "gpu conf_diff", "gpu box_diff (px)"]
    first, second = np.array(image1), np.array(image2)
    rows = Differ(first, second).find_difference()
    return pd.DataFrame(rows, columns=column_names)

# Every table compares one runtime against element [0] of the PyTorch result.
pytorch_result = gpu_pytorch_image[0]

df_pt_openvino = generate_difference_df(
    pytorch_result, gpu_openvino_image[0], "pt vs openvino+ultralytics"
)
df_pt_onnx = generate_difference_df(
    pytorch_result, gpu_onnx_image[0], "pt vs onnx+ultralytics"
)
df_pt_onnxruntime = generate_difference_df(
    pytorch_result, gpu_onnx_runtime_image[0], "pt vs onnxruntime"
)

# Place the three comparisons side by side (column-wise concatenation).
side_by_side = [df_pt_openvino, df_pt_onnx, df_pt_onnxruntime]
df_combined = pd.concat(side_by_side, axis=1)

print(df_combined)

Average GPU time (10 iterations)

In [None]:
# Holds one list of timings per runtime; populated by the benchmark cell that follows.
result_time = []

In [None]:
def collect_execution_times(run_inference_func, iterations=10):
    """Call an inference function repeatedly and collect its timings in milliseconds.

    Args:
        run_inference_func: Zero-argument callable returning an indexable result
            whose element ``[1]`` is the elapsed time. Assumed to be in seconds
            (the single-run cells print "Elapsed time: ... seconds") -- TODO
            confirm against the runner implementation.
        iterations: Number of times to call ``run_inference_func``.

    Returns:
        list with ``iterations`` entries, each elapsed time converted to ms.
    """
    # Seconds -> milliseconds. The previous factor of 100 produced values ten
    # times too small for the "(ms)" columns these timings are reported under.
    seconds_to_ms = 1000
    return [run_inference_func()[1] * seconds_to_ms for _ in range(iterations)]

# Bind each runtime/weights combination to a zero-argument callable so the
# benchmark helper can invoke them all the same way.
pytorch_func = partial(yolo_runtime_test.ultralytics_run_image, args=args_pytorch)
openvino_func = partial(yolo_runtime_test.ultralytics_run_image, args=args_openvino)
onnx_func = partial(yolo_runtime_test.ultralytics_run_image, args=args_onnx)
onnx_runtime_func = partial(yolo_runtime_test.onnxruntime_run_image, args=args_onnx)

# Build the list in one assignment instead of appending to the existing one:
# re-running this cell then cannot accumulate duplicate rows, nor fail on
# `.append` after `result_time` has been converted to an ndarray.
# Row order must match the column order used when the DataFrame is built.
result_time = [
    collect_execution_times(pytorch_func),
    collect_execution_times(openvino_func),
    collect_execution_times(onnx_func),
    collect_execution_times(onnx_runtime_func),
]

In [None]:
# One row per runtime -> transpose so each runtime becomes a column and each
# benchmark iteration a row.
result_time = np.array(result_time)
df = pd.DataFrame(
    np.transpose(result_time),
    columns=[
        "pytorch time gpu (ms)",
        "openvino+ultralytics time gpu (ms)",
        # An invisible zero-width space used to sit between "onnx" and "+",
        # which made the column impossible to select by its visible name.
        "onnx+ultralytics time gpu (ms)",
        "onnx runtime time gpu (ms)",
    ],
)
# Summary statistics, with the 90th and 95th percentiles added.
df.describe(percentiles=[.9, .95])

Save GPU result

In [None]:
# Serialize the timing DataFrame to disk with pickle.
gpu_results_path = './app/saved_pkl/gpu_df.pkl'
with open(gpu_results_path, 'wb') as pickle_file:
    pickle.dump(df, pickle_file)