In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch
import pickle
from app.util.timer import Timer
from app.util.Differ import Differ
from main import YoloRuntimeTest
from functools import partial
from tqdm import tqdm

Check CUDA availability

In [2]:
# Confirm that PyTorch can use CUDA before running the GPU benchmarks.
torch.cuda.is_available()

True

In [3]:
# Number of CUDA devices visible to PyTorch.
torch.cuda.device_count()

1

GPU input

In [4]:
# Single-image configurations, one per backend. They differ only in the
# weights file and the inference type; everything else is shared.

# PyTorch checkpoint, "ultralytics" inference type.
args_pytorch = dict(
    weights="./app/weights/yolov9c.pt",
    source="./app/assets/sample_image_2.jpg",
    classes="./app/weights/metadata.yaml",
    inference_type="ultralytics",
    type="image",
    show=False,
    conf_threshold=0.6,
    iou_threshold=0.6,
    device="cuda:0",
)

# ONNX export, "ultralytics" inference type.
args_onnx = dict(
    weights="./app/weights/yolov9c.onnx",
    source="./app/assets/sample_image_2.jpg",
    classes="./app/weights/metadata.yaml",
    inference_type="ultralytics",
    type="image",
    show=False,
    conf_threshold=0.6,
    iou_threshold=0.6,
    device="cuda:0",
)

# ONNX export, "onnxruntime_model" inference type.
args_onnx_runtime_model = dict(
    weights="./app/weights/yolov9c.onnx",
    source="./app/assets/sample_image_2.jpg",
    classes="./app/weights/metadata.yaml",
    inference_type="onnxruntime_model",
    type="image",
    show=False,
    conf_threshold=0.6,
    iou_threshold=0.6,
    device="cuda:0",
)

Initialize YOLO runtime test class

In [5]:
# Create the YoloRuntimeTest and Timer instances that the inference cells
# below share.
yolo_runtime_test = YoloRuntimeTest()
timer = Timer()

In [None]:
# Time one inference with the PyTorch weights (args_pytorch).
timer.start()
try:
    gpu_pytorch_ultralytics_image = yolo_runtime_test.ultralytics_run_image(args_pytorch)
finally:
    # The timer object is shared by all timing cells, so stop it even when
    # inference raises instead of leaving it started.
    timer.stop()
print(f'Execution function time: {timer.elapsed_time} s')
print(gpu_pytorch_ultralytics_image)

In [7]:
# Time one inference with the ONNX weights and the "ultralytics" inference type (args_onnx).
# NOTE(review): the last recorded run of this cell failed with
# "RuntimeError: Input must be a list of dictionaries or a single numpy array
# for input 'images'.", so gpu_onnx_ultralytics_image was not assigned in that
# run; the difference cell further down needs it.
timer.start()
try:
    gpu_onnx_ultralytics_image = yolo_runtime_test.ultralytics_run_image(args_onnx)
finally:
    # The timer object is shared by all timing cells, so stop it even when
    # inference raises instead of leaving it started.
    timer.stop()
print(f'Execution function time: {timer.elapsed_time} s')
print(gpu_onnx_ultralytics_image)

[INFO] Inference Image
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...



RuntimeError: Input must be a list of dictionaries or a single numpy array for input 'images'.

In [6]:
# Time one inference with the "onnxruntime_model" inference type (args_onnx_runtime_model).
timer.start()
try:
    gpu_onnx_runtime_model_image = yolo_runtime_test.onnxruntime_run_image(args_onnx_runtime_model)
finally:
    # The timer object is shared by all timing cells, so stop it even when
    # inference raises instead of leaving it started.
    timer.stop()
print(f'Execution function time: {timer.elapsed_time} s')
print(gpu_onnx_runtime_model_image)

[INFO] Inference Image
[[[     9.5217       13.74      30.501 ...      552.67      584.18      614.37]
  [     22.544      24.495      18.649 ...      615.01      619.86      620.32]
  [     27.039      38.157       40.64 ...      410.78      410.26      380.44]
  ...
  [ 2.3842e-07  2.3842e-07  2.3842e-07 ...  3.2187e-06  3.3379e-06  3.4571e-06]
  [ 3.5763e-07  2.3842e-07  2.3842e-07 ...  3.2187e-06  3.3379e-06  3.2187e-06]
  [ 3.5763e-07  2.3842e-07  2.3842e-07 ...  2.7418e-06   2.861e-06  2.6226e-06]]]
Elapsed time: 8.6924 seconds
Execution function time: 9.0990075 s
([['car', 0.9386059641838074, 856, 1686, 1511, 2235], ['traffic light', 0.9162204265594482, 366, 761, 480, 985], ['car', 0.9144188761711121, 154, 1658, 620, 2019], ['traffic light', 0.8306471109390259, 0, 768, 44, 997], ['traffic light', 0.8175824284553528, 991, 1367, 1042, 1466], ['traffic light', 0.8092846870422363, 1403, 1394, 1452, 1491], ['traffic light', 0.760214626789093, 1180, 1380, 1226, 1475], ['car', 0.756393

Difference GPU

In [None]:
# Keep wide DataFrames on a single line when printed instead of wrapping columns.
pd.set_option('display.expand_frame_repr', False)

def generate_difference_df(image1, image2, label):
    """Tabulate the differences between two detection results.

    Both inputs are converted with ``np.array`` and handed to ``Differ``; the
    rows returned by ``Differ.find_difference()`` become the rows of the
    resulting frame.

    Args:
        image1: First detection result (anything ``np.array`` accepts).
        image2: Second detection result, compared against ``image1``.
        label: Header for the first column of the returned frame.

    Returns:
        pandas.DataFrame with columns
        ``[label, "gpu conf_diff", "gpu box_diff (px)"]``.
    """
    column_names = [label, "gpu conf_diff", "gpu box_diff (px)"]
    first, second = np.array(image1), np.array(image2)
    differences = Differ(first, second).find_difference()
    return pd.DataFrame(differences, columns=column_names)

# Compare each ONNX variant with the PyTorch result (element 0 of every
# result is what gets compared), then put both tables side by side.
df_pt_onnx_ultralytics = generate_difference_df(
    image1=gpu_pytorch_ultralytics_image[0],
    image2=gpu_onnx_ultralytics_image[0],
    label="pt vs onnx+ultralytics",
)
df_pt_onnxruntime_model = generate_difference_df(
    image1=gpu_pytorch_ultralytics_image[0],
    image2=gpu_onnx_runtime_model_image[0],
    label="pt vs onnxruntime model",
)

df_combined = pd.concat(
    [df_pt_onnx_ultralytics, df_pt_onnxruntime_model],
    axis=1,
)

print(df_combined)

Average GPU time (100 iterations)

In [None]:
# One entry per benchmarked backend: the list of execution times returned by
# collect_execution_times for that backend.
result_time = []
# Flat list that collect_execution_times appends result[2] of every inference
# call to, across all backends.
ultralytics_inference_time = []

In [None]:
def run_inference_with_args(inference_func, args):
    """Bind ``args`` to ``inference_func`` and return a zero-argument callable.

    Args:
        inference_func: Callable that accepts a single configuration argument.
        args: Configuration passed to ``inference_func``. It is captured by
            reference, so changes made before the call are visible to it.

    Returns:
        A callable taking no arguments that returns ``inference_func(args)``.
    """
    return lambda: inference_func(args)

def collect_execution_times(run_inference_func, args, iterations=100):
    """Run inference over the numbered sample images and collect the timings.

    For every ``i`` in ``range(iterations)`` the ``"source"`` entry is pointed
    at ``./app/assets/sample_image_{i}.jpg`` and ``run_inference_func`` is
    called with the resulting configuration.

    Args:
        run_inference_func: Callable taking the configuration dict. Its return
            value is indexed: element 1 is scaled by 1000 and collected,
            element 2 is appended to the module-level
            ``ultralytics_inference_time`` list.
        args: Base configuration. A fresh copy is built for each iteration, so
            the caller's dict is left untouched (previously its ``"source"``
            was overwritten and stayed on the last sample image).
        iterations: Number of sample images to process (default 100).

    Returns:
        list: ``result[1] * 1000`` for each iteration, in order. The tables
        built from it are labelled "(ms)", which assumes ``result[1]`` is in
        seconds -- TODO confirm against the runtime class.
    """
    execution_times = []
    for i in range(iterations):
        # Per-iteration copy with the source swapped in; never mutate `args`.
        run_args = {**args, "source": f"./app/assets/sample_image_{i}.jpg"}
        result = run_inference_func(run_args)
        execution_times.append(result[1] * 1000)
        # Global side effect that the profiler-time table cell relies on.
        ultralytics_inference_time.append(result[2])
    return execution_times

# Benchmark configurations, one per backend. "source" starts at sample image 0;
# collect_execution_times substitutes the numbered sample image on every iteration.

# PyTorch checkpoint, "ultralytics" inference type.
args_pytorch = dict(
    weights="./app/weights/yolov9c.pt",
    source="./app/assets/sample_image_0.jpg",
    classes="./app/weights/metadata.yaml",
    inference_type="ultralytics",
    type="image",
    show=False,
    conf_threshold=0.6,
    iou_threshold=0.6,
    device="cuda:0",
)

# ONNX export, "ultralytics" inference type.
args_onnx = dict(
    weights="./app/weights/yolov9c.onnx",
    source="./app/assets/sample_image_0.jpg",
    classes="./app/weights/metadata.yaml",
    inference_type="ultralytics",
    type="image",
    show=False,
    conf_threshold=0.6,
    iou_threshold=0.6,
    device="cuda:0",
)

# ONNX export, "onnxruntime_model" inference type.
args_onnx_runtime_model = dict(
    weights="./app/weights/yolov9c.onnx",
    source="./app/assets/sample_image_0.jpg",
    classes="./app/weights/metadata.yaml",
    inference_type="onnxruntime_model",
    type="image",
    show=False,
    conf_threshold=0.6,
    iou_threshold=0.6,
    device="cuda:0",
)

# Benchmark the PyTorch weights here; the two ONNX variants are collected in
# the next two cells. Each call appends one list of timings to result_time,
# so re-running this line adds another entry.
result_time.append(collect_execution_times(yolo_runtime_test.ultralytics_run_image, args_pytorch))

In [None]:
# ONNX weights with the "ultralytics" inference type. Appends one more list of
# timings to result_time; re-running the cell appends again.
result_time.append(collect_execution_times(yolo_runtime_test.ultralytics_run_image, args_onnx))

In [None]:
# ONNX weights with the "onnxruntime_model" inference type. Appends one more
# list of timings to result_time; re-running the cell appends again.
result_time.append(collect_execution_times(yolo_runtime_test.onnxruntime_run_image, args_onnx_runtime_model))

In [None]:
# Build the summary from a separately named array so `result_time` stays a
# list: the collection cells call result_time.append(...), which fails on
# re-run once the name has been rebound to an ndarray.
result_time_ms = np.array(result_time)
df = pd.DataFrame(
    result_time_ms.T,  # transpose: one row per iteration, one column per backend
    # Labels are plain ASCII (no invisible characters) so they can be typed
    # when selecting a column.
    columns=[
        "pytorch+ultralytics time gpu (ms)",
        "onnx+ultralytics time gpu (ms)",
        "onnx runtime time gpu (ms)",
    ],
)
df.describe(percentiles=[.9, .95])

In [None]:
# Display the full list of result[2] values gathered by collect_execution_times.
ultralytics_inference_time

In [None]:
def split_list_into_arrays(input_list, chunk_size=100):
    """Return the first two consecutive chunks of ``input_list``.

    Args:
        input_list: Sequence to slice.
        chunk_size: Length of each chunk. Defaults to 100, which reproduces the
            previously hard-coded ``[:100]`` / ``[100:200]`` split.

    Returns:
        tuple: ``(input_list[:chunk_size], input_list[chunk_size:2 * chunk_size])``.
        Chunks are shorter (possibly empty) when the input has fewer than
        ``2 * chunk_size`` items; anything beyond that is ignored.

    Raises:
        ValueError: If ``chunk_size`` is negative.
    """
    if chunk_size < 0:
        raise ValueError("chunk_size must be non-negative")
    list1 = input_list[:chunk_size]
    list2 = input_list[chunk_size:2 * chunk_size]
    return list1, list2

# Only the first two chunks of collected values are used here; anything
# appended after them is ignored by split_list_into_arrays.
array1, array2 = split_list_into_arrays(ultralytics_inference_time)
infer_timer_list = np.array([array1, array2])

# Transposed so that each chunk becomes one column.
df_infer = pd.DataFrame(
    infer_timer_list.T,
    columns=[
        "pytorch+ultralytics built-in profiler time gpu (ms)",
        "onnx+ultralytics built-in profiler time gpu (ms)",
    ],
)
df_infer.describe(percentiles=[.9, .95])

Save GPU result

In [None]:
# Serialize the execution-time table `df` to disk with pickle.
with open("./app/saved_pkl/gpu_df.pkl", mode="wb") as pkl_file:
    pickle.dump(df, pkl_file)

In [None]:
# Serialize the profiler-time table `df_infer` to disk with pickle.
with open("./app/saved_pkl/gpu_infer_df.pkl", mode="wb") as pkl_file:
    pickle.dump(df_infer, pkl_file)