In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch
import pickle
from app.util.timer import Timer
from app.util.Differ import Differ
from main import YoloRuntimeTest
from functools import partial
from tqdm import tqdm

Check CUDA availability

In [2]:
# Confirm that PyTorch can reach a CUDA device before running the GPU benchmarks.
torch.cuda.is_available()

True

In [3]:
# Number of CUDA devices visible to PyTorch; the configurations in this notebook use "cuda:0".
torch.cuda.device_count()

1

GPU input

In [4]:
# Baseline configuration: PyTorch weights run through the "ultralytics"
# inference type on the first CUDA device. The ONNX configurations below are
# derived from it so the shared settings are written only once.
args_pytorch = {
    "weights": "./app/weights/yolov9c.pt",
    "source": "./app/assets/sample_image_2.jpg",
    "classes": "./app/weights/metadata.yaml",
    "inference_type": "ultralytics",
    "type": "image",
    "show": False,
    "conf_threshold": 0.6,
    "iou_threshold": 0.6,
    "device": "cuda:0",
}

# Same settings, but pointing at the ONNX export of the weights.
# Unpacking builds a new dict, so the three configurations stay independent.
args_onnx = {**args_pytorch, "weights": "./app/weights/yolov9c.onnx"}

# ONNX weights again, this time with the "onnxruntime_model" inference type.
args_onnx_runtime_model = {**args_onnx, "inference_type": "onnxruntime_model"}

Initialize the YOLO runtime test class

In [5]:
# Benchmark harness (project class imported from main) whose *_run_image
# methods are exercised throughout this notebook.
yolo_runtime_test = YoloRuntimeTest()
# Single stopwatch instance reused by each of the one-off timing cells.
timer = Timer()

In [None]:
# One-off run: PyTorch weights through the Ultralytics path, timed end to end.
timer.start()
gpu_pytorch_ultralytics_image = yolo_runtime_test.ultralytics_run_image(args_pytorch)
timer.stop()
# The message labels the value with "s", so elapsed_time is presumably in
# seconds — confirm against app.util.timer.
print(f'Execution function time: {timer.elapsed_time} s')
# Element [0] of this result is what the difference cell compares.
print(gpu_pytorch_ultralytics_image)

In [None]:
# One-off run: ONNX weights through the same Ultralytics path, timed end to end.
timer.start()
gpu_onnx_ultralytics_image = yolo_runtime_test.ultralytics_run_image(args_onnx)
timer.stop()
# The message labels the value with "s", so elapsed_time is presumably in
# seconds — confirm against app.util.timer.
print(f'Execution function time: {timer.elapsed_time} s')
# Element [0] of this result is what the difference cell compares.
print(gpu_onnx_ultralytics_image)

In [None]:
# One-off run: ONNX weights through the harness's onnxruntime_run_image path,
# timed end to end.
timer.start()
gpu_onnx_runtime_model_image = yolo_runtime_test.onnxruntime_run_image(args_onnx_runtime_model)
timer.stop()
# The message labels the value with "s", so elapsed_time is presumably in
# seconds — confirm against app.util.timer.
print(f'Execution function time: {timer.elapsed_time} s')
# Element [0] of this result is what the difference cell compares.
print(gpu_onnx_runtime_model_image)

Differences between GPU results

In [None]:
# Keep wide DataFrames on one line when printed instead of wrapping the columns.
pd.set_option('display.expand_frame_repr', False)

def generate_difference_df(image1, image2, label):
    """Tabulate what ``Differ`` reports when comparing two results.

    Both inputs are converted to NumPy arrays and handed to ``Differ``; the
    output of its ``find_difference()`` call is loaded into a DataFrame.

    Args:
        image1: First result to compare (anything ``np.array`` accepts).
        image2: Second result to compare.
        label: Name given to the first column of the returned frame.

    Returns:
        pandas.DataFrame with the columns
        ``[label, "gpu conf_diff", "gpu box_diff (px)"]``.
    """
    column_names = [label, "gpu conf_diff", "gpu box_diff (px)"]
    first, second = np.array(image1), np.array(image2)
    differences = Differ(first, second).find_difference()
    return pd.DataFrame(differences, columns=column_names)

# Compare each ONNX variant against the PyTorch result, which acts as the reference.
df_pt_onnx_ultralytics = generate_difference_df(
    gpu_pytorch_ultralytics_image[0],
    gpu_onnx_ultralytics_image[0],
    "pt vs onnx+ultralytics",
)
df_pt_onnxruntime_model = generate_difference_df(
    gpu_pytorch_ultralytics_image[0],
    gpu_onnx_runtime_model_image[0],
    "pt vs onnxruntime model",
)

# Put the two comparison tables side by side (column-wise concatenation).
df_combined = pd.concat((df_pt_onnx_ultralytics, df_pt_onnxruntime_model), axis="columns")

print(df_combined)

Average GPU time (100 runs)

In [6]:
# One list of timings is appended per benchmarked backend by the collection cells.
# Re-running this cell clears everything collected so far.
result_time = []
# Filled as a side effect of collect_execution_times with element [2] of every
# inference result, in call order across all backends.
ultralytics_inference_time = []

In [7]:
def run_inference_with_args(inference_func, args):
    """Bind ``args`` to ``inference_func`` and return a zero-argument callable.

    Args:
        inference_func: Callable that accepts a single argument.
        args: The object passed to ``inference_func`` when the returned
            callable is invoked.

    Returns:
        A callable taking no arguments that returns ``inference_func(args)``.
    """
    return lambda: inference_func(args)

def collect_execution_times(run_inference_func, args, iterations=100, profiler_times=None):
    """Run an inference function over the numbered sample images and collect timings.

    For every ``i`` in ``range(iterations)`` the function is called with a
    copy of ``args`` whose ``"source"`` entry is
    ``./app/assets/sample_image_{i}.jpg``.

    Args:
        run_inference_func: Callable that takes the argument dict and returns
            an indexable result. Element ``[1]`` is multiplied by 1000
            (presumably seconds to milliseconds — confirm against the
            harness) and element ``[2]`` is recorded unchanged.
        args: Base argument dict. It is never modified; each call receives
            its own copy with ``"source"`` replaced.
        iterations: Number of images to process, starting at index 0.
        profiler_times: Optional list that receives element ``[2]`` of each
            result. When omitted, the notebook-level
            ``ultralytics_inference_time`` list is used.

    Returns:
        list: ``result[1] * 1000`` for each iteration, in order.
    """
    if profiler_times is None:
        # Default sink: the notebook-level accumulator.
        profiler_times = ultralytics_inference_time

    execution_times = []
    for i in range(iterations):
        # Build a fresh dict per call so the caller's configuration keeps its
        # own "source" value after the benchmark has finished.
        run_args = {**args, "source": f"./app/assets/sample_image_{i}.jpg"}
        result = run_inference_func(run_args)
        execution_times.append(result[1] * 1000)
        profiler_times.append(result[2])
    return execution_times

# Benchmark configuration: PyTorch weights run through the "ultralytics"
# inference type on the first CUDA device. The ONNX configurations below are
# derived from it so the shared settings are written only once.
args_pytorch = {
    "weights": "./app/weights/yolov9c.pt",
    "source": "./app/assets/sample_image_0.jpg",
    "classes": "./app/weights/metadata.yaml",
    "inference_type": "ultralytics",
    "type": "image",
    "show": False,
    "conf_threshold": 0.6,
    "iou_threshold": 0.6,
    "device": "cuda:0",
}

# Same settings, but pointing at the ONNX export of the weights.
# Unpacking builds a new dict, so the three configurations stay independent.
args_onnx = {**args_pytorch, "weights": "./app/weights/yolov9c.onnx"}

# ONNX weights again, this time with the "onnxruntime_model" inference type.
args_onnx_runtime_model = {**args_onnx, "inference_type": "onnxruntime_model"}

# First entry of result_time: PyTorch weights through the Ultralytics path.
result_time.append(collect_execution_times(yolo_runtime_test.ultralytics_run_image, args_pytorch))
# The ONNX+Ultralytics and ONNX Runtime runs are collected in their own cells
# after this one; result_time must be filled in that order to match the column
# labels used when the summary DataFrame is built.



0: 448x640 1 person, 1 bicycle, 3 cars, 3 trucks, 98.1ms
0: 448x640 1 person, 1 bicycle, 3 cars, 3 trucks, 98.1ms
Speed: 4.0ms preprocess, 98.1ms inference, 60.0ms postprocess per image at shape (1, 3, 448, 640)
Speed: 4.0ms preprocess, 98.1ms inference, 60.0ms postprocess per image at shape (1, 3, 448, 640)


0: 480x640 6 cars, 2 traffic lights, 95.8ms
0: 480x640 6 cars, 2 traffic lights, 95.8ms
Speed: 1.5ms preprocess, 95.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Speed: 1.5ms preprocess, 95.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


0: 480x640 5 cars, 6 traffic lights, 9.0ms
0: 480x640 5 cars, 6 traffic lights, 9.0ms
Speed: 1.5ms preprocess, 9.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
Speed: 1.5ms preprocess, 9.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


0: 480x640 1 person, 4 cars, 6 traffic lights, 11.5ms
0: 480x640 1 person, 4 cars, 6 traffic lights, 11.5ms
Speed: 1.0ms p

In [8]:
# Second entry of result_time: ONNX weights through the Ultralytics path.
# NOTE(review): the captured log prints "Loading ... for ONNX Runtime inference..."
# before every image, so the model appears to be loaded again on each call and
# these timings presumably include that load — confirm in the harness.
result_time.append(collect_execution_times(yolo_runtime_test.ultralytics_run_image, args_onnx))

Loading app\weights\yolov9c.onnx for ONNX Runtime inference...
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...


0: 640x640 1 person, 1 bicycle, 3 cars, 3 trucks, 20.5ms
0: 640x640 1 person, 1 bicycle, 3 cars, 3 trucks, 20.5ms
Speed: 1.0ms preprocess, 20.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)
Speed: 1.0ms preprocess, 20.5ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...


0: 640x640 5 cars, 2 traffic lights, 20.0ms
0: 640x640 5 cars, 2 traffic lights, 20.0ms
Speed: 2.0ms preprocess, 20.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
Speed: 2.0ms preprocess, 20.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...
Loading app\weights\yolov9c.onnx for ONNX Runtime inference...


0: 640x640 5 cars, 6 traff

In [9]:
# Third entry of result_time: ONNX weights through onnxruntime_run_image.
# collect_execution_times also appends element [2] of each result from this run
# to ultralytics_inference_time.
result_time.append(collect_execution_times(yolo_runtime_test.onnxruntime_run_image, args_onnx_runtime_model))

In [19]:
# result_time holds one list of timings per backend, in collection order
# (PyTorch+Ultralytics, ONNX+Ultralytics, ONNX Runtime). Transposing gives one
# column per backend and one row per image.
# Note: this rebinds result_time from a list to an ndarray, so the collection
# cells (which call .append) cannot be re-run after this cell without first
# re-running the cell that resets result_time.
result_time = np.array(result_time)
# Column labels contain only plain, typeable characters so a column can be
# selected by writing its name.
df = pd.DataFrame(np.transpose(result_time),
                  columns=["pytorch+ultralytics time gpu (ms)",
                           "onnx+ultralytics time gpu (ms)",
                           "onnx runtime time gpu (ms)"])
# Summary statistics including the 90th and 95th percentiles.
df.describe(percentiles=[.9, .95])

Unnamed: 0,pytorch+ultralytics time gpu (ms),onnx​+ultralytics time gpu (ms),onnx runtime time gpu (ms)
count,100.0,100.0,100.0
mean,191.843874,8679.813814,8469.946979
std,54.871425,68.066425,19.388084
min,157.3975,8625.1512,8435.8822
50%,175.0374,8664.0128,8468.64905
90%,234.93275,8702.03948,8489.59977
95%,239.66449,8824.15717,8492.06229
max,663.8177,9194.4903,8569.6918


In [11]:
# Raw values of element [2] from every inference result, in the order the
# collection cells ran. This displays the whole list.
ultralytics_inference_time

[98.10328483581543,
 95.79801559448242,
 9.025812149047852,
 11.525630950927734,
 6.555795669555664,
 91.56060218811035,
 6.51097297668457,
 7.009267807006836,
 7.505893707275391,
 6.084442138671875,
 7.005929946899414,
 6.040811538696289,
 7.538318634033203,
 6.535530090332031,
 9.511470794677734,
 7.190704345703125,
 8.042573928833008,
 6.435871124267578,
 6.068706512451172,
 5.999565124511719,
 7.506847381591797,
 6.509304046630859,
 6.7005157470703125,
 7.511377334594727,
 6.548643112182617,
 7.00068473815918,
 5.504846572875977,
 6.184816360473633,
 6.055593490600586,
 7.511377334594727,
 6.999492645263672,
 6.50477409362793,
 7.014274597167969,
 6.5174102783203125,
 7.032632827758789,
 6.917238235473633,
 6.528139114379883,
 10.012388229370117,
 8.558511734008789,
 10.091066360473633,
 11.049747467041016,
 11.321783065795898,
 11.740684509277344,
 13.48423957824707,
 31.915903091430664,
 26.539325714111328,
 21.813392639160156,
 19.559621810913086,
 18.05400848388672,
 24.7964859

In [18]:
def split_list_into_arrays(input_list, chunk_size=100):
    """Return the first two consecutive ``chunk_size``-long slices of a sequence.

    Args:
        input_list: Sequence to slice. Items beyond ``2 * chunk_size`` are
            ignored; a shorter input yields shorter (possibly empty) slices.
        chunk_size: Length of each slice. Defaults to 100, the number of
            images processed per backend in this notebook.

    Returns:
        tuple: ``(input_list[:chunk_size], input_list[chunk_size:2 * chunk_size])``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    first_chunk = input_list[:chunk_size]
    second_chunk = input_list[chunk_size:2 * chunk_size]
    return first_chunk, second_chunk

# Only the first two chunks of profiler readings are used. Assuming the
# collection cells ran in notebook order, these are the PyTorch+Ultralytics
# run followed by the ONNX+Ultralytics run.
array1, array2 = split_list_into_arrays(ultralytics_inference_time)

# Stack the two runs as rows, then transpose so each run becomes a column.
infer_timer_list = np.array([array1, array2])
df_infer = pd.DataFrame(
    infer_timer_list.T,
    columns=[
        "pytorch+ultralytics built-in profiler time gpu (ms)",
        "onnx+ultralytics built-in profiler time gpu (ms)",
    ],
)
df_infer.describe(percentiles=[.9, .95])

Unnamed: 0,pytorch+ultralytics built-in profiler time gpu (ms),onnx+ultralytics built-in profiler time gpu (ms)
count,100.0,100.0
mean,13.620884,23.954599
std,15.678415,2.36297
min,5.373478,20.025253
50%,8.300543,23.648024
90%,23.201942,26.641893
95%,26.564956,27.232695
max,98.103285,28.905153


Save GPU results

In [20]:
# Persist the end-to-end timing table. open() does not create directories, so
# ./app/saved_pkl must already exist.
with open('./app/saved_pkl/gpu_df.pkl', 'wb') as f:
    pickle.dump(df, f)

In [21]:
# Persist the built-in profiler timing table. open() does not create
# directories, so ./app/saved_pkl must already exist.
with open('./app/saved_pkl/gpu_infer_df.pkl', 'wb') as f:
    pickle.dump(df_infer, f)