# Testing Model Inference Speed

Processor:     	12th Gen Intel(R) Core(TM) i5-12500H, 3100 MHz, 12 Core(s), 16 Logical Processor(s)

System Model:	Nitro AN515-58

In [5]:
!pip install onnx onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.18.0-cp312-cp312-win_amd64.whl.metadata (4.4 kB)
Collecting coloredlogs (from onnxruntime)
  Using cached coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting flatbuffers (from onnxruntime)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting sympy (from onnxruntime)
  Using cached sympy-1.12-py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Using cached humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Collecting mpmath>=0.19 (from sympy->onnxruntime)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Collecting pyreadline3 (from humanfriendly>=9.1->coloredlogs->onnxruntime)
  Using cached pyreadline3-3.4.1-py3-none-any.whl.metadata (2.0 kB)
Downloading onnxruntime-1.18.0-cp312-cp312-win_amd64.whl (5.6 MB)
   ---------------------------------------- 0.0/5.6 MB ? eta -:--:--
   ---------------------------------------- 0

In [1]:
import onnx
import onnxruntime as ort
import numpy as np
import time

def inference_onnx_model(model_path, input_shape):
    """
    Run one timed inference pass on an ONNX model using random input.

    A new ONNX Runtime session is created from ``model_path`` on every call,
    a random float32 array of shape ``input_shape`` is fed to the model's
    first input, and only the ``session.run`` call is timed.

    Args:
    - model_path (str): Path to the ONNX model file.
    - input_shape (tuple): Shape of the random input array to generate.

    Returns:
    - tuple: ``(output, execution_time)`` where ``output`` is the value
      returned by ``session.run`` and ``execution_time`` is the elapsed time
      of that call in seconds.
    """
    # The runtime session reads the model file itself, so the file is not
    # parsed a second time with onnx.load (its result was never used).
    session = ort.InferenceSession(model_path)

    # Feed the first declared input of the model
    input_name = session.get_inputs()[0].name

    # Uniform random values in [0, 1), cast to float32
    input_data = np.random.rand(*input_shape).astype(np.float32)

    # perf_counter is the clock intended for measuring short durations;
    # time.time() follows the wall clock and can have coarse resolution.
    start_time = time.perf_counter()
    output = session.run(None, {input_name: input_data})
    execution_time = time.perf_counter() - start_time

    return output, execution_time

# Single timed run on one model as a quick check of the helper above.
model_path = 'model_1_post_estimation_resnet.onnx'
input_shape = (1, 3, 256, 256)

output, execution_time = inference_onnx_model(
    model_path=model_path,
    input_shape=input_shape,
)
print("Execution time:", execution_time, "seconds")

Execution time: 0.04138946533203125 seconds


In [2]:
def measure_average_execution_time(model_path, input_shape, num_tests=100):
    """
    Measure the average execution time of inference on an ONNX model.

    Calls ``inference_onnx_model`` ``num_tests`` times, keeps only the
    execution time of each call, prints how many timings were collected and
    returns their arithmetic mean.

    Args:
    - model_path (str): Path to the ONNX model file.
    - input_shape (tuple): Shape of the input data for inference.
    - num_tests (int, optional): Number of times to run the inference (default is 100).

    Returns:
    - float: Average execution time in seconds.

    Raises:
    - ValueError: If num_tests is less than 1 (the mean of zero samples is undefined).
    """
    # Without this guard np.mean([]) would return NaN with only a RuntimeWarning.
    if num_tests < 1:
        raise ValueError(f"num_tests must be at least 1, got {num_tests}")

    # NOTE(review): inference_onnx_model builds a fresh session on every call,
    # so each sample times the first run of a new session rather than repeated
    # runs of one session.
    execution_times = [
        inference_onnx_model(model_path, input_shape)[1]  # keep the timing, drop the output
        for _ in range(num_tests)
    ]

    # Report how many timings were collected
    print(len(execution_times))

    # Calculate the average execution time
    return np.mean(execution_times)

In [3]:
# Average inference time for each of the three models, 100 runs apiece.

# Model 1: 256x256 input
model_path, input_shape = 'model_1_post_estimation_resnet.onnx', (1, 3, 256, 256)
model_1_post_estimation_resnet_time = measure_average_execution_time(
    model_path=model_path, input_shape=input_shape, num_tests=100
)

# Model 2: 640x640 input
model_path, input_shape = 'model_2_post_estimation_yolo_v8.onnx', (1, 3, 640, 640)
model_2_post_estimation_yolo_v8_time = measure_average_execution_time(
    model_path=model_path, input_shape=input_shape, num_tests=100
)

# Model 3: 640x640 input
model_path, input_shape = 'model_3_post_estimation_yolo_nas.onnx', (1, 3, 640, 640)
model_3_post_estimation_yolo_nas_time = measure_average_execution_time(
    model_path=model_path, input_shape=input_shape, num_tests=100
)

100
100
100


In [4]:
# Print the three averages under a shared heading, one "label value" line each.
print('Average Execution Time 100 samples')
for label, average_seconds in (
    ('ResNet:', model_1_post_estimation_resnet_time),
    ('YOLOv8:', model_2_post_estimation_yolo_v8_time),
    ('YOLO-NAS:', model_3_post_estimation_yolo_nas_time),
):
    print(label, average_seconds)

Average Execution Time 100 samples
ResNet: 0.033821325302124026
YOLOv8: 0.0809701919555664
YOLO-NAS: 0.08244905948638916


In [7]:
# Same benchmark as the earlier cell, run again; the three result variables
# are overwritten with the new averages.

# Model 1: 256x256 input
model_path, input_shape = 'model_1_post_estimation_resnet.onnx', (1, 3, 256, 256)
model_1_post_estimation_resnet_time = measure_average_execution_time(
    model_path=model_path, input_shape=input_shape, num_tests=100
)

# Model 2: 640x640 input
model_path, input_shape = 'model_2_post_estimation_yolo_v8.onnx', (1, 3, 640, 640)
model_2_post_estimation_yolo_v8_time = measure_average_execution_time(
    model_path=model_path, input_shape=input_shape, num_tests=100
)

# Model 3: 640x640 input
model_path, input_shape = 'model_3_post_estimation_yolo_nas.onnx', (1, 3, 640, 640)
model_3_post_estimation_yolo_nas_time = measure_average_execution_time(
    model_path=model_path, input_shape=input_shape, num_tests=100
)

100
100
100


In [8]:
# Print the averages from the repeated benchmark, one "label value" line each.
print('Average Execution Time 100 samples')
for label, average_seconds in (
    ('ResNet:', model_1_post_estimation_resnet_time),
    ('YOLOv8:', model_2_post_estimation_yolo_v8_time),
    ('YOLO-NAS:', model_3_post_estimation_yolo_nas_time),
):
    print(label, average_seconds)

Average Execution Time 100 samples
ResNet: 0.033288047313690186
YOLOv8: 0.07952313423156739
YOLO-NAS: 0.08013875246047973


| Model                       | ResNet_256_50    |YOLOv8_s          |YOLO-NAS_s        |
|-----------------------------|------------------|------------------|------------------|
| mAP (Mean Average Precision)| 88.5             | 86.2             | 88.8             |
| Time on Laptop CPU          | 0.0333 sec       | 0.0795 sec       | 0.0801 sec       |
| Time on Google Colab CPU    | 0.2658 sec       | 0.6005 sec       | 0.7228 sec       |
| Number of layers            | 129              | 311              | 321              |
| Number of weights           | 123              | 145              | 263              |
| ONNX Model Size             | 132.7 MB         | 45.7 MB          | 60.2 MB          |
| Input shape                 | (1, 3, 256, 256) | (1, 3, 640, 640) | (1, 3, 640, 640) |
| Output shape                | (1, 16, 64, 64)  | (1, 56, 8400)    | (1, 8400, 4)     |
| # of Keypoints              | 16               | 17               | 17               |