In [2]:
import cv2
import numpy as np
import os
import time
import onnxruntime as ort
from ultralytics import YOLO
from ultralytics.utils import yaml_load
from ultralytics.utils.checks import check_yaml

In [3]:
# References:
#   https://medium.com/@zain.18j2000/how-to-use-custom-or-official-yolov8-object-detection-model-in-onnx-format-ca8f055643df
#   https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-ONNXRuntime/main.py
#
# Load the YOLOv8n checkpoint and export it to ONNX (opset 12, dynamic=True).

model = YOLO("yolov8n.pt")
# NOTE(review): output_folder is assigned again, with the same value, in the
# configuration cell further down; this assignment is not used inside this cell.
output_folder = "../output/yolov8_onnx"
# Kept as the cell's last expression so the notebook displays the value
# returned by export() (the recorded output shows 'yolov8n.onnx').
model.export(format="onnx", opset = 12, dynamic=True)


Ultralytics 8.3.118 🚀 Python-3.12.3 torch-2.7.0 CPU (Apple M2)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 12...
[34m[1mONNX:[0m slimming with onnxslim 0.1.50...
[34m[1mONNX:[0m export success ✅ 6.4s, saved as 'yolov8n.onnx' (12.1 MB)

Export complete (6.6s)
Results saved to [1m/Users/phucle/Desktop/cmpe258/hw/hw2/yolov8[0m
Predict:         yolo predict task=detect model=yolov8n.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov8n.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8n.onnx'

In [None]:
# Input side: folder of frames to run detection on, and the ONNX model file.
folder_path = "../datasets/video_data/images"
onnx_path = "yolov8n.onnx"
# Output side: folder that receives the annotated images.
output_folder = "../output/yolov8_onnx"

In [5]:
def preprocess_image(image_path, input_size=(640, 640)):
    """Read an image from disk and turn it into a normalized NCHW input blob.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        input_size: Target size as ``(width, height)`` -- the order
            ``cv2.resize`` expects for its ``dsize`` argument.

    Returns:
        Tuple ``(image, img_input, img_w, img_h)``:
        ``image`` is the image exactly as read from disk (BGR),
        ``img_input`` is a C-contiguous float32 array of shape
        ``(1, 3, height, width)`` in RGB order scaled to ``[0, 1]``, and
        ``img_w`` / ``img_h`` are the width and height of the original image.

    Raises:
        ValueError: If ``cv2.imread`` cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image {image_path}")
    img_h, img_w = image.shape[:2]

    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img_rgb, tuple(input_size))

    # HWC -> CHW, then prepend the batch axis. Adding the axis (instead of
    # reshape(1, 3, *input_size)) keeps the layout correct for non-square
    # sizes, where (width, height) and (height, width) differ.
    img_chw = img_resized.transpose(2, 0, 1)
    img_input = np.ascontiguousarray(img_chw[np.newaxis], dtype=np.float32) / 255.0
    return image, img_input, img_w, img_h

In [6]:
def run_inference(session, input_blob):
    """Run one forward pass and return the first output of the first batch item.

    Args:
        session: Object exposing ``get_inputs()`` and ``run(output_names, feed)``
            (an ONNX Runtime inference session in this notebook).
        input_blob: Array fed to the session's first input.

    Returns:
        ``outputs[0][0]`` with its axes reversed via ``transpose()``.
    """
    first_input = session.get_inputs()[0]
    feed = {first_input.name: input_blob}
    # None asks the session for all of its outputs.
    raw_outputs = session.run(None, feed)
    first_batch_item = raw_outputs[0][0]
    return first_batch_item.transpose()

In [7]:
def filter_detections(results, thresh=0.5):
    """Keep the candidate rows whose best class score exceeds ``thresh``.

    Args:
        results: 2-D array with one candidate per row, laid out as
            ``[cx, cy, w, h, score_0, score_1, ...]`` (at least one score).
        thresh: Minimum best-class score (exclusive) for a row to be kept.

    Returns:
        Array of shape ``(k, 6)`` with rows
        ``[cx, cy, w, h, class_id, confidence]``. Single-score input
        (5 columns) yields ``class_id == 0`` so the layout is the same for
        every model and always matches what ``rescale_boxes`` indexes.
        ``k`` may be 0.

    Raises:
        ValueError: If ``results`` is not 2-D or has fewer than 5 columns.
    """
    results = np.asarray(results)
    if results.ndim != 2 or results.shape[1] < 5:
        raise ValueError(
            f"Expected results of shape (n, 4 + num_classes), got {results.shape}"
        )

    # Vectorized over all candidates instead of a per-row Python loop.
    class_scores = results[:, 4:]
    class_ids = class_scores.argmax(axis=1)
    confidences = class_scores.max(axis=1)

    keep = confidences > thresh
    return np.column_stack((results[keep, :4], class_ids[keep], confidences[keep]))

In [8]:
def rescale_boxes(results, img_w, img_h, input_size=(640, 640)):
    """Map center-format boxes from network-input pixels to image pixels.

    Args:
        results: 2-D array whose columns are
            ``[cx, cy, w, h, class_id, confidence]``.
        img_w: Width of the target image.
        img_h: Height of the target image.
        input_size: Network input size; element 0 scales the x axis and
            element 1 scales the y axis.

    Returns:
        Tuple ``(boxes, classes, conf)``: ``boxes`` has shape ``(n, 4)`` as
        ``[x1, y1, x2, y2]``, ``classes`` is column 4 cast to ``int`` and
        ``conf`` is column 5.
    """
    net_w, net_h = input_size[0], input_size[1]

    center_x = results[:, 0] / net_w * img_w
    center_y = results[:, 1] / net_h * img_h
    box_w = results[:, 2] / net_w * img_w
    box_h = results[:, 3] / net_h * img_h

    # Center/size -> top-left and bottom-right corners.
    corners = (
        center_x - box_w / 2,
        center_y - box_h / 2,
        center_x + box_w / 2,
        center_y + box_h / 2,
    )
    boxes = np.stack(corners, axis=1)
    return boxes, results[:, 4].astype(int), results[:, 5]

In [9]:
def draw_boxes(image, boxes, classes, scores, class_names):
    """Draw labelled detection rectangles onto ``image``.

    Args:
        image: Image array to draw on; it is modified in place.
        boxes: Iterable of ``[x1, y1, x2, y2]`` boxes (truncated to ``int``).
        classes: Iterable of class ids, used as keys into ``class_names``.
        scores: Iterable of confidences, shown with two decimals.
        class_names: Mapping (or sequence) from class id to display name.

    Returns:
        The same ``image`` object that was passed in.
    """
    color = (255, 0, 255)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    thickness = 2

    for box, cls_id, score in zip(boxes, classes, scores):
        x1, y1, x2, y2 = map(int, box)
        label = f"{class_names[cls_id]} {score:.2f}"
        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

        # Default placement is just above the box. When the box touches the
        # top edge that position is off-image, so drop the label inside the
        # box instead; also keep it from starting left of the image.
        (_, text_h), _ = cv2.getTextSize(label, font, font_scale, thickness)
        label_x = max(x1, 0)
        label_y = y1 - 10
        if label_y < text_h:
            label_y = max(y1, 0) + text_h + 10
        cv2.putText(image, label, (label_x, label_y), font, font_scale, color, thickness)
    return image

In [None]:
def apply_nms(boxes, confidences, score_threshold=0.45, iou_threshold=0.7):
    """Run OpenCV non-maximum suppression and return the surviving indices.

    Args:
        boxes: Array-like of boxes, passed to ``cv2.dnn.NMSBoxes`` as a
            list of ``[x, y, width, height]`` rows.
        confidences: Array-like of scores, one per box.
        score_threshold: Forwarded as ``score_threshold``.
        iou_threshold: Forwarded as ``nms_threshold``.

    Returns:
        1-D integer ``numpy.ndarray`` of kept indices into ``boxes``. It is
        an empty array (never a plain list) when nothing survives, so the
        result can always be used directly for NumPy indexing.
    """
    # asarray lets callers pass plain lists as well as ndarrays.
    box_list = np.asarray(boxes).tolist()
    score_list = np.asarray(confidences).tolist()
    if not box_list:
        return np.empty(0, dtype=int)

    indices = cv2.dnn.NMSBoxes(
        bboxes=box_list,
        scores=score_list,
        score_threshold=score_threshold,
        nms_threshold=iou_threshold,
    )
    # Depending on the OpenCV version the result is (), a flat array or an
    # (N, 1) array; normalize all of them to a flat integer array.
    return np.asarray(indices, dtype=int).reshape(-1)

In [11]:
def process_folder(folder_path, output_folder, onnx_path):
    """Run the ONNX detector on every ``.jpg`` in a folder and save annotated copies.

    For each image the function preprocesses it, times the forward pass,
    filters and rescales the detections, applies NMS, draws the surviving
    boxes and writes the result to ``output_folder`` under the same file name.
    A summary with the image count and inference timings is printed at the end.

    Args:
        folder_path: Folder scanned (non-recursively) for files whose name
            ends in ``.jpg``, case-insensitively.
        output_folder: Folder for the annotated images; created if missing.
        onnx_path: Path of the ONNX model loaded with the CPU execution provider.

    Returns:
        None.
    """
    # exist_ok replaces the separate exists() check (no check-then-create gap).
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order does not depend on os.listdir's ordering.
    image_files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith('.jpg'))
    total_images = len(image_files)
    if total_images == 0:
        # Nothing to do: skip loading the model and avoid dividing by zero
        # when computing the per-image average.
        print(f"Processed {total_images} images.")
        print(f"No .jpg images found in {folder_path}.")
        return

    classes = yaml_load(check_yaml("coco8.yaml"))["names"]
    session = ort.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])

    total_inference_time = 0.0
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        original_image, img_input, img_w, img_h = preprocess_image(image_path)

        # Time only the forward pass; perf_counter is the clock meant for
        # measuring short intervals.
        start_time = time.perf_counter()
        outputs = run_inference(session, img_input)
        total_inference_time += time.perf_counter() - start_time

        filtered_results = filter_detections(outputs)
        boxes, class_ids, confidences = rescale_boxes(filtered_results, img_w, img_h)

        # NMS is fed integer [x, y, width, height] boxes, so convert from corners.
        boxes_for_nms = np.column_stack(
            (boxes[:, :2], boxes[:, 2:] - boxes[:, :2])
        ).astype(int)
        keep_indices = apply_nms(boxes_for_nms, confidences)

        output_image = draw_boxes(
            original_image,
            boxes[keep_indices],
            class_ids[keep_indices],
            confidences[keep_indices],
            classes,
        )
        output_path = os.path.join(output_folder, image_file)
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(output_path, output_image):
            print(f"Warning: could not write {output_path}")

    print(f"Processed {total_images} images.")
    print(f"\nTotal inference time: images: {total_inference_time:.2f} seconds")
    print(f"Average inference time per image: {total_inference_time/total_images:.2f} seconds")

In [13]:
# Run the full pipeline with the paths defined in the configuration cell.
process_folder(
    folder_path=folder_path,
    output_folder=output_folder,
    onnx_path=onnx_path,
)

Processed 61 images.

Total inference time: images: 2.23 seconds
Average inference time per image: 0.04 seconds
