In [1]:
from detect import detect_objects
import onnxruntime as ort
import numpy as np
import torch
from PIL import Image
import torchvision.transforms as transforms

In [2]:
import time

In [3]:
# PyTorch baseline: run the project's detector once on the sample image.
# The second returned value is reported further down as the PT inference time
# in seconds. Presumably the two positional arguments are the weights path and
# the image path -- confirm against detect.py.
pt_options = {"conf_thres": 0.35, "iou_thres": 0.45, "img_size": 640}
output, ptTime = detect_objects(
    "../bestModels/yolo.pt", "../data/rgb_0999.png", **pt_options
)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
IDetect.fuse


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [4]:
def yoloInference(image_path, model_path):
    """Run a single ONNX Runtime inference pass on an image file and time it.

    The image is read with PIL, converted to 3-channel RGB, turned into a
    float32 tensor of shape (1, 3, H, W), resized to 640x640 and fed to the
    model's first input.

    NOTE(review): pixel values are passed through unscaled (no division by
    255 and no letterboxing) -- confirm this matches what the exported model
    expects.

    Args:
        image_path: Path of the image file to read.
        model_path: Path of the ``.onnx`` model file.

    Returns:
        A ``(outputs, seconds)`` tuple: the value returned by
        ``InferenceSession.run`` and the duration of that ``run`` call in
        seconds. Model loading and preprocessing are not included in the
        timing.
    """
    # Load the ONNX model
    session = ort.InferenceSession(model_path)

    # The context manager closes the underlying file handle. convert("RGB")
    # guarantees exactly three channels, so grayscale, palette and RGBA images
    # all work (the previous [:, :, :3] slice failed on 2-D image arrays).
    with Image.open(image_path) as image:
        pixels = np.array(image.convert("RGB"))

    image_tensor = torch.tensor(
        np.expand_dims(pixels, axis=0), dtype=torch.float32
    )  # Add batch dimension
    image_tensor = image_tensor.permute(0, 3, 1, 2)  # NHWC -> NCHW

    resize = transforms.Resize((640, 640))  # Define resize transformation
    image_tensor = resize(image_tensor)

    # Convert to NumPy before starting the clock so that only session.run is
    # measured.
    model_input = np.array(image_tensor)

    # Perform inference
    input_name = session.get_inputs()[0].name
    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()  # Capture start time
    outputs = session.run(None, {input_name: model_input})
    end_time = time.perf_counter()  # Capture end time
    return outputs, end_time - start_time

In [5]:
# Run the same sample image through the exported ONNX model; onnxTime is the
# elapsed time (seconds) that yoloInference measures around session.run.
outputs, onnxTime = yoloInference(
    image_path="../data/rgb_0999.png",
    model_path="../bestModels/yolo.onnx",
)

In [6]:
# Report both single-run timings, one per line.
print(
    f"ONNX Inference Time: {onnxTime:.4f} seconds",
    f"PT Inference Time: {ptTime:.4f} seconds",
    sep="\n",
)

ONNX Inference Time: 1.2213 seconds
PT Inference Time: 1.0035 seconds


In [7]:
import cv2


def yoloInferenceLive(model_path):
    """Run ONNX inference on frames from the default webcam until 'q' is pressed.

    Each captured frame is converted from BGR to RGB, turned into a float32
    tensor of shape (1, 3, H, W), resized to 640x640 and passed to the model's
    first input. The duration of each ``session.run`` call is printed and the
    unmodified frame is shown in a window titled "YOLO Inference".

    The loop ends when the capture device stops delivering frames or when the
    'q' key is pressed. The capture device and all OpenCV windows are released
    on exit, including when an exception is raised inside the loop.

    Args:
        model_path: Path of the ``.onnx`` model file.

    Returns:
        None.
    """
    # Load the ONNX model
    session = ort.InferenceSession(model_path)

    # Loop-invariant setup: build these once instead of once per frame.
    input_name = session.get_inputs()[0].name
    resize = transforms.Resize((640, 640))  # Define resize transformation

    # Open the webcam capture (0 is the default webcam)
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR; cvtColor already yields an H x W x 3 RGB
            # array, so no round-trip through a PIL image is needed.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_tensor = torch.tensor(
                np.expand_dims(rgb_frame, axis=0), dtype=torch.float32
            )  # Add batch dimension
            image_tensor = resize(image_tensor.permute(0, 3, 1, 2))  # NHWC -> NCHW

            # Convert before starting the clock so only session.run is timed.
            model_input = np.array(image_tensor)

            # Perform inference
            start_time = time.perf_counter()  # Capture start time
            outputs = session.run(None, {input_name: model_input})
            end_time = time.perf_counter()  # Capture end time

            # Process outputs (e.g., draw bounding boxes on the frame)
            # This part depends on the specific output format of your model
            # For now, we'll just print the inference time
            print(f"Inference Time: {end_time - start_time:.4f} seconds")

            # Display the frame
            cv2.imshow("YOLO Inference", frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always free the camera and close the window, even after an error;
        # otherwise the device stays locked for the rest of the kernel session.
        cap.release()
        cv2.destroyAllWindows()

In [8]:
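# Left disabled so that "Run All" does not open the webcam.
# Uncomment to start live inference; press "q" in the preview window to stop.
# yoloInferenceLive("../bestModels/yolo.onnx")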