# Test the model

In [10]:
import cv2
from ultralytics import YOLO

# Load your trained YOLOv8 model (PyTorch .pt file)
model = YOLO("/path/to/weights/best.pt")  # Update the path

# Set class names if not embedded in the model
# default dataset
class_names = ["down", "left", "no_gesture", "right", "stop", "up"]

# dataset with the thumbs_up gesture
#class_names = ["down", "left", "no_gesture", "right", "stop", "thumbs_up" ,"up"]

# Open webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run inference
    results = model(frame, imgsz=640, conf=0.4)[0]  # results[0] = first image in batch

    for box in results.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        label = class_names[cls_id] if cls_id < len(class_names) else f"ID:{cls_id}"

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    cv2.imshow("YOLOv8n Detection (.pt)", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 1 no_gesture, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 5.4ms
Speed: 1.0ms preprocess, 5.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 4.4ms
Speed: 0.7ms preprocess, 4.4ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 3.8ms
Speed: 0.7ms preprocess, 3.8ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 7.4ms
Speed: 1.1ms preprocess, 7.4ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 8.1ms
Speed: 1.1ms preprocess, 8.1ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 5.9ms
Speed: 1.1ms preprocess, 5.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 no_gesture, 6.9ms
Speed: 1.1ms preprocess, 6.9ms inference, 0.7ms postprocess per image at

KeyboardInterrupt: 

## Test the exported onnx model

In [6]:
import onnxruntime as ort
import numpy as np
from PIL import Image

# Load image and preprocess
img = Image.open("/path/to/images/frame_left00003.jpg").resize((640, 640)) # Update path
img = np.array(img).astype("float32") / 255.0
img = img.transpose(2, 0, 1)[None, :, :, :]  # NCHW

# Load ONNX model
session = ort.InferenceSession("/path/to/weights/best.onnx") # Update path
outputs = session.run(None, {"images": img})


print(outputs)


[array([[[     19.861,      23.909,      34.752, ...,      563.07,      576.12,       608.4],
        [     13.692,      19.375,      5.2649, ...,       592.7,      583.27,      584.58],
        [     41.552,      50.297,      106.35, ...,      136.16,      123.88,      61.507],
        ...,
        [ 1.4901e-07,  1.7881e-07,  1.7881e-07, ...,  1.0937e-05,  1.3024e-05,  1.4752e-05],
        [ 3.5763e-07,  4.1723e-07,  2.6822e-07, ...,  7.0333e-06,  6.7353e-06,  1.2755e-05],
        [ 3.5763e-07,  3.2783e-07,  2.0862e-07, ...,  6.1691e-06,  9.2685e-06,   1.201e-05]]], dtype=float32)]


## Check the model

In [11]:
from ultralytics import YOLO
model = YOLO("/path/to/weights/best.pt") # Update path
print("Model class names:", model.names)
print("Number of classes:", len(model.names))

Model class names: {0: 'down', 1: 'left', 2: 'no_gesture', 3: 'right', 4: 'stop', 5: 'thumbs_up', 6: 'up'}
Number of classes: 7
