# Test the model

In [5]:
!pip install opencv-python ultralytics

Collecting ultralytics
  Using cached ultralytics-8.3.151-py3-none-any.whl.metadata (37 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Using cached matplotlib-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Using cached pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (8.9 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Using cached PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting requests>=2.23.0 (from ultralytics)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Using cached scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Using cached torchvision-0.22.1-cp312

In [None]:
model_path = "path/to/gesture_yolov8n.pt" # <-- ggf. Pfad anpassen
# e.g.
#model_path = "/workspaces/ai_tutorial/ros_ws/src/ai_tutorial/model/gesture_yolov8n.pt"

In [8]:
import cv2
from ultralytics import YOLO

# Load your trained YOLOv8 model (PyTorch .pt file)
model = YOLO(model_path)  # Update the path

# Set class names if not embedded in the model
# default dataset
class_names = ["down", "left", "no_gesture", "right", "stop", "up"]

# dataset with the thumbs_up gesture
#class_names = ["down", "left", "no_gesture", "right", "stop", "thumbs_up" ,"up"]

# Open webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Run inference
    results = model(frame, imgsz=640, conf=0.4)[0]  # results[0] = first image in batch

    for box in results.boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        conf = float(box.conf[0])
        cls_id = int(box.cls[0])
        label = class_names[cls_id] if cls_id < len(class_names) else f"ID:{cls_id}"

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    cv2.imshow("YOLOv8n Detection (.pt)", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



0: 480x640 (no detections), 209.6ms
Speed: 5.9ms preprocess, 209.6ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 123.3ms
Speed: 1.4ms preprocess, 123.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 99.3ms
Speed: 3.3ms preprocess, 99.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 81.6ms
Speed: 1.9ms preprocess, 81.6ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 72.0ms
Speed: 1.3ms preprocess, 72.0ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 84.1ms
Speed: 1.9ms preprocess, 84.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 110.1ms
Speed: 1.5ms preprocess, 110.1ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 73.5ms
Speed: 2.2ms preprocess, 73