In [None]:
import cv2
import argparse
from ultralytics import YOLO
import supervision as sv
import numpy as np

# Detection zone given as normalized (x, y) vertices. main() multiplies these
# by the frame width and height, so the four corners below span the left half
# of the frame.
ZONE_POLYGON = np.array(
    [[0, 0], [0.5, 0], [0.5, 1], [0, 1]],
    dtype=np.float32,
)


def parse_arguments() -> argparse.Namespace:
    """Parse the command line for the live YOLOv8 demo.

    No options are declared on the parser. Arguments it does not recognise
    (for example the ones a Jupyter kernel puts on the command line) are
    collected separately and discarded rather than triggering an error.

    Returns:
        The namespace holding the recognised options.
    """
    cli = argparse.ArgumentParser(description="YOLOv8 live")
    # parse_known_args returns (namespace, leftovers); only the namespace is kept.
    known, _leftovers = cli.parse_known_args()
    return known


def main() -> None:
    """Run YOLOv8 detection on webcam 0 and draw a polygon-zone overlay.

    Each captured frame is passed through the ``yolov8l.pt`` model, annotated
    with boxes, ``"<class name> <confidence>"`` labels and the zone scaled
    from ``ZONE_POLYGON``, then shown in a window titled ``"yolov8"``.

    The loop ends when a frame cannot be read, when Esc is pressed, or when
    the frame cannot be displayed (in which case the current annotated frame
    is written to ``output_frame.jpg``). The capture device is released on
    every exit path, including exceptions.
    """
    esc_key = 27  # ASCII code of the Esc key

    # The parser declares no options yet; the call is kept so command-line
    # handling still runs exactly as before.
    parse_arguments()

    cap = cv2.VideoCapture(0)
    try:
        # Bail out before loading the large model if the camera is unusable.
        if not cap.isOpened():
            print("Failed to open video capture device 0")
            return

        model = YOLO("yolov8l.pt")

        bounding_box_annotator = sv.BoxAnnotator(thickness=2)
        label_annotator = sv.LabelAnnotator(text_thickness=2, text_scale=1)

        # One frame is read up front only to learn the frame size; it is not
        # run through the model.
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            return

        frame_height, frame_width = frame.shape[:2]
        # Scale the normalized polygon to pixel coordinates of this camera.
        zone_polygon = (
            ZONE_POLYGON * np.array([frame_width, frame_height])
        ).astype(int)
        zone = sv.PolygonZone(polygon=zone_polygon)
        zone_annotator = sv.PolygonZoneAnnotator(
            zone=zone,
            color=sv.Color.RED,
            thickness=2,
            text_thickness=4,
            text_scale=2,
        )

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            result = model(frame, agnostic_nms=True)[0]
            boxes = result.boxes.xyxy.cpu().numpy()
            confidences = result.boxes.conf.cpu().numpy()
            # Class ids are cast to int so they can index model.names.
            class_ids = result.boxes.cls.cpu().numpy().astype(int)

            detections = sv.Detections(
                xyxy=boxes,
                confidence=confidences,
                class_id=class_ids,
            )
            labels = [
                f"{model.names[class_id]} {confidence:0.2f}"
                for confidence, class_id in zip(confidences, class_ids)
            ]
            frame = bounding_box_annotator.annotate(
                scene=frame, detections=detections
            )
            frame = label_annotator.annotate(
                scene=frame, detections=detections, labels=labels
            )

            zone.trigger(detections=detections)
            frame = zone_annotator.annotate(scene=frame)

            try:
                cv2.imshow("yolov8", frame)
            except cv2.error as e:
                # No usable display: keep the last annotated frame on disk
                # and stop instead of looping without output.
                print(f"Error displaying frame: {e}")
                cv2.imwrite("output_frame.jpg", frame)
                break

            # Mask to the low byte so extra high bits in the key code (seen
            # on some platforms/backends) do not hide the Esc press.
            if (cv2.waitKey(30) & 0xFF) == esc_key:
                break
    finally:
        cap.release()
        try:
            cv2.destroyAllWindows()
        except cv2.error:
            # OpenCV builds without GUI support raise here as well; there
            # are no windows to close in that case, so this is safe to skip.
            pass


# Start the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()



0: 480x640 1 person, 1834.0ms
Speed: 10.7ms preprocess, 1834.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1914.0ms
Speed: 9.9ms preprocess, 1914.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2011.7ms
Speed: 6.0ms preprocess, 2011.7ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1650.9ms
Speed: 6.0ms preprocess, 1650.9ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1444.2ms
Speed: 12.0ms preprocess, 1444.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1836.7ms
Speed: 12.6ms preprocess, 1836.7ms inference, 9.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1320.2ms
Speed: 6.3ms preprocess, 1320.2ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1000.1ms
Speed: 4.0ms preprocess, 1000.1ms inference, 1.4ms postp