In [1]:
import cv2
from ultralytics import YOLO  # Import YOLO model from Ultralytics
import supervision as sv  # Import the supervision library for annotations

class ObjectDetectionWithWebcam:
    """
    Performs real-time object detection on webcam frames using a YOLO model.

    Attributes:
        model (YOLO): YOLO object detection model.
        webcam (cv2.VideoCapture): Capture device that frames are read from.
    """

    def __init__(self, model_weights: str = 'yolov8s.pt', camera_index: int = 0):
        """
        Loads the YOLO model and opens the capture device.

        Args:
            model_weights (str): Path to the YOLO model weights file (default is 'yolov8s.pt').
            camera_index (int): Index of the capture device handed to
                cv2.VideoCapture (default is 0, the value that was previously hard-coded).

        Raises:
            RuntimeError: If the capture device cannot be opened.
        """
        self.model = YOLO(model_weights)
        self.webcam = cv2.VideoCapture(camera_index)

        if not self.webcam.isOpened():
            raise RuntimeError("Cannot open webcam")

    def release(self) -> None:
        """
        Releases the webcam and closes any OpenCV windows.

        Does nothing when no webcam was ever attached to this instance, so it
        is safe to call on an object whose __init__ failed part-way through.
        """
        webcam = getattr(self, "webcam", None)
        if webcam is None:
            # __init__ raised before the capture device was created; there is
            # nothing to release and this object opened no windows.
            return

        webcam.release()
        cv2.destroyAllWindows()

    def __del__(self):
        """
        Cleans up resources by delegating to release().
        """
        self.release()

    def detect_objects(self) -> None:
        """
        Performs real-time object detection using the webcam and displays the annotated frames.

        Runs until a frame cannot be read or the 'q' key is pressed in the
        display window. Detections with class_id 0 are not drawn.
        """
        # The annotators do not depend on the frame, so build them once
        # instead of on every loop iteration.
        bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=4)
        label_annotator = sv.LabelAnnotator()

        while True:
            # Read frame from webcam
            ret, frame = self.webcam.read()

            if not ret:
                print("Can't receive frame (stream end?), Exiting ...")
                break

            # Perform object detection on the frame using the YOLO model
            results = self.model(frame)[0]

            # Convert YOLO detections to Supervision Detections format
            detections = sv.Detections.from_ultralytics(results)

            # Drop detections whose class_id is 0.
            # NOTE(review): with COCO-trained weights such as the default
            # 'yolov8s.pt', id 0 is presumably 'person' rather than a
            # background class -- confirm that hiding people is intended.
            detections = detections[detections.class_id != 0]

            # Get labels for each remaining detected object
            labels = [
                self.model.model.names[class_id]
                for class_id in detections.class_id
            ]

            # Annotate the frame with bounding boxes
            annotated_image = bounding_box_annotator.annotate(
                scene=frame, detections=detections)

            # Annotate the frame with labels
            annotated_image = label_annotator.annotate(
                scene=annotated_image, detections=detections, labels=labels)

            # Display the annotated frame
            cv2.imshow("Object Detection", annotated_image)

            # Exit loop if 'q' key is pressed. Mask to the low byte because
            # some platforms return extra modifier bits from waitKey.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

# Usage example:
if __name__ == "__main__":
    # Initialize ObjectDetectionWithWebcam class
    detector = ObjectDetectionWithWebcam()

    try:
        # Perform real-time object detection
        detector.detect_objects()
    finally:
        # Always free the camera and close the display window, even when the
        # detection loop exits through an exception (e.g. Ctrl+C / kernel
        # interrupt); otherwise the device stays locked and the window hangs.
        detector.__del__()






0: 384x640 1 bed, 108.5ms
Speed: 2.4ms preprocess, 108.5ms inference, 359.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 chair, 2 beds, 70.0ms
Speed: 1.4ms preprocess, 70.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bed, 89.4ms
Speed: 2.1ms preprocess, 89.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 chair, 1 bed, 102.6ms
Speed: 1.3ms preprocess, 102.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 bed, 1 refrigerator, 66.2ms
Speed: 1.3ms preprocess, 66.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 chair, 2 beds, 56.8ms
Speed: 1.1ms preprocess, 56.8ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 2 chairs, 1 bed, 74.2ms
Speed: 1.1ms preprocess, 74.2ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 perso