In [1]:
import cv2
import tensorflow as tf
from ultralytics import YOLO



In [2]:
# Load the YOLOv8-medium detection weights. Per the Ultralytics docs this
# checkpoint is pretrained on the COCO dataset (80 object classes).
MODEL_WEIGHTS = "yolov8m.pt"
model = YOLO(MODEL_WEIGHTS)

In [3]:
# Open the webcam. Index 0 is the first camera OpenCV finds; change it to 1
# if you use an external camera.
CAMERA_INDEX = 0
cap = cv2.VideoCapture(CAMERA_INDEX)

# VideoCapture does not raise when the device cannot be opened; without this
# check the detection loop would simply exit on its first read with no
# explanation. Fail loudly here instead.
if not cap.isOpened():
    cap.release()
    raise RuntimeError(
        f"Could not open camera at index {CAMERA_INDEX}; "
        "check that it is connected and not in use by another application."
    )

In [None]:
# Real-time detection loop: read a frame from the camera, run YOLO on it,
# draw every detection on the frame and display it. The loop ends when 'q'
# is pressed in the window or when the camera stops delivering frames.

# Drawing settings shared by the bounding box and its label.
BOX_COLOR = (0, 255, 0)      # green
BOX_THICKNESS = 2
LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX
LABEL_SCALE = 0.7
LABEL_OFFSET = 10            # gap in pixels between the box top and the label
WINDOW_TITLE = "YOLOv8 Real-Time Detection"

# try/finally guarantees that the camera and the display window are released
# even when the loop is stopped by a kernel interrupt or an exception;
# otherwise the camera stays locked until the kernel is restarted.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged, closed, or never opened).
            break

        # Send the frame to YOLO for object detection.
        results = model(frame, stream=True)

        # Each result describes one frame; its `boxes` hold one entry per
        # detected object.
        for r in results:
            for box in r.boxes:
                # (x1, y1) -> top-left corner, (x2, y2) -> bottom-right corner.
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Confidence score (0-1): how sure the model is about this detection.
                conf = float(box.conf[0])

                # Class index and the human-readable class name it maps to.
                cls = int(box.cls[0])
                label = model.names[cls]

                # Outline the detected object.
                cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, BOX_THICKNESS)

                # Caption: class name + confidence (2 decimals), drawn above the
                # box. putText takes the text's bottom-left corner, so when the
                # box touches the top of the frame clamp the baseline to the
                # text height to keep the caption inside the image.
                text = f"{label} {conf:.2f}"
                (_, text_height), _ = cv2.getTextSize(
                    text, LABEL_FONT, LABEL_SCALE, BOX_THICKNESS
                )
                text_y = max(y1 - LABEL_OFFSET, text_height)
                cv2.putText(frame, text, (x1, text_y),
                            LABEL_FONT, LABEL_SCALE, BOX_COLOR, BOX_THICKNESS)

        # Show the annotated frame.
        cv2.imshow(WINDOW_TITLE, frame)

        # Pressing 'q' exits the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Turn off the camera and close every OpenCV window.
    cap.release()
    cv2.destroyAllWindows()



0: 480x640 1 car, 342.8ms
Speed: 3.3ms preprocess, 342.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 320.8ms
Speed: 3.0ms preprocess, 320.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 312.1ms
Speed: 1.6ms preprocess, 312.1ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 322.3ms
Speed: 2.0ms preprocess, 322.3ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 342.2ms
Speed: 1.8ms preprocess, 342.2ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 319.9ms
Speed: 1.5ms preprocess, 319.9ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 319.6ms
Speed: 1.4ms preprocess, 319.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 car, 343.7ms
Speed: 2.4ms preprocess, 343.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
