In [1]:
from ultralytics import YOLO 
import cv2
import math 

In [2]:
# Open the webcam at device index 0 and request a 640x480 frame size.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail early with a clear message rather than erroring later inside the
    # frame loop when no frame can be read.
    raise RuntimeError("Could not open webcam (device index 0)")
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)   # same property the original set via id 3
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # same property the original set via id 4

# Load the detection model from the local weights file.
model = YOLO("yolo-Weights/yolov8n.pt")

# Human-readable label for each class index reported by the model
# (the loop looks labels up as classNames[int(box.cls[0])]).
# Order matters: position in this list is the class index.
classNames = [
    # 0: people
    "person",
    # 1-8: vehicles
    "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
    # 9-13: street furniture
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    # 14-23: animals
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # 24-28: bags and accessories
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    # 29-38: sports equipment
    "frisbee", "skis", "snowboard", "sports ball", "kite",
    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    # 39-45: tableware
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
    # 46-55: food
    "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake",
    # 56-61: furniture
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet",
    # 62-67: electronics
    "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
    # 68-72: kitchen appliances
    "microwave", "oven", "toaster", "sink", "refrigerator",
    # 73-79: miscellaneous indoor objects
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]


# Live detection loop: grab a frame, run the model on it, draw a box and label
# for each detection, show the annotated frame, and stop when 'q' is pressed.

# Drawing settings are identical for every detection, so define them once.
box_color = (255, 0, 255)
box_thickness = 3
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
thickness = 2

try:
    while True:
        success, img = cap.read()
        if not success or img is None:
            # No frame was delivered (camera unplugged, busy, or never opened);
            # stop instead of passing None to the model and to OpenCV.
            print("Failed to read a frame from the webcam; stopping.")
            break

        results = model(img, stream=True)

        for r in results:
            for box in r.boxes:
                # bounding box corners, converted to ints for the OpenCV drawing calls
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

                # draw the box on the frame
                cv2.rectangle(img, (x1, y1), (x2, y2), box_color, box_thickness)

                # confidence, rounded up to two decimals
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->", confidence)

                # class index -> label
                cls = int(box.cls[0])
                label = classNames[cls]
                print("Class name -->", label)

                # write the label at the box's top-left corner
                cv2.putText(img, label, (x1, y1), font, fontScale, color, thickness)

        cv2.imshow('Webcam', img)
        # Mask to the low byte so the comparison with ord('q') also holds on
        # platforms where waitKey returns extra high bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including when the kernel
    # is interrupted or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 1 person, 133.9ms
Confidence ---> 0.31
Class name --> person
Speed: 3.0ms preprocess, 133.9ms inference, 1441.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 131.7ms
Confidence ---> 0.69
Class name --> person
Speed: 2.7ms preprocess, 131.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 131.0ms
Confidence ---> 0.64
Class name --> person
Speed: 3.1ms preprocess, 131.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 126.3ms
Confidence ---> 0.72
Class name --> person
Speed: 0.0ms preprocess, 126.3ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 117.9ms
Confidence ---> 0.78
Class name --> person
Speed: 0.0ms preprocess, 117.9ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 139.0ms
Confidence ---> 0.55
Class name --> person
Speed: 0.0ms preprocess, 139.0ms inference, 0.0ms postprocess per ima


0: 480x640 1 bottle, 1 remote, 117.3ms
Confidence ---> 0.29
Class name --> bottle
Confidence ---> 0.26
Class name --> remote
Speed: 2.0ms preprocess, 117.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 bottle, 118.3ms
Confidence ---> 0.4
Class name --> bottle
Confidence ---> 0.3
Class name --> person
Speed: 1.5ms preprocess, 118.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 bottle, 200.5ms
Confidence ---> 0.36
Class name --> bottle
Speed: 2.5ms preprocess, 200.5ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 bottle, 203.5ms
Confidence ---> 0.51
Class name --> bottle
Speed: 3.0ms preprocess, 203.5ms inference, 3.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 210.1ms
Confidence ---> 0.36
Class name --> person
Speed: 3.5ms preprocess, 210.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 209.1ms
Confidence 


0: 480x640 1 cell phone, 120.3ms
Confidence ---> 0.43
Class name --> cell phone
Speed: 2.0ms preprocess, 120.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 cell phone, 115.3ms
Confidence ---> 0.42
Class name --> cell phone
Speed: 2.0ms preprocess, 115.3ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 cell phone, 115.8ms
Confidence ---> 0.43
Class name --> cell phone
Speed: 1.5ms preprocess, 115.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 cell phone, 123.3ms
Confidence ---> 0.49
Class name --> cell phone
Speed: 2.0ms preprocess, 123.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 119.8ms
Speed: 1.5ms preprocess, 119.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 174.0ms
Speed: 1.5ms preprocess, 174.0ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 

Confidence ---> 0.43
Class name --> person
Confidence ---> 0.36
Class name --> person
Speed: 1.5ms preprocess, 114.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cup, 122.8ms
Confidence ---> 0.58
Class name --> person
Confidence ---> 0.3
Class name --> cup
Speed: 2.0ms preprocess, 122.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 132.4ms
Confidence ---> 0.47
Class name --> person
Confidence ---> 0.35
Class name --> person
Speed: 2.0ms preprocess, 132.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 cups, 144.4ms
Confidence ---> 0.75
Class name --> person
Confidence ---> 0.48
Class name --> cup
Confidence ---> 0.29
Class name --> cup
Speed: 1.0ms preprocess, 144.4ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 126.8ms
Confidence ---> 0.51
Class name --> person
Speed: 2.0ms preprocess, 126.8ms inference, 1.5ms po


0: 480x640 1 person, 110.8ms
Confidence ---> 0.59
Class name --> person
Speed: 1.5ms preprocess, 110.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 113.8ms
Confidence ---> 0.32
Class name --> person
Speed: 1.5ms preprocess, 113.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 108.3ms
Confidence ---> 0.64
Class name --> person
Confidence ---> 0.54
Class name --> person
Speed: 2.0ms preprocess, 108.3ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 3 persons, 104.3ms
Confidence ---> 0.61
Class name --> person
Confidence ---> 0.59
Class name --> person
Confidence ---> 0.59
Class name --> person
Speed: 1.5ms preprocess, 104.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 108.3ms
Confidence ---> 0.45
Class name --> person
Confidence ---> 0.38
Class name --> person
Speed: 1.5ms preprocess, 108.3ms inference, 2.0ms postprocess per im