In [3]:
import cv2
from ultralytics import YOLO

In [9]:
# Build the detector from the 'yolov10x.pt' weights; the capture loop below
# calls `yolo.track` on every frame.
yolo = YOLO('yolov10x.pt')

In [13]:
# Open capture device 0 (typically the default webcam).
videoCap = cv2.VideoCapture(0)
# Fail fast: an unopened capture only shows up later as read() returning
# False on every call, which is much harder to diagnose.
if not videoCap.isOpened():
    raise RuntimeError('Could not open video capture device 0')

In [14]:
def get_color(cls_num):
    """Return a deterministic colour for a class index.

    The class index selects one of three base colours (``cls_num % 3``) and
    shifts each channel by a per-base increment multiplied by
    ``cls_num // 3``, so classes that share a base colour still differ.

    Args:
        cls_num: Integer class index.

    Returns:
        A 3-tuple of ints, each in the range 0..255.
    """
    base_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
    increments = [(1, -2, 1), (-2, 1, -1), (1, -1, 2)]
    color_index = cls_num % len(base_colors)
    step = cls_num // len(base_colors)
    # Wrap the whole sum: `*` and `%` share precedence, so without the
    # parentheses only the offset is reduced modulo 256 and a channel can
    # come out as 256 or more.
    return tuple(
        (base + inc * step) % 256
        for base, inc in zip(base_colors[color_index], increments[color_index])
    )

In [15]:
# Detections whose confidence is not above this value are not drawn.
CONF_THRESHOLD = 0.4

# Read frames from `videoCap`, run the tracker on each one, draw a labelled
# box per confident detection, and show the annotated frame until 'q' is
# pressed or the camera stops delivering frames.
while True:
    ret, frame = videoCap.read()
    if not ret:
        # Stop rather than retry: retrying would skip cv2.waitKey below and
        # spin forever with no way to quit from the keyboard.
        print('Could not read a frame from the capture device; stopping.')
        break

    # persist=True keeps the tracker state between calls, which is required
    # when frames are passed to track() one at a time.
    results = yolo.track(frame, stream=True, persist=True)
    for result in results:
        classes_names = result.names
        for box in result.boxes:
            confidence = float(box.conf[0])
            if confidence <= CONF_THRESHOLD:
                continue
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            cls = int(box.cls[0])
            class_name = classes_names[cls]
            colour = get_color(cls)
            cv2.rectangle(frame, (x1, y1), (x2, y2), colour, 2)
            cv2.putText(frame, f'{class_name} {confidence:.2f}', (x1, y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)

    cv2.imshow('frame', frame)
    # waitKey also services the GUI event loop; 'q' ends the session.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


0: 480x640 1 person, 75.6ms
Speed: 0.8ms preprocess, 75.6ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 73.3ms
Speed: 1.1ms preprocess, 73.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 63.7ms
Speed: 0.9ms preprocess, 63.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 63.2ms
Speed: 0.9ms preprocess, 63.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 62.7ms
Speed: 0.9ms preprocess, 62.7ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 63.3ms
Speed: 0.9ms preprocess, 63.3ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 63.5ms
Speed: 1.0ms preprocess, 63.5ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 63.9ms
Speed: 0.9ms preprocess, 63.9ms inference, 0.4ms postprocess per image at shape (1, 3, 48

KeyboardInterrupt: 

In [None]:
# Release the capture device and close the window opened by cv2.imshow.
# Run this cell after the loop ends (including after a kernel interrupt).
videoCap.release()
cv2.destroyAllWindows()