In [35]:
import cv2
from ultralytics import YOLO

In [36]:
def getColours(cls_num):
    """Return a deterministic colour triple for a class index.

    The class index selects one of three base colours (``cls_num % 3``) and
    shifts each channel by a per-channel increment multiplied by
    ``cls_num // 3``, so classes that share a base colour still get
    distinct shades.

    Args:
        cls_num: Integer class index.

    Returns:
        A 3-tuple of ints, each wrapped into the range 0..255.
    """
    base_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
    increments = [(1, -2, 1), (-2, 1, -1), (1, -1, 2)]

    color_index = cls_num % len(base_colors)
    step = cls_num // len(base_colors)

    # Wrap the *sum* into 0..255. Without the parentheses, `%` binds only to
    # `inc * step`, and adding the base afterwards can push a channel past 255
    # (e.g. 255 + 1 == 256 for class 3).
    return tuple(
        (base + inc * step) % 256
        for base, inc in zip(base_colors[color_index], increments[color_index])
    )

In [37]:
# Load the detection model from the 'yolov8s.pt' weights file
MODEL_WEIGHTS = 'yolov8s.pt'
yolo = YOLO(MODEL_WEIGHTS)

In [38]:
# Open the input video and fail loudly if it cannot be opened; otherwise the
# frame loop would see a failed read on its first iteration and exit silently.
VIDEO_PATH = 'golf.mp4'
videoCap = cv2.VideoCapture(VIDEO_PATH)
if not videoCap.isOpened():
    raise IOError(f"Could not open video source: {VIDEO_PATH!r}")


In [39]:
# Read frames from `videoCap`, run YOLO on each one, draw the boxes whose
# confidence exceeds the threshold, and show the annotated frame until the
# video ends or 'q' is pressed.
CONF_THRESHOLD = 0.4  # Only draw boxes with confidence above 40%

try:
    # The window only needs to be created once, not on every frame
    cv2.namedWindow('frame', cv2.WINDOW_NORMAL)

    while True:
        ret, frame = videoCap.read()
        if not ret:
            break  # Exit loop if video ends (or the frame could not be read)

        # Process the frame with YOLO tracking
        # NOTE(review): track() is called per frame without persist=True and
        # no track IDs are read below; presumably plain detection would be
        # enough, or persist=True is wanted -- confirm against Ultralytics docs.
        results = yolo.track(frame, stream=True)

        for result in results:  # Iterate over YOLO results
            classes_names = result.names  # Mapping from class index to name

            # Iterate over each bounding box
            for box in result.boxes:
                confidence = box.conf[0]
                if confidence > CONF_THRESHOLD:
                    # Get coordinates and class info
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cls = int(box.cls[0])
                    class_name = classes_names[cls]

                    # Get respective color
                    colour = getColours(cls)

                    # Draw the rectangle and label on the frame
                    cv2.rectangle(frame, (x1, y1), (x2, y2), colour, 2)
                    cv2.putText(frame, f'{class_name} {confidence:.2f}',
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 2)

        # Display the frame
        cv2.imshow('frame', frame)

        # Break loop on key press 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if the loop raised or the kernel was interrupted
    videoCap.release()
    cv2.destroyAllWindows()


0: 384x640 1 person, 1 sports ball, 393.5ms
Speed: 26.1ms preprocess, 393.5ms inference, 41.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 231.2ms
Speed: 0.0ms preprocess, 231.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 210.2ms
Speed: 1.9ms preprocess, 210.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 240.3ms
Speed: 3.4ms preprocess, 240.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 214.9ms
Speed: 8.1ms preprocess, 214.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 239.2ms
Speed: 8.4ms preprocess, 239.2ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 1 baseball bat, 246.4ms
Speed: 7.8ms preprocess, 246.4ms inference, 8.3ms postprocess per image at shap