In [1]:
from ultralytics import YOLO
import cv2
import torch


In [2]:
def run_yolo_model_on_image(image_path, model_path='yolov8n.pt'):
    """
    Run a YOLO model on a single image file.

    Args:
        image_path: Path of the image, loaded with ``cv2.imread``.
        model_path: Weights file handed to ``YOLO``. Defaults to 'yolov8n.pt'.

    Returns:
        Whatever calling the loaded model on the image returns.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    # Read the image first so a bad path fails before any weights are loaded.
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        # Passing that None on would not analyse the requested image
        # (NOTE(review): Ultralytics appears to substitute its bundled sample
        # images when the source is None - confirm for the installed version).
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    # Load the YOLO model and run it on the decoded image.
    model = YOLO(model_path)
    return model(img)

In [2]:
def yolo_ball_detection(model, video_path, start_frame=0, end_frame=None, show=True):
    """
    Run a YOLO model frame by frame over a video and collect its detections.

    Args:
        model: Loaded YOLO model. It is moved to "cuda" when
            ``torch.cuda.is_available()`` is true, otherwise to "cpu".
        video_path: Path of the video, opened with ``cv2.VideoCapture``.
        start_frame: Index of the first frame to process.
        end_frame: Index of the last frame to process (inclusive). ``None``,
            or a value beyond the reported frame count, means "up to the last
            frame of the video".
        show: When True, draw every detection on the frame and display it in
            a window titled "Ball Detection"; pressing "q" there stops early.

    Returns:
        A list of tuples: (frame_num, x1, y1, x2, y2, confidence, class).
        The box corners are converted to ``int``; one tuple is appended per
        detected box.

    Raises:
        OSError: If the video at ``video_path`` cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly: otherwise a wrong path just looks like "no detections".
        cap.release()
        raise OSError(f"Could not open video: {video_path!r}")

    all_results = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames > 0:
            last_frame = total_frames - 1
            if end_frame is None or end_frame > last_frame:
                end_frame = last_frame
        # If the reported frame count is not positive, end_frame is left as
        # given (None = no limit) and the loop stops when cap.read() fails.

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        frame_num = start_frame

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)

        while end_frame is None or frame_num <= end_frame:
            ret, frame = cap.read()
            if not ret:
                break

            # Run YOLO inference on the current frame
            for r in model(frame, stream=True):
                for box in r.boxes:
                    x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                    conf = float(box.conf[0].item())
                    cls = int(box.cls[0].item())
                    all_results.append((frame_num, x1, y1, x2, y2, conf, cls))

                    # Annotate only when the frame is actually displayed.
                    if show:
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        label = f"{model.names[cls]} {conf:.2f}"
                        cv2.putText(frame, label, (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            if show:
                cv2.imshow("Ball Detection", frame)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

            frame_num += 1
    finally:
        # Release the capture even if inference or drawing raised.
        cap.release()
        if show:
            # Only tear down windows when this call may have created one, so
            # unrelated OpenCV windows survive a show=False run.
            cv2.destroyAllWindows()

    return all_results

In [4]:
# Load the weights stored in 'best.pt'.
model = YOLO('best.pt')

# Scan the first game video; the returned detection list is the last
# expression of the cell, so the notebook displays it as the cell output.
yolo_ball_detection(
    model,
    '../Videos/game_1.mp4',
    start_frame=0,
    end_frame=50000,
)



0: 384x640 (no detections), 28.2ms
Speed: 3.8ms preprocess, 28.2ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.0ms
Speed: 4.2ms preprocess, 25.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 23.0ms
Speed: 4.4ms preprocess, 23.0ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 23.1ms
Speed: 4.8ms preprocess, 23.1ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 22.9ms
Speed: 5.0ms preprocess, 22.9ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 22.8ms
Speed: 4.3ms preprocess, 22.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 29.6ms
Speed: 4.1ms preprocess, 29.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 23.2ms
Speed: 4.3ms preprocess, 23.2ms i

[(18, 551, 538, 573, 559, 0.3757529854774475, 0),
 (39, 665, 547, 690, 572, 0.4708777368068695, 0),
 (41, 595, 565, 619, 588, 0.6014701128005981, 0),
 (42, 564, 572, 591, 597, 0.34578731656074524, 0),
 (43, 525, 585, 548, 607, 0.41369110345840454, 0),
 (45, 454, 592, 477, 613, 0.3759619891643524, 0),
 (46, 427, 587, 449, 608, 0.29657524824142456, 0),
 (78, 495, 330, 520, 355, 0.25158053636550903, 0),
 (82, 576, 308, 602, 335, 0.33394724130630493, 0)]