In [5]:
from ultralytics import YOLO
import cv2
import torch


In [2]:
def run_yolo_model_on_image(image_path, model_path='yolov8n.pt'):
    # Load the YOLO model
    model = YOLO(model_path)
    
    # Read the image
    img = cv2.imread(image_path)
    
    # Run the model on the image
    results = model(img)
    
    return results

In [10]:
def yolo_ball_detection(model, video_path, start_frame=0, end_frame=None, show=True):
    """
    Run YOLO model on a video to detect balls.
    Returns a list of tuples: (frame_num, x1, y1, x2, y2, confidence, class)
    """

    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if end_frame is None or end_frame > total_frames:
        end_frame = total_frames - 1

    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    frame_num = start_frame
    all_results = []

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    while frame_num <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLO inference
        results = model(frame, stream=True)

        for r in results:
            boxes = r.boxes
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0].item())
                cls = int(box.cls[0].item())
                all_results.append((frame_num, x1, y1, x2, y2, conf, cls))

                # Draw
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{model.names[cls]} {conf:.2f}"
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                
        # Write the frame number on the frame
        cv2.putText(frame, f"Frame: {frame_num}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        if show:
            cv2.imshow("Ball Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

        frame_num += 1

    cap.release()
    cv2.destroyAllWindows()
    return all_results

In [12]:
model = YOLO('best.pt')
yolo_ball_detection(model, '../Videos/game_1.mp4', start_frame=0, end_frame=None)



0: 384x640 (no detections), 25.0ms
Speed: 4.0ms preprocess, 25.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 23.2ms
Speed: 4.9ms preprocess, 23.2ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 23.2ms
Speed: 4.9ms preprocess, 23.2ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 25.5ms
Speed: 4.6ms preprocess, 25.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.3ms
Speed: 4.1ms preprocess, 21.3ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 22.4ms
Speed: 5.6ms preprocess, 22.4ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 23.3ms
Speed: 4.4ms preprocess, 23.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.6ms
Speed: 4.2ms preprocess, 21.6ms i

[(18, 551, 538, 573, 559, 0.3757529854774475, 0),
 (39, 665, 547, 690, 572, 0.4708777368068695, 0),
 (41, 595, 565, 619, 588, 0.6014701128005981, 0),
 (42, 564, 572, 591, 597, 0.34578731656074524, 0),
 (43, 525, 585, 548, 607, 0.41369110345840454, 0),
 (45, 454, 592, 477, 613, 0.3759619891643524, 0),
 (46, 427, 587, 449, 608, 0.29657524824142456, 0),
 (78, 495, 330, 520, 355, 0.25158053636550903, 0),
 (82, 576, 308, 602, 335, 0.33394724130630493, 0),
 (2135, 1509, 180, 1533, 203, 0.2605375349521637, 0),
 (2195, 1217, 594, 1242, 619, 0.4917207360267639, 0),
 (2196, 1192, 584, 1215, 606, 0.4869323670864105, 0),
 (2197, 1167, 573, 1192, 598, 0.3052627146244049, 0),
 (2198, 1145, 566, 1168, 588, 0.5442766547203064, 0),
 (2199, 1121, 560, 1143, 582, 0.5247717499732971, 0),
 (2200, 1095, 548, 1120, 573, 0.5483289361000061, 0),
 (2201, 1074, 541, 1097, 563, 0.5820173025131226, 0),
 (2202, 1053, 538, 1076, 560, 0.5065871477127075, 0),
 (2263, 766, 464, 788, 486, 0.2619929015636444, 0),
 (2264,

In [9]:
import json


def run_yolo_on_video(model_path, video_path, start_frame=0, end_frame=None, conf=0.25):
    model = YOLO(model_path)

    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # If end_frame not provided, use till last frame
    if end_frame is None or end_frame > total_frames:
        end_frame = total_frames

    # Jump to start frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    results_dict = {}
    frame_num = start_frame

    while frame_num < end_frame:
        print(f"Processing frame {frame_num}", end='\r')
        ret, frame = cap.read()
        if not ret:
            break

        preds = model.predict(frame, conf=conf, verbose=False)

        for r in preds:
            boxes = r.boxes.xyxy.cpu().numpy()
            if len(boxes) > 0:
                x1, y1, x2, y2 = boxes[0]
                cx = int((x1 + x2) / 2)
                cy = int((y1 + y2) / 2)
                results_dict[str(frame_num)] = {"x": cx, "y": cy}

        frame_num += 1

    cap.release()

    return results_dict, json.dumps(results_dict, indent=4)




In [13]:
ball_markup,json_output = run_yolo_on_video('best.pt', '../Videos/game_1.mp4',0,5000, conf=0.25)

with open('ball_positions.json', 'w') as f:
    f.write(json_output)

Processing frame 4999