In [1]:
from ultralytics import YOLO
import cv2
import torch


### Test Inference

In [2]:
def yolo_ball_detection(model, video_path, start_frame=0, end_frame=None, show=True,min_conf=0.25):
    """
    Run YOLO model on a video to detect balls.
    Returns a list of tuples: (frame_num, x1, y1, x2, y2, confidence, class)
    """

    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if end_frame is None or end_frame > total_frames:
        end_frame = total_frames - 1

    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    frame_num = start_frame
    all_results = []

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    while frame_num <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLO inference
        results = model(frame, stream=True)

        for r in results:
            print(r)
            boxes = r.boxes
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0].item())
                if conf < min_conf:
                    continue
                cls = int(box.cls[0].item())
                all_results.append((frame_num, x1, y1, x2, y2, conf, cls))

                # Draw
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{model.names[cls]} {conf:.2f}"
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                
        # Write the frame number on the frame
        cv2.putText(frame, f"Frame: {frame_num}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        if show:
            cv2.imshow("Ball Detection", frame)
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break

        frame_num += 1

    cap.release()
    cv2.destroyAllWindows()
    return all_results

In [3]:
model = YOLO('ver20.pt')
yolo_ball_detection(model, '../Videos/game_5.mp4', start_frame=0, end_frame=85000,min_conf=0.3)



0: 384x640 1 ball, 106.3ms
ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'ball'}
obb: None
orig_img: array([[[255, 254, 255],
        [255, 254, 255],
        [255, 254, 255],
        ...,
        [  4,  12,   9],
        [  1,   9,   6],
        [  0,   7,   4]],

       [[255, 254, 255],
        [255, 254, 255],
        [255, 254, 255],
        ...,
        [  3,  11,   8],
        [  2,  10,   7],
        [  1,   9,   6]],

       [[255, 253, 255],
        [255, 253, 255],
        [255, 253, 255],
        ...,
        [  2,  10,   7],
        [  3,  11,   8],
        [  4,  12,   9]],

       ...,

       [[ 20,  59,  98],
        [ 20,  59,  98],
        [ 20,  59,  98],
        ...,
        [ 21,  60,  86],
        [ 21,  60,  86],
        [ 21,  60,  86]],

       [[ 20,  59,  98],
        [ 20,  59,  98],
        [ 20,  59,  98],
        ...,
        [ 26,  60,  87],
        [ 26

[(0, 1007, 611, 1034, 638, 0.8318803310394287, 0),
 (1, 1026, 618, 1053, 644, 0.8217894434928894, 0),
 (2, 1045, 625, 1072, 652, 0.8649517297744751, 0),
 (3, 1064, 632, 1091, 659, 0.8378504514694214, 0),
 (4, 1082, 639, 1109, 666, 0.8541592359542847, 0),
 (5, 1102, 647, 1129, 674, 0.835577666759491, 0),
 (6, 1119, 655, 1147, 682, 0.872763454914093, 0),
 (7, 1138, 663, 1166, 691, 0.7347084283828735, 0),
 (8, 1156, 672, 1185, 700, 0.907769501209259, 0),
 (9, 1174, 680, 1202, 709, 0.8684931993484497, 0),
 (10, 1192, 690, 1220, 718, 0.847131609916687, 0),
 (11, 1209, 700, 1238, 729, 0.7961624264717102, 0),
 (12, 1228, 703, 1257, 732, 0.8363661170005798, 0),
 (13, 1244, 700, 1274, 730, 0.8551403880119324, 0),
 (14, 1261, 697, 1292, 727, 0.8990739583969116, 0),
 (15, 1278, 692, 1308, 722, 0.8133946061134338, 0),
 (16, 1296, 689, 1326, 718, 0.8874890804290771, 0),
 (17, 1314, 686, 1344, 715, 0.8730348944664001, 0),
 (18, 1330, 683, 1360, 713, 0.8540778160095215, 0),
 (19, 1350, 681, 1379, 710

In [4]:
import json


def run_yolo_on_video(model_path, video_path, start_frame=0, end_frame=None, conf=0.25):
    model = YOLO(model_path)

    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # If end_frame not provided, use till last frame
    if end_frame is None or end_frame > total_frames:
        end_frame = total_frames

    # Jump to start frame
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    results_dict = {}
    frame_num = start_frame

    while frame_num < end_frame:
        print(f"Processing frame {frame_num}", end='\r')
        ret, frame = cap.read()
        if not ret:
            break

        preds = model.predict(frame, conf=conf, verbose=False)

        for r in preds:
            boxes = r.boxes.xyxy.cpu().numpy()
            if len(boxes) > 0:
                x1, y1, x2, y2 = boxes[0]
                cx = int((x1 + x2) / 2)
                cy = int((y1 + y2) / 2)
                results_dict[str(frame_num)] = {"x": cx, "y": cy}

        frame_num += 1

    cap.release()

    return results_dict, json.dumps(results_dict, indent=4)




In [5]:
ball_markup,json_output = run_yolo_on_video('ver20.pt', '../Videos/game_5.mp4',0,5000, conf=0.25)

with open('ball_positions.json', 'w') as f:
    f.write(json_output)

Processing frame 4999

### Training

In [16]:
# ========================================
# Step 1: Install and Import Dependencies
# ========================================

from ultralytics import YOLO
import os
import pandas as pd

# ========================================
# Step 2: Mount Google Drive
# ========================================

# ========================================
# Step 3: Define Paths
# ========================================
dataset_path = "dataset"
project_path = "yolo_training"
run_name = "tt_ball_detector"

data_yaml = f"{dataset_path}/data.yaml"

# ========================================
# Step 4: Create data.yaml (if not exists)
# ========================================
classes = ["ball"]  # Modify if you have more classes

if not os.path.exists(data_yaml):
    with open(data_yaml, "w") as f:
        f.write(f"""
train: images/train
val: images/val

nc: {len(classes)}
names: {classes}
""")
    print("✅ Created data.yaml at:", data_yaml)
else:
    print("✅ data.yaml already exists at:", data_yaml)

# ========================================
# Step 5: Check for Previous Checkpoint
# ========================================
weights_dir = f"{project_path}/{run_name}/weights"
last_ckpt = f"{weights_dir}/last.pt"

if os.path.exists(last_ckpt):
    print(f"🟢 Found previous checkpoint: {last_ckpt}")
    model = YOLO(last_ckpt)
    resume_training = True
else:
    print("🟡 No previous checkpoint found — starting fresh training.")
    model = YOLO("yolo11n.pt")  # You can change to yolov8s.pt, yolov8m.pt, etc.
    resume_training = False

# ========================================
# Step 6: Train / Resume Training
# ========================================
if resume_training:
    # Resume exactly where it left off
    model.train(
        data=data_yaml,
        epochs=10,            # total epochs to reach
        resume=True,
        device="cuda" if torch.cuda.is_available() else "cpu"
    )
else:
    # Start new training
    model.train(
        data=data_yaml,
        epochs=10,
        imgsz=640,
        batch=16,
        name=run_name,
        project=project_path,
        pretrained=True,
        device="cuda" if torch.cuda.is_available() else "cpu"
    )

# ========================================
# Step 7: Optional — View Training Progress
# ========================================
results_csv = f"{project_path}/{run_name}/results.csv"

if os.path.exists(results_csv):
    df = pd.read_csv(results_csv)
    print("\n📈 Training progress so far:")
    display(df.tail())
else:
    print("⚠️ No results.csv found yet (training just resumed).")

# ========================================
# Step 8: Verify Saved Weights
# ========================================
print("\n📁 Saved weight files:")


✅ Created data.yaml at: dataset/data.yaml
🟡 No previous checkpoint found — starting fresh training.


NameError: name 'torch' is not defined

yolo train model=yolo11n.pt data=dataset/data.yaml epochs=10 imgsz=640 batch=16 name=tt_ball_detector project=yolo_training pretrained=True device="cuda"

yolo train model=yolo_training/tt_ball_detector16/weights/best.pt data=dataset/data.yaml epochs=10 imgsz=640 batch=16 name=tt_ball_detector project=yolo_training pretrained=True device="cuda" resume=True

In [12]:
import os
dataset_path=""

In [15]:
empty_count = 0
non_empty_count = 0
for file in os.listdir(f"labels"):
    with open(f"labels/{file}","r") as f:
        if f.read().strip():
            non_empty_count += 1
        else:
            empty_count += 1

print("Non empty files: ",non_empty_count)
print("Empty files: ",empty_count)

Non empty files:  6721
Empty files:  2000


### Dataset Preparation

In [4]:
import os
import pandas as pd
from PIL import Image
import random
# Paths
frames_dir = "frames"   # folder with frame_0001.jpg ...
labels_dir = "labels"   # output YOLO labels
os.makedirs(labels_dir, exist_ok=True)

# Load ball annotations
# CSV format: frame,x,y,x_min,y_min,x_max,y_max
df = pd.read_csv("ball_bounding_boxes_game1.csv")

unlabeled_max = 2000
unlabeled_count = 0

# Group annotations by frame number
annotations = {}
for _, row in df.iterrows():
    fid = int(row["frame"])
    if fid not in annotations:
        annotations[fid] = []
    annotations[fid].append(row)

for frame_file in sorted(os.listdir(frames_dir)):
    if not frame_file.endswith(".jpg"):
        continue

    # Extract frame number from file name (frame_0001.jpg → 1)
    frame_id = int(frame_file.split("_")[-1].split(".")[0])

    frame_path = os.path.join(frames_dir, frame_file)
    img = Image.open(frame_path)
    w, h = img.size

    label_path = os.path.join(labels_dir, frame_file.replace(".jpg", ".txt"))

    if frame_id in annotations:
        with open(label_path, "w") as f:
            for row in annotations[frame_id]:
                x_min, y_min, x_max, y_max = row["x_min"], row["y_min"], row["x_max"], row["y_max"]

                # Convert to YOLO format
                x_center = ((x_min + x_max) / 2) / w
                y_center = ((y_min + y_max) / 2) / h
                box_w = (x_max - x_min) / w
                box_h = (y_max - y_min) / h

                f.write(f"0 {x_center:.6f} {y_center:.6f} {box_w:.6f} {box_h:.6f}\n")
    else:
        if unlabeled_count < unlabeled_max:
            if random.random() < 0.4:  # 10% chance to keep an unlabeled frame
                open(label_path, "w").close()
                unlabeled_count += 1



In [8]:
import os
import shutil
import random
from sklearn.model_selection import train_test_split

# Mount Google Drive
# Paths
images_path = "frames"
labels_path = "labels"
output_path = "dataset"

# Train/Val split ratio
val_ratio = 0.2

# Get all images (assuming .jpg or .png)
image_files = [f for f in os.listdir(images_path) if f.endswith(('.jpg', '.png'))]

# Split train and val
train_files, val_files = train_test_split(image_files, test_size=val_ratio, random_state=42)
# Function to copy files
def copy_files(file_list, split):
    count = 0
    for f in file_list:
        label_file = os.path.splitext(f)[0] + ".txt"
        src_lbl = os.path.join(labels_path, label_file)
        if not os.path.exists(src_lbl):
            continue  # Skip if no corresponding label file

        print(f"Processing {count}th file")
        count += 1
        # image
        src_img = os.path.join(images_path, f)
        dst_img = os.path.join(output_path, "images", split, f)
        os.makedirs(os.path.dirname(dst_img), exist_ok=True)
        shutil.copy(src_img, dst_img)

        # label (same name, .txt)
        label_file = os.path.splitext(f)[0] + ".txt"
        src_lbl = os.path.join(labels_path, label_file)
        if os.path.exists(src_lbl):
            dst_lbl = os.path.join(output_path, "labels", split, label_file)
            os.makedirs(os.path.dirname(dst_lbl), exist_ok=True)
            shutil.copy(src_lbl, dst_lbl)

# Copy train and val
copy_files(train_files, "train")
copy_files(val_files, "val")

print(f"✅ Dataset prepared in {output_path}")


Processing 0th file
Processing 1th file
Processing 2th file
Processing 3th file
Processing 4th file
Processing 5th file
Processing 6th file
Processing 7th file
Processing 8th file
Processing 9th file
Processing 10th file
Processing 11th file
Processing 12th file
Processing 13th file
Processing 14th file
Processing 15th file
Processing 16th file
Processing 17th file
Processing 18th file
Processing 19th file
Processing 20th file
Processing 21th file
Processing 22th file
Processing 23th file
Processing 24th file
Processing 25th file
Processing 26th file
Processing 27th file
Processing 28th file
Processing 29th file
Processing 30th file
Processing 31th file
Processing 32th file
Processing 33th file
Processing 34th file
Processing 35th file
Processing 36th file
Processing 37th file
Processing 38th file
Processing 39th file
Processing 40th file
Processing 41th file
Processing 42th file
Processing 43th file
Processing 44th file
Processing 45th file
Processing 46th file
Processing 47th file
Pr

### Real Usage

In [7]:
from ultralytics import YOLO
import cv2

In [8]:
def ball_detection(model, video_path, start_frame=0, end_frame=None, show=True):
    """
    Run YOLO model on a video to detect balls.
    Returns a list of tuples: (frame_num, x1, y1, x2, y2, confidence, class)
    """

    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if end_frame is None or end_frame > total_frames:
        end_frame = total_frames - 1

    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    frame_num = start_frame
    all_results = []

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    while frame_num <= end_frame:
        ret, frame = cap.read()
        if not ret:
            break

        # Run YOLO inference
        results = model(frame, stream=True)

        for r in results:
            print(r)
            boxes = r.boxes
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                conf = float(box.conf[0].item())
                cls = int(box.cls[0].item())
                all_results.append((frame_num, x1, y1, x2, y2, conf, cls))

                # Draw
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                label = f"{model.names[cls]} {conf:.2f}"
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                
        # Write the frame number on the frame
        cv2.putText(frame, f"Frame: {frame_num}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        if show:
            cv2.imshow("Ball Detection", frame)
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break

        frame_num += 1

    cap.release()
    cv2.destroyAllWindows()
    return all_results

In [10]:
model = YOLO('yolo_training/tt_ball_detector16/weights/best.pt')
results = ball_detection(model, '../Videos/full_match.mp4', start_frame=0, end_frame=1000,show=False)


0: 384x640 (no detections), 26.5ms
ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'ball'}
obb: None
orig_img: array([[[ 4,  4,  4],
        [ 4,  4,  4],
        [ 4,  4,  4],
        ...,
        [ 8,  3,  4],
        [ 8,  3,  4],
        [ 8,  3,  4]],

       [[ 4,  4,  4],
        [ 4,  4,  4],
        [ 4,  4,  4],
        ...,
        [ 8,  3,  4],
        [ 8,  3,  4],
        [ 8,  3,  4]],

       [[ 4,  4,  4],
        [ 4,  4,  4],
        [ 4,  4,  4],
        ...,
        [ 8,  3,  4],
        [ 8,  3,  4],
        [ 8,  3,  4]],

       ...,

       [[87, 80, 77],
        [87, 80, 77],
        [86, 79, 76],
        ...,
        [83, 74, 71],
        [83, 74, 71],
        [83, 74, 71]],

       [[86, 79, 76],
        [86, 79, 76],
        [86, 79, 76],
        ...,
        [83, 74, 71],
        [83, 74, 71],
        [83, 74, 71]],

       [[86, 79, 76],
        [86, 79, 76]

In [13]:
def mid(x,y):
    return (x+y)//2

In [None]:
def process_results(results):
    '''
        Process the yolo results into simple x,y coordinates
        Input: list of tuples (frame_num, x1, y1, x2, y2, confidence, class)
        Output: list of lists [frame_num, x, y]
    '''
    processed = []
    for r in results:
        x = mid(r[1],r[3])
        y = mid(r[2],r[4])
        new_result = [r[0],x,y,r[5]]
        processed.append(new_result)
    return processed

In [None]:
def clean_up_results(results):
    '''
        Handles all cleanup activities on the results
        i) If multiple detections in a frame, keep the one with highest confidence
    '''