In [1]:
import os
from pathlib import Path

In [4]:
# Dataset root, given relative to the working directory; the cells below look
# for <split>/images and <split>/labels underneath it.
DATASETS_ROOT = Path("../datasets/final-dataset")

In [8]:
# Which split of the dataset to clean.
SPLIT = "valid"

# Images and labels live in sibling folders under the chosen split.
IMAGES_DIR = DATASETS_ROOT / SPLIT / "images"
LABELS_DIR = DATASETS_ROOT / SPLIT / "labels"

# Fail fast: is_dir() (rather than exists()) also rejects a regular file that
# happens to sit at the expected path, which would otherwise only blow up
# later when the folder is listed.
assert IMAGES_DIR.is_dir(), f"{IMAGES_DIR} not found or not a directory"
assert LABELS_DIR.is_dir(), f"{LABELS_DIR} not found or not a directory"

In [9]:

# Supported image extensions
image_extensions = {".jpg", ".jpeg", ".png", ".bmp"}

# Flip to True to only list the orphaned labels without deleting anything.
DRY_RUN = False

# Base names (without extension) of every image file in the split
image_files = {
    os.path.splitext(entry.name)[0]
    for entry in IMAGES_DIR.iterdir()
    if entry.is_file()
    and os.path.splitext(entry.name)[1].lower() in image_extensions
}

# With no images at all, every label would count as orphaned and be wiped.
# That most likely means IMAGES_DIR points at the wrong place, so stop here
# instead of deleting the whole labels folder.
assert image_files, f"No images found in {IMAGES_DIR}; refusing to delete every label"

# Walk the label files in sorted order so the log is the same on every run
deleted_count = 0

for label_path in sorted(LABELS_DIR.iterdir()):
    label_file = label_path.name
    if not label_file.endswith(".txt"):
        continue

    label_name = os.path.splitext(label_file)[0]

    # A label is kept only when an image with the same base name exists
    if label_name in image_files:
        continue

    if not DRY_RUN:
        label_path.unlink()
    deleted_count += 1
    print(f"{'Would delete' if DRY_RUN else 'Deleted'}: {label_file}")

if DRY_RUN:
    print(f"\nDry run only. Labels that would be deleted: {deleted_count}")
else:
    print(f"\n✅ Cleanup complete. Total labels deleted: {deleted_count}")


Deleted: ddd-10-_jpg.rf.98bc5da4adc24d381c4af27ec9005448.txt
Deleted: ddd-102-_jpg.rf.7dd13fa23bb4a3c394ac0047ede61bf9.txt
Deleted: ddd-11-_png.rf.c9386d3fcfbf45543b37b333fbd7e78e.txt
Deleted: ddd-182-_jpg.rf.47abe8afd6aac2a51d162bd0250a7bbf.txt
Deleted: ddd-183-_jpg.rf.f5300f7cc6bcd542b3c203e2f0613e92.txt
Deleted: ddd-193-_jpg.rf.e408a72782ff258d0699ef1bc61049a6.txt
Deleted: ddd-196-_jpg.rf.03699a6e46883dce175503a95240fa35.txt
Deleted: ddd-199-_jpg.rf.11fbd1e4a44d184f38f545cbdb5afbf4.txt
Deleted: ddd-20-_jpeg.rf.305af38588282f6527f9f409981b3286.txt
Deleted: ddd-204-_jpg.rf.4bfb1c4c54ee1d3adf41b86be7cc0b18.txt
Deleted: ddd-21-_jpg.rf.93e20046b7aa7852b55bec63ad724d29.txt
Deleted: ddd-214-_jpg.rf.352d6a7c78f51bdc9ebce32291b54834.txt
Deleted: ddd-236-_jpg.rf.a4ba952815d714eff061e85a599901b0.txt
Deleted: ddd-27-_jpg.rf.7848e44d976e6fa8c55dcebb4a4d12ee.txt
Deleted: ddd-3-_jpg.rf.ed40e13572be97c82ef36ee4e2c53c41.txt
Deleted: ddd-35-_jpg.rf.bd953f3080f5dcd9db953cbc8e9b23ee.txt
Deleted: ddd-39

# Class Label Handling

In [5]:
# Root folder that is searched recursively for labels/*.txt files.
# NOTE(review): absolute, machine-specific path, unlike the relative
# DATASETS_ROOT used earlier; it has to be edited to run elsewhere.
LABELS_ROOT = Path(r"E:\ROBOTRONIX\Learning Period\datasets\football Tracking.v1i.yolov11-cleaned")

In [10]:
def collapse_classes_to_one(labels_root):
    """Rewrite every label file under ``labels_root`` so each box has class id 0.

    All ``*.txt`` files located directly inside any ``labels`` directory below
    ``labels_root`` are processed. Each line is expected to contain exactly five
    whitespace-separated fields (``class cx cy w h``); the first field is
    replaced by ``0`` and the other four are written back unchanged.

    Files are overwritten in place, without a trailing newline. Zero-byte files
    are left untouched. Blank lines are removed. Non-blank lines that do not
    have exactly five fields are dropped from the output, and the total number
    of such lines is reported so the loss is visible.

    Args:
        labels_root: Directory to search, as a ``pathlib.Path`` or a string.

    Returns:
        None. Progress and the summary are printed.
    """
    # Accept plain strings as well as Path objects.
    labels_root = Path(labels_root)

    txt_files = list(labels_root.rglob("labels/*.txt"))
    print(f"Found {len(txt_files)} label files")

    dropped_lines = 0

    for txt_path in txt_files:
        if txt_path.stat().st_size == 0:
            continue  # skip empty files

        new_lines = []

        with open(txt_path, "r") as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue  # blank line, nothing to keep
                if len(parts) != 5:
                    # Not a "class cx cy w h" row; it is left out of the
                    # rewritten file, so count it for the warning below.
                    dropped_lines += 1
                    continue

                # Force class id = 0
                _, cx, cy, w, h = parts
                new_lines.append(f"0 {cx} {cy} {w} {h}")

        with open(txt_path, "w") as f:
            f.write("\n".join(new_lines))

    if dropped_lines:
        print(f"Warning: dropped {dropped_lines} line(s) that did not have exactly 5 fields")

    print("✅ All classes collapsed to class 0 (football)")

In [7]:
# Overwrites the label files under LABELS_ROOT in place.
collapse_classes_to_one(LABELS_ROOT)

Found 659 label files
✅ All classes collapsed to class 0 (football)


# Verification

In [9]:
# Collect every distinct class id that still appears in the label files.
cls_ids = set()

for txt in Path(LABELS_ROOT).rglob("labels/*.txt"):
    with open(txt) as f:
        for line in f:
            fields = line.split()
            if fields:  # a blank line would raise IndexError on fields[0]
                cls_ids.add(int(fields[0]))

# After collapsing, 0 is the only id allowed (an empty set means no boxes).
assert cls_ids <= {0}, f"Unexpected class ids remain: {sorted(cls_ids - {0})}"

print(cls_ids)

{0}


In [10]:
# Drop the collected ids before moving on to the next dataset.
del cls_ids

# Now for the second dataset

In [11]:
# Re-point LABELS_ROOT at the second dataset folder.
# NOTE(review): absolute, machine-specific path; edit before running elsewhere.
LABELS_ROOT = Path(r"E:\ROBOTRONIX\Learning Period\datasets\football2.v1i.yolov11-cleaned")

In [12]:
# Overwrites the label files under LABELS_ROOT in place.
collapse_classes_to_one(LABELS_ROOT)

Found 681 label files
✅ All classes collapsed to class 0 (football)


In [13]:
# Collect every distinct class id that still appears in the label files.
cls_ids = set()

for txt in Path(LABELS_ROOT).rglob("labels/*.txt"):
    with open(txt) as f:
        for line in f:
            fields = line.split()
            if fields:  # a blank line would raise IndexError on fields[0]
                cls_ids.add(int(fields[0]))

# After collapsing, 0 is the only id allowed (an empty set means no boxes).
assert cls_ids <= {0}, f"Unexpected class ids remain: {sorted(cls_ids - {0})}"

print(cls_ids)

{0}


In [11]:
# Re-point LABELS_ROOT at the "final-dataset" folder.
# NOTE(review): same folder name as the relative DATASETS_ROOT used earlier,
# but given here as an absolute, machine-specific path; confirm both refer to
# the same directory.
LABELS_ROOT = Path(r"E:\ROBOTRONIX\Learning Period\datasets\final-dataset")

In [12]:
# Overwrites the label files under LABELS_ROOT in place.
collapse_classes_to_one(LABELS_ROOT)

Found 1016 label files


✅ All classes collapsed to class 0 (football)


In [13]:
# Collect every distinct class id that still appears in the label files.
cls_ids = set()

for txt in Path(LABELS_ROOT).rglob("labels/*.txt"):
    with open(txt) as f:
        for line in f:
            fields = line.split()
            if fields:  # a blank line would raise IndexError on fields[0]
                cls_ids.add(int(fields[0]))

# After collapsing, 0 is the only id allowed (an empty set means no boxes).
assert cls_ids <= {0}, f"Unexpected class ids remain: {sorted(cls_ids - {0})}"

print(cls_ids)

{0}
