In [1]:
import os
from pathlib import Path
import shutil

# Configuration: where the label files and their matching images live,
# relative to the current working directory.
_data_root = Path("data")
label_dir = _data_root / "new_labels"
image_dir = _data_root / "images"

# Old class IDs to keep, mapped to their new IDs.  The old IDs are listed in
# new-ID order: the position of each entry (0, 1, 2, ...) is the new class ID
# it is remapped to.  Any old class ID not listed here is discarded.
old_to_new = {
    old_id: new_id
    for new_id, old_id in enumerate(
        (15, 27, 26, 3, 4, 12, 6, 7, 8, 24, 13, 5, 25, 19)
    )
}

def _parse_class_id(parts):
    """Return the integer class ID of a split label line, or None if malformed.

    A line is malformed when it has fewer than five fields or when its first
    field is not an integer literal (e.g. ``"15.0"`` or ``"car"``).
    """
    if len(parts) < 5:
        return None
    try:
        return int(parts[0])
    except ValueError:
        return None


def _remove_matching_image(image_path, stem):
    """Delete the first existing ``<stem>.jpg`` / ``.png`` / ``.jpeg`` image."""
    for ext in ('.jpg', '.png', '.jpeg'):
        candidate = image_path / (stem + ext)
        if candidate.exists():
            print(f"Removing {candidate}")
            candidate.unlink()
            break


def clean_labels(labels=None, images=None, class_map=None):
    """Filter and remap the class IDs of every label file, in place.

    Each ``*.txt`` file in the label directory is read line by line.  A line
    is split on whitespace and its first field is taken as the class ID:

    * if the class ID is a key of the class map, the line is kept with the
      ID replaced by the mapped value;
    * if the class ID is not in the map, the line is dropped silently;
    * if the line has fewer than five fields or a non-integer class ID, it
      is reported on stdout and dropped;
    * blank lines are dropped silently.

    A label file with at least one surviving line is rewritten with those
    lines.  A label file with none is deleted, together with the first
    existing image of the same stem (``.jpg``, then ``.png``, then ``.jpeg``)
    in the image directory.

    WARNING: this is destructive and NOT idempotent.  Files are overwritten
    and deleted without backup, and running it again on already-remapped
    labels applies the map a second time (new IDs that are not keys of the
    map are dropped, and their files may be deleted).  Run it once, on a
    copy of the data.

    Args:
        labels: Directory containing the label files.  Defaults to the
            module-level ``label_dir``.
        images: Directory containing the images.  Defaults to the
            module-level ``image_dir``.
        class_map: Mapping of old integer class ID to new class ID.
            Defaults to the module-level ``old_to_new``.
    """
    # Fall back to the module-level configuration when not overridden.
    label_path = label_dir if labels is None else Path(labels)
    image_path = image_dir if images is None else Path(images)
    mapping = old_to_new if class_map is None else class_map

    # Snapshot the listing before touching anything: files are deleted inside
    # the loop, and sorting makes the processing order (and log) deterministic.
    for label_file in sorted(label_path.glob("*.txt")):
        with open(label_file, 'r') as f:
            lines = f.readlines()

        new_lines = []
        for line in lines:
            parts = line.split()
            if not parts:
                # Blank (e.g. trailing) line: nothing to keep or report.
                continue
            class_id = _parse_class_id(parts)
            if class_id is None:
                print(f"Invalid line in {label_file}: {line}")
                continue
            if class_id in mapping:
                parts[0] = str(mapping[class_id])
                new_lines.append(" ".join(parts))

        if new_lines:
            with open(label_file, 'w') as f:
                # End the file with a newline so it stays a well-formed
                # text file (safe to concatenate or append to).
                f.write("\n".join(new_lines) + "\n")
        else:
            # No valid annotations — remove label and image
            print(f"Removing {label_file} (no valid annotations)")
            label_file.unlink()
            _remove_matching_image(image_path, label_file.stem)
    print("Label cleaning complete.")


# Run the cleanup on the configured directories.
# NOTE: this rewrites label files in place and deletes label/image files.
# Re-running it on already-remapped labels applies old_to_new a second time
# (new IDs that are not keys of the map get dropped), so run it only once,
# on a copy of the data.
clean_labels()


Removing data\new_labels\batch_10_000018.txt (no valid annotations)
Removing data\images\batch_10_000018.jpg
Removing data\new_labels\batch_10_000024.txt (no valid annotations)
Removing data\images\batch_10_000024.jpg
Removing data\new_labels\batch_10_000061.txt (no valid annotations)
Removing data\images\batch_10_000061.jpg
Removing data\new_labels\batch_10_000069.txt (no valid annotations)
Removing data\images\batch_10_000069.jpg
Removing data\new_labels\batch_11_000020.txt (no valid annotations)
Removing data\images\batch_11_000020.jpg
Removing data\new_labels\batch_11_000022.txt (no valid annotations)
Removing data\images\batch_11_000022.jpg
Removing data\new_labels\batch_11_000025.txt (no valid annotations)
Removing data\images\batch_11_000025.jpg
Removing data\new_labels\batch_11_000027.txt (no valid annotations)
Removing data\images\batch_11_000027.jpg
Removing data\new_labels\batch_12_000016.txt (no valid annotations)
Removing data\images\batch_12_000016.jpg
Removing data\new_l