In [3]:
import os
import shutil
import random

# Paths
original_images = "C:\\Users\\Sriram\\Desktop\\Garbage_folder\\images"
original_labels = "C:\\Users\\Sriram\\Desktop\\Garbage_folder\\labels"
dataset_dir = "C:\\Users\\Sriram\\Desktop\\Garbage_folder"

# Fixed seed so that re-running this cell reproduces the same split. The copy
# step below is additive (it never clears the split folders), so a different
# shuffle on a re-run would leave the same image in more than one split.
# NOTE: split folders filled by an earlier, unseeded run are not cleaned up here.
SPLIT_SEED = 42

# Create folders
for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(dataset_dir, split, "images"), exist_ok=True)
    os.makedirs(os.path.join(dataset_dir, split, "labels"), exist_ok=True)

# Get all image files (.jpg/.jpeg/.png, any letter case). os.listdir returns
# entries in arbitrary order, so sort first to give the seeded shuffle a
# stable starting point.
image_files = sorted(
    f for f in os.listdir(original_images)
    if f.lower().endswith((".jpg", ".jpeg", ".png"))
)
# A private Random instance keeps the global random state untouched.
random.Random(SPLIT_SEED).shuffle(image_files)

# Split ratios: 70% train, 20% val, the remainder (about 10%) test
train_split = int(0.7 * len(image_files))
val_split = int(0.2 * len(image_files))

train_files = image_files[:train_split]
val_files = image_files[train_split:train_split + val_split]
test_files = image_files[train_split + val_split:]

# Copy images and labels to respective folders
def copy_files(files, split, images_dir=None, labels_dir=None, out_dir=None):
    """Copy images and their YOLO label files into one split of the dataset.

    An image is copied only when its label file (same stem, ``.txt``) exists.
    The label is checked *before* anything is copied: copying the image first
    meant a missing label raised ``FileNotFoundError`` midway and left an
    image without a label inside the split folder.

    Args:
        files: Image file names (not full paths) found in ``images_dir``.
        split: Name of the split sub-folder, e.g. ``"train"``.
        images_dir: Folder holding the source images. Defaults to the
            notebook-level ``original_images``.
        labels_dir: Folder holding the source labels. Defaults to the
            notebook-level ``original_labels``.
        out_dir: Dataset root that contains the split folders. Defaults to
            the notebook-level ``dataset_dir``.

    Returns:
        None. Images skipped because they have no label are reported with a
        single printed line.
    """
    # Fall back to the notebook globals only when no explicit folder is given,
    # so existing two-argument calls keep working unchanged.
    images_dir = original_images if images_dir is None else images_dir
    labels_dir = original_labels if labels_dir is None else labels_dir
    out_dir = dataset_dir if out_dir is None else out_dir

    dest_images = os.path.join(out_dir, split, "images")
    dest_labels = os.path.join(out_dir, split, "labels")
    # Do not rely on another cell having created the destination folders.
    os.makedirs(dest_images, exist_ok=True)
    os.makedirs(dest_labels, exist_ok=True)

    skipped = []
    for file in files:
        # Corresponding label (assume .txt for YOLO)
        label_file = os.path.splitext(file)[0] + ".txt"
        label_src = os.path.join(labels_dir, label_file)
        if not os.path.isfile(label_src):
            skipped.append(file)
            continue

        shutil.copy2(os.path.join(images_dir, file), os.path.join(dest_images, file))
        shutil.copy2(label_src, os.path.join(dest_labels, label_file))

    if skipped:
        print(
            f"{split}: skipped {len(skipped)} image(s) without a label file: "
            f"{skipped[:5]}"
        )

# Populate each split folder in turn, then report how many images each one got.
for split_name, split_members in (
    ("train", train_files),
    ("val", val_files),
    ("test", test_files),
):
    copy_files(split_members, split_name)

print(f"Train: {len(train_files)}, Val: {len(val_files)}, Test: {len(test_files)}")

Train: 175, Val: 50, Test: 26


In [7]:
import os

def check_missing_labels(dataset_path):
    """Report, per split, the images that have no label file with the same stem.

    For each of ``train``, ``val`` and ``test`` the file stems found in
    ``<dataset_path>/<split>/images`` are compared with those found in
    ``<dataset_path>/<split>/labels``. One summary line is printed per split
    with the number of unlabeled stems and up to five of them, in sorted order.

    Args:
        dataset_path: Dataset root that contains the three split folders.

    Raises:
        FileNotFoundError: Propagated from ``os.listdir`` when a split's
            images or labels folder does not exist.
    """
    for split in ["train", "val", "test"]:
        image_dir = os.path.join(dataset_path, split, "images")
        label_dir = os.path.join(dataset_path, split, "labels")
        images = {os.path.splitext(f)[0] for f in os.listdir(image_dir)}
        labels = {os.path.splitext(f)[0] for f in os.listdir(label_dir)}
        # Sort the difference: a set of strings has no stable iteration order
        # across interpreter runs, so an unsorted sample is not reproducible.
        missing = sorted(images - labels)
        print(f"{split.upper()}: {len(missing)} missing labels: {missing[:5]}...")

# Run the per-split label check against the dataset root.
check_missing_labels(dataset_path="C:\\Users\\Sriram\\Desktop\\Garbage_folder")

TRAIN: 0 missing labels: []...
VAL: 0 missing labels: []...
TEST: 0 missing labels: []...


In [6]:
import os

# Delete the training image that has no label file (no-op when it is already gone).
image_path = "C:\\Users\\Sriram\\Desktop\\Garbage_folder\\train\\images\\paper340_jpg.rf.90a0202e5e08e2af8c58befaeeec0020.jpg"
orphan_present = os.path.exists(image_path)
if orphan_present:
    os.remove(image_path)
    print("Removed image with missing label:", image_path)

Removed image with missing label: C:\Users\Sriram\Desktop\Garbage_folder\train\images\paper340_jpg.rf.90a0202e5e08e2af8c58befaeeec0020.jpg


In [8]:
import os

# Confirm that every split has both sub-folders. The paths are relative, so
# the result depends on the notebook's current working directory.
splits = ["train", "val", "test"]
for split in splits:
    img_dir, label_dir = (os.path.join(split, leaf) for leaf in ("images", "labels"))
    report = [
        f"{split.upper()}:",
        f"  Images: {img_dir} exists? {os.path.exists(img_dir)}",
        f"  Labels: {label_dir} exists? {os.path.exists(label_dir)}",
    ]
    print("\n".join(report))

TRAIN:
  Images: train\images exists? True
  Labels: train\labels exists? True
VAL:
  Images: val\images exists? True
  Labels: val\labels exists? True
TEST:
  Images: test\images exists? True
  Labels: test\labels exists? True
