In [1]:
import os
import shutil
import random

# Define paths
base_dir = "vehicle"
train_images_dir = os.path.join(base_dir, "train", "images")
train_labels_dir = os.path.join(base_dir, "train", "labels")

valid_images_dir = os.path.join(base_dir, "valid", "images")
valid_labels_dir = os.path.join(base_dir, "valid", "labels")
test_images_dir = os.path.join(base_dir, "test", "images")
test_labels_dir = os.path.join(base_dir, "test", "labels")

# Ensure the destination directories exist
for split_dir in (valid_images_dir, valid_labels_dir, test_images_dir, test_labels_dir):
    os.makedirs(split_dir, exist_ok=True)

# List all images. Sorting first and shuffling with a fixed seed gives the
# same selection for the same directory contents; a private Random instance
# is used so the global random state is left alone.
SPLIT_SEED = 42
all_images = sorted(os.listdir(train_images_dir))
random.Random(SPLIT_SEED).shuffle(all_images)

# Split dataset (10% validation, 10% test); the remainder stays in train/
num_valid = len(all_images) // 10
num_test = len(all_images) // 10

valid_images = all_images[:num_valid]
test_images = all_images[num_valid:num_valid + num_test]

# Move every selected image together with its label file out of train/
for image_names, images_dest, labels_dest in (
    (valid_images, valid_images_dir, valid_labels_dir),
    (test_images, test_images_dir, test_labels_dir),
):
    for img_name in image_names:
        img_path = os.path.join(train_images_dir, img_name)
        # Build the label name from the file stem so that any image extension
        # maps to "<stem>.txt". Replacing the substring ".jpg" left the label
        # of every non-.jpg image behind in train/labels.
        label_name = os.path.splitext(img_name)[0] + ".txt"
        label_path = os.path.join(train_labels_dir, label_name)

        if os.path.exists(img_path):
            shutil.move(img_path, images_dest)
        if os.path.exists(label_path):
            shutil.move(label_path, labels_dest)

print("✅ Data moved successfully!")


✅ Data moved successfully!


#If you only have a train folder, this splits part of it off into valid and test

In [2]:
import os
import shutil

# Define paths
base_dir = "vehicle"
train_labels_dir = os.path.join(base_dir, "train", "labels")
train_images_dir = os.path.join(base_dir, "train", "images")

valid_dir = os.path.join(base_dir, "valid")
test_dir = os.path.join(base_dir, "test")

valid_labels_dir = os.path.join(valid_dir, "labels")
valid_images_dir = os.path.join(valid_dir, "images")
test_labels_dir = os.path.join(test_dir, "labels")
test_images_dir = os.path.join(test_dir, "images")

# Create valid and test folders
for split_dir in (valid_labels_dir, valid_images_dir, test_labels_dir, test_images_dir):
    os.makedirs(split_dir, exist_ok=True)

# Process label files: change all class IDs to 6 (Vehicle)
VEHICLE_CLASS_ID = "6"
for label_file in os.listdir(train_labels_dir):
    # Only rewrite annotation files; anything else in the folder is left alone.
    if not label_file.endswith(".txt"):
        continue
    label_path = os.path.join(train_labels_dir, label_file)

    with open(label_path, "r") as f:
        lines = f.readlines()

    new_lines = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Blank line: there is no class ID to replace (indexing parts[0]
            # here used to raise IndexError), so it is simply dropped.
            continue
        parts[0] = VEHICLE_CLASS_ID
        new_lines.append(" ".join(parts) + "\n")

    # Save modified labels
    with open(label_path, "w") as f:
        f.writelines(new_lines)

# Move 10% of the data to valid and another 10% to test.
# There is no shuffling here: the first tenth of the sorted file names goes
# to valid and the next tenth to test.
all_images = sorted(os.listdir(train_images_dir))
num_valid = len(all_images) // 10  # 10% for validation
num_test = len(all_images) // 10  # 10% for testing

valid_images = all_images[:num_valid]
test_images = all_images[num_valid:num_valid + num_test]

for image_names, images_dest, labels_dest in (
    (valid_images, valid_images_dir, valid_labels_dir),
    (test_images, test_images_dir, test_labels_dir),
):
    for img_name in image_names:
        img_path = os.path.join(train_images_dir, img_name)
        # Pair by file stem so images that are not ".jpg" still take their
        # label with them.
        label_name = os.path.splitext(img_name)[0] + ".txt"
        label_path = os.path.join(train_labels_dir, label_name)

        shutil.move(img_path, images_dest)
        if os.path.exists(label_path):
            shutil.move(label_path, labels_dest)

print("✅ All class IDs changed to 6 (Vehicle), and dataset split into train, valid, and test.")


✅ All class IDs changed to 6 (Vehicle), and dataset split into train, valid, and test.


#Change the class ID in the label files of a single dataset folder

In [1]:
import os

# Define paths
base_dir = "CrossWalk_Copy"  # Update if needed
folders = ["train", "valid", "test"]
NEW_CLASS_ID = "2"  # Class ID written to every annotation


def set_class_id(lines, class_id):
    """Return the annotation lines with their first field replaced by class_id.

    Args:
        lines: Iterable of label-file lines ("<class> <values...>").
        class_id: Replacement class ID; converted with str().

    Returns:
        A new list of newline-terminated lines. Blank or whitespace-only
        lines are dropped instead of being turned into a bare class ID
        with no coordinates.
    """
    updated = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        fields[0] = str(class_id)
        updated.append(" ".join(fields) + "\n")
    return updated


for folder in folders:
    labels_dir = os.path.join(base_dir, folder, "labels")

    # Check if labels directory exists
    if not os.path.isdir(labels_dir):
        print(f"❌ Labels folder missing: {labels_dir}")
        continue  # Skip if folder doesn't exist

    # Process each label file
    for label_file in os.listdir(labels_dir):
        if not label_file.endswith(".txt"):
            continue
        label_path = os.path.join(labels_dir, label_file)

        with open(label_path, "r") as f:
            lines = f.readlines()

        # Replace all class IDs with NEW_CLASS_ID
        updated_lines = set_class_id(lines, NEW_CLASS_ID)

        # Save back the modified labels
        with open(label_path, "w") as f:
            f.writelines(updated_lines)

        print(f"✅ Updated: {label_path}")

print("🎯 All labels updated successfully!")


✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.1935f8fb6ed15d064db90efabfceb986.txt
✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.1af4e33116c43a580a0eb5d85e36990b.txt
✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.65ad96a1270cd83af1f7319ee1bf24de.txt
✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.933f3c8c0396d6aaf7b318c39a0d5aca.txt
✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.b0ffa2fc3e5b49cf89a108cc9cbaf5f6.txt
✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.e0d64d9ccdf86dc8cc00fbd886eb1b7d.txt
✅ Updated: CrossWalk_Copy\train\labels\101_jpg.rf.f23233609a1a172695cc0a1f60aa12c0.txt
✅ Updated: CrossWalk_Copy\train\labels\102_jpg.rf.b0f812110d9d2bd2ce64489dc68bcff8.txt
✅ Updated: CrossWalk_Copy\train\labels\102_jpg.rf.eab33090de7733281a1c8695ff8d2968.txt
✅ Updated: CrossWalk_Copy\train\labels\102_jpg.rf.f776526d185bcb5e8ff067be7564f516.txt
✅ Updated: CrossWalk_Copy\train\labels\103_jpg.rf.14e02c03007c28c9aa5e9b7f75c55519.txt
✅ Updated: CrossWalk_Copy\train\labels\103_

#Merge several datasets into one

In [2]:
import os
import shutil

# Datasets whose splits are copied into the merged dataset
source_folders = ["traffic_light_v2i_yolov8", "CrossWalk_Copy"]

# Root folder of the merged dataset and the splits it holds
dataset_base = "padestrian_traffic_light_and_crosswalk"
subfolders = ["train", "valid", "test"]

# Build <dataset_base>/<split>/images and <dataset_base>/<split>/labels
for split_name in subfolders:
    for kind in ("images", "labels"):
        os.makedirs(os.path.join(dataset_base, split_name, kind), exist_ok=True)

# Function to merge images and labels
def merge_data(data_type, sources=None, dest_base=None):
    """Copy one split's images and labels from every source into the merged dataset.

    Args:
        data_type: Name of the split to merge, e.g. "train", "valid" or "test".
        sources: Iterable of source dataset roots. Defaults to the
            module-level ``source_folders``.
        dest_base: Root of the merged dataset. Defaults to the module-level
            ``dataset_base``.

    Files are copied to ``<dest_base>/<data_type>/images`` and
    ``<dest_base>/<data_type>/labels`` (created if missing). Source folders
    that lack the split are skipped. When a file name already exists in the
    destination it is overwritten, as before, but the number of such
    overwrites is now reported so that name clashes do not go unnoticed.
    """
    if sources is None:
        sources = source_folders
    if dest_base is None:
        dest_base = dataset_base

    overwritten = 0
    for kind in ("images", "labels"):
        dest_dir = os.path.join(dest_base, data_type, kind)
        os.makedirs(dest_dir, exist_ok=True)

        for folder in sources:
            src_dir = os.path.join(folder, data_type, kind)
            if not os.path.isdir(src_dir):
                continue

            for file in os.listdir(src_dir):
                src_path = os.path.join(src_dir, file)
                # shutil.copy only handles regular files; skip sub-directories.
                if not os.path.isfile(src_path):
                    continue
                dest_path = os.path.join(dest_dir, file)
                if os.path.exists(dest_path):
                    overwritten += 1
                shutil.copy(src_path, dest_path)

    if overwritten:
        print(f"⚠️ {data_type}: {overwritten} file(s) already existed in the destination and were overwritten")

# Run the merge once for each split listed in subfolders, then report completion
for split_name in subfolders:
    merge_data(split_name)

print("✅ Dataset merged successfully!")


✅ Dataset merged successfully!


In [6]:
%pip install albumentations

Defaulting to user installation because normal site-packages is not writeable
Collecting albumentations
  Using cached albumentations-2.0.5-py3-none-any.whl.metadata (41 kB)
Collecting pydantic>=2.9.2 (from albumentations)
  Using cached pydantic-2.11.1-py3-none-any.whl.metadata (63 kB)
Collecting albucore==0.0.23 (from albumentations)
  Using cached albucore-0.0.23-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Using cached opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting annotated-types>=0.6.0 (from pydantic>=2.9.2->albumentations)
  Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Using cached albumentations-2.0.5-py3-none-any.whl (290 kB)
Using cached albucore-0.0.23-py3-none-any.whl (14 kB)
Using cached opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl (39.4 MB)
Using cached pydantic-2.11.1-py3-none-any.whl (442 kB)
Using cached annotated_types-0.7.0-py3-none-any.

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\welcome\\AppData\\Roaming\\Python\\Python313\\site-packages\\cv2\\cv2.pyd'
Check the permissions.


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [21]:
# DATA Augmentation code

import os
import cv2
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
import glob


# Define augmentation pipeline. The class IDs are passed as a label field so
# that the pipeline keeps them aligned with the boxes it returns.
augment = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.MotionBlur(p=0.2),
    A.Perspective(scale=(0.05, 0.1), p=0.3),
], bbox_params=A.BboxParams(format="yolo", label_fields=["class_labels"]))
dataset_base = "dataset_Copy"  # Base path
AUGMENTATIONS_PER_IMAGE = 5  # Number of augmented copies attempted per image

for split in ["train", "valid", "test"]:
    dataset_path = os.path.join(dataset_base, split)
    image_folder = os.path.join(dataset_path, "images")
    label_folder = os.path.join(dataset_path, "labels")
    augmented_folder = os.path.join(dataset_path, "augmented")

    # Create folders if they don't exist
    os.makedirs(os.path.join(augmented_folder, "images"), exist_ok=True)
    os.makedirs(os.path.join(augmented_folder, "labels"), exist_ok=True)

    # Get all image files
    image_files = glob.glob(os.path.join(image_folder, "*.jpg"))  # Change extension if needed

    for img_path in image_files:
        # Load image; cv2.imread returns None for unreadable files, which
        # would otherwise crash cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            print(f"⚠️ Could not read image, skipping: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Load corresponding label file (paired by file stem)
        img_stem = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(label_folder, img_stem + ".txt")
        if not os.path.exists(label_path):
            continue

        with open(label_path, "r") as f:
            labels = f.readlines()

        # Parse YOLO format labels (class x_center y_center width height).
        # A class ID is recorded only together with its box, so bboxes and
        # class_labels always have the same length and order.
        bboxes = []
        class_labels = []
        for line in labels:
            parts = line.split()
            if len(parts) != 5:
                continue  # Blank line or an annotation that is not a 4-value box
            bbox = list(map(float, parts[1:]))
            if all(0 <= value <= 1 for value in bbox):
                bboxes.append(bbox)
                class_labels.append(int(parts[0]))

        if not bboxes:
            continue  # Nothing to augment: no augmented sample would be saved

        # Perform augmentation several times per image
        for i in range(AUGMENTATIONS_PER_IMAGE):
            augmented = augment(image=img, bboxes=bboxes, class_labels=class_labels)
            aug_img = augmented["image"]
            aug_bboxes = augmented["bboxes"]
            # Take the labels returned by the pipeline rather than the input
            # list: when a transform removes a box, its label is removed too,
            # so pairing with the input labels would shift the class IDs.
            aug_classes = augmented["class_labels"]

            if len(aug_bboxes) == 0:
                continue  # Skip if no boxes survived the transform

            # Ensure bounding boxes are valid (clip them to [0, 1])
            aug_bboxes = np.clip(np.asarray(aug_bboxes, dtype=float), 0, 1)

            # Keep only plain 4-value boxes, each with its own class ID
            valid_bboxes = []
            valid_classes = []
            for box, cls in zip(aug_bboxes, aug_classes):
                if len(box) != 4:
                    continue
                valid_bboxes.append(box)
                valid_classes.append(cls)

            if not valid_bboxes:
                continue  # Skip if no valid boxes

            # Save augmented image
            aug_img_filename = f"{img_stem}_aug_{i}.jpg"
            aug_img_path = os.path.join(augmented_folder, "images", aug_img_filename)
            cv2.imwrite(aug_img_path, cv2.cvtColor(aug_img, cv2.COLOR_RGB2BGR))

            # Save augmented label
            aug_label_filename = f"{img_stem}_aug_{i}.txt"
            aug_label_path = os.path.join(augmented_folder, "labels", aug_label_filename)

            with open(aug_label_path, "w") as f:
                for bbox, cls in zip(valid_bboxes, valid_classes):
                    # int() keeps the class ID an integer in the label file
                    # whatever numeric type the pipeline hands back.
                    f.write(f"{int(cls)} {' '.join(map(str, bbox))}\n")

    print(f"Augmentation completed for {split} set! 🚀")

print("All dataset splits augmented successfully! ✅")

Augmentation completed for train set! 🚀
Augmentation completed for valid set! 🚀
Augmentation completed for test set! 🚀
All dataset splits augmented successfully! ✅


In [3]:
# Python script to delete .txt files in the labels folder if their corresponding .jpg images do not exist in the images folder.

import os

# Define folder paths
images_folder = "dataset_Copy/valid/images"
labels_folder = "dataset_Copy/valid/labels"

# Get the set of image names without extension
image_names = {os.path.splitext(f)[0] for f in os.listdir(images_folder) if f.endswith(".jpg")}

# Delete .txt label files that have no matching .jpg image. Anything in the
# labels folder that is not a .txt file is left untouched; without this
# filter every unmatched file of any type was removed.
for label_file in os.listdir(labels_folder):
    label_name, label_ext = os.path.splitext(label_file)
    if label_ext != ".txt" or label_name in image_names:
        continue
    label_path = os.path.join(labels_folder, label_file)
    os.remove(label_path)
    print(f"Deleted: {label_path}")

print("Cleanup complete.")


Deleted: dataset_Copy/valid/labels\-_png.rf.3489689e759452e8a5ffb8490f8af750_aug_0.txt
Deleted: dataset_Copy/valid/labels\-_png.rf.3489689e759452e8a5ffb8490f8af750_aug_1.txt
Deleted: dataset_Copy/valid/labels\-_png.rf.3489689e759452e8a5ffb8490f8af750_aug_2.txt
Deleted: dataset_Copy/valid/labels\-_png.rf.3489689e759452e8a5ffb8490f8af750_aug_3.txt
Deleted: dataset_Copy/valid/labels\-_png.rf.3489689e759452e8a5ffb8490f8af750_aug_4.txt
Deleted: dataset_Copy/valid/labels\crosswalk_1503_jpg.rf.f26e040fcd71bbc5a5e4b46c65881dd2_aug_0.txt
Deleted: dataset_Copy/valid/labels\crosswalk_1503_jpg.rf.f26e040fcd71bbc5a5e4b46c65881dd2_aug_1.txt
Deleted: dataset_Copy/valid/labels\crosswalk_1503_jpg.rf.f26e040fcd71bbc5a5e4b46c65881dd2_aug_2.txt
Deleted: dataset_Copy/valid/labels\crosswalk_1503_jpg.rf.f26e040fcd71bbc5a5e4b46c65881dd2_aug_3.txt
Deleted: dataset_Copy/valid/labels\crosswalk_1503_jpg.rf.f26e040fcd71bbc5a5e4b46c65881dd2_aug_4.txt
Deleted: dataset_Copy/valid/labels\crosswalk_1507_jpg.rf.a1f030d4

In [18]:

import os

# Define folder paths. Each name now matches the folder it points to: in the
# earlier version the two were swapped, which hid the fact that this cell
# deletes IMAGES.
labels_folder = "CrossWalk - Copy/train/labels"
images_folder = "CrossWalk - Copy/train/images"

# Get the set of label names without extension
label_names = {os.path.splitext(f)[0] for f in os.listdir(labels_folder) if f.endswith(".txt")}

# Iterate through the images folder and delete every file that has no
# matching .txt label
for image_file in os.listdir(images_folder):
    image_name = os.path.splitext(image_file)[0]
    if image_name in label_names:
        continue
    image_path = os.path.join(images_folder, image_file)
    if not os.path.isfile(image_path):
        continue  # os.remove cannot delete directories
    os.remove(image_path)
    print(f"Deleted: {image_path}")

print("Cleanup complete.")


Deleted: CrossWalk - Copy/train/images\31_jpg.rf.e45fe4d58403bed7ea54aea21b792afb - Copy.jpg
Deleted: CrossWalk - Copy/train/images\33_jpg.rf.874022b75f6803ca30f2a0a4e9de6d58 - Copy.jpg
Deleted: CrossWalk - Copy/train/images\7_jpg.rf.f5bce224c57f019980d1b5dc10a56e4d - Copy.jpg
Cleanup complete.
