In [2]:
import os
import shutil
from sklearn.model_selection import train_test_split

def create_dir(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    ``exist_ok=True`` replaces a separate "exists?" check, so there is no
    window between checking and creating in which the directory could appear
    and make ``os.makedirs`` fail.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def split_dataset(dataset_dir, output_dir, test_size=0.2, val_size=0.1):
    """Copy ``<dataset_dir>/train`` into train/val/test splits under ``output_dir``.

    Files in ``train/labels`` and ``train/images`` are paired by file name
    without its final extension, split with ``train_test_split`` (fixed
    ``random_state=42``) and copied to ``<output_dir>/<split>/labels`` and
    ``<output_dir>/<split>/images``. The source files are left in place.

    Args:
        dataset_dir: Directory containing ``train/labels`` and ``train/images``.
        output_dir: Directory that receives the ``train``, ``val`` and ``test`` folders.
        test_size: Fraction of all samples placed in the test split.
        val_size: Fraction of all samples placed in the validation split.

    Raises:
        ValueError: if the label and image folders do not hold matching files.
    """
    # Define the paths
    train_dir = os.path.join(dataset_dir, 'train')
    labels_dir = os.path.join(train_dir, 'labels')
    images_dir = os.path.join(train_dir, 'images')

    def stem(file_name):
        # splitext removes only the final extension, so names with extra dots
        # ("img.v2.jpg") keep their full stem instead of being cut at the first dot.
        return os.path.splitext(file_name)[0]

    def by_stem(file_name):
        return (stem(file_name), file_name)

    # Sort by stem rather than by full name: with full-name sorting, different
    # extensions ("txt" vs "jpg") can order dotted names differently in the two
    # folders and silently pair a label with the wrong image.
    labels = sorted(os.listdir(labels_dir), key=by_stem)
    images = sorted(os.listdir(images_dir), key=by_stem)

    # Ensure that the labels and images match (explicit errors instead of
    # asserts, which are stripped when Python runs with -O).
    if len(labels) != len(images):
        raise ValueError(
            f"Found {len(labels)} label files but {len(images)} image files."
        )
    mismatched = [
        (label, image)
        for label, image in zip(labels, images)
        if stem(label) != stem(image)
    ]
    if mismatched:
        label, image = mismatched[0]
        raise ValueError(
            f"Label {label!r} does not match image {image!r} "
            f"({len(mismatched)} mismatched pair(s) in total)."
        )

    # Split the dataset: first carve out the test set, then take the validation
    # set from the remainder. val_size is rescaled because it is expressed as a
    # fraction of the whole dataset, not of the remainder.
    train_labels, test_labels, train_images, test_images = train_test_split(
        labels, images, test_size=test_size, random_state=42
    )
    train_labels, val_labels, train_images, val_images = train_test_split(
        train_labels, train_images, test_size=val_size / (1 - test_size), random_state=42
    )

    splits = {
        'train': (train_labels, train_images),
        'val': (val_labels, val_images),
        'test': (test_labels, test_images),
    }

    # Create each split's output directories and copy its files into them
    for split, (split_labels, split_images) in splits.items():
        split_labels_dir = os.path.join(output_dir, split, 'labels')
        split_images_dir = os.path.join(output_dir, split, 'images')
        create_dir(split_labels_dir)
        create_dir(split_images_dir)

        for file in split_labels:
            shutil.copy(os.path.join(labels_dir, file), os.path.join(split_labels_dir, file))
        for file in split_images:
            shutil.copy(os.path.join(images_dir, file), os.path.join(split_images_dir, file))

    print("Dataset split completed!")

# Example usage
# NOTE: these are absolute, machine-specific paths; edit them before running elsewhere.
# split_dataset reads <dataset_dir>/train/labels and <dataset_dir>/train/images
# and copies the files into train/val/test folders under output_dir.
dataset_dir = "C:/Users/Anirudh/OneDrive/Desktop/dataaa/card"
output_dir = 'C:/Users/Anirudh/OneDrive/Desktop/dataaa/card_split'
split_dataset(dataset_dir, output_dir)


ModuleNotFoundError: No module named 'sklearn.utils'

In [5]:
import os
import shutil
import random

def create_dir(directory):
    """Create ``directory`` (and any missing parents) if it does not exist yet.

    ``exist_ok=True`` makes the call safe to repeat and avoids the gap between
    a separate existence check and the creation itself.

    Args:
        directory: Directory to create.

    Raises:
        FileExistsError: if ``directory`` exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)

def split_dataset(images_dir, annotations_dir, output_dir, train_ratio=0.8,
                  image_extensions=('.jpg',), seed=None):
    """Copy annotated images into ``train`` and ``val`` folders under ``output_dir``.

    An image is used only when ``annotations_dir`` holds a ``.txt`` file with
    the same base name; images without one are reported and left out *before*
    the split is computed, so ``train_ratio`` applies to the samples that are
    actually copied. Source files are left in place.

    Args:
        images_dir: Directory containing the image files.
        annotations_dir: Directory containing the ``.txt`` annotation files.
        output_dir: Receives ``train/images``, ``train/labels``, ``val/images``
            and ``val/labels``.
        train_ratio: Fraction of the annotated images placed in the training set.
        image_extensions: File extension(s) treated as images, matched
            case-insensitively. A single string is accepted as well.
        seed: Optional seed for a reproducible shuffle. ``None`` uses the
            module-level ``random`` state, as before.
    """
    # Create output directories
    train_images_dir = os.path.join(output_dir, 'train', 'images')
    train_labels_dir = os.path.join(output_dir, 'train', 'labels')
    val_images_dir = os.path.join(output_dir, 'val', 'images')
    val_labels_dir = os.path.join(output_dir, 'val', 'labels')

    for directory in (train_images_dir, train_labels_dir, val_images_dir, val_labels_dir):
        os.makedirs(directory, exist_ok=True)

    if isinstance(image_extensions, str):
        image_extensions = (image_extensions,)
    extensions = tuple(ext.lower() for ext in image_extensions)

    # os.listdir returns names in arbitrary order; sorting gives the shuffle a
    # stable starting point so a given seed always yields the same split.
    candidates = sorted(
        name for name in os.listdir(images_dir) if name.lower().endswith(extensions)
    )

    # Keep only the images that have an annotation file
    pairs = []
    for image in candidates:
        annotation = os.path.splitext(image)[0] + '.txt'
        if os.path.exists(os.path.join(annotations_dir, annotation)):
            pairs.append((image, annotation))
        else:
            print(f"Warning: Annotation file {annotation} for image {image} not found.")

    # Shuffle, then split into training and validation sets
    if seed is None:
        random.shuffle(pairs)
    else:
        random.Random(seed).shuffle(pairs)
    split_idx = int(len(pairs) * train_ratio)

    def copy_pairs(selected, dst_images_dir, dst_labels_dir):
        # Copy each image together with its annotation file.
        for image, annotation in selected:
            shutil.copy(os.path.join(images_dir, image), os.path.join(dst_images_dir, image))
            shutil.copy(os.path.join(annotations_dir, annotation),
                        os.path.join(dst_labels_dir, annotation))

    copy_pairs(pairs[:split_idx], train_images_dir, train_labels_dir)
    copy_pairs(pairs[split_idx:], val_images_dir, val_labels_dir)

    print("Dataset split completed.")

# Define paths
# NOTE: absolute, machine-specific paths; edit them before running elsewhere.
# Each image is matched to "<image base name>.txt" inside annotations_dir.
images_dir = "C:/Users/Anirudh/OneDrive/Desktop/dataaa/card/train/images"
annotations_dir = 'C:/Users/Anirudh/OneDrive/Desktop/dataaa/card/train/labels'
output_dir = 'C:/Users/Anirudh/OneDrive/Desktop/dataaa/card_split'

# Split the dataset
# Copies files into <output_dir>/train and <output_dir>/val; images without a
# matching annotation are skipped with a warning.
split_dataset(images_dir, annotations_dir, output_dir, train_ratio=0.8)


KeyboardInterrupt: 

In [1]:
import os

def update_label_ids(labels_dir, old_id='0', new_id='5'):
    """Replace the leading class ID on annotation lines, rewriting files in place.

    Every ``.txt`` file directly inside ``labels_dir`` is read. On each
    non-blank line whose first whitespace-separated field equals ``old_id``,
    that field is replaced by ``new_id``. Non-blank lines are written back
    with their fields joined by single spaces and a trailing newline; blank
    lines are kept unchanged.

    Args:
        labels_dir: Directory containing the ``.txt`` label files.
        old_id: Class ID to look for (compared as a string).
        new_id: Class ID to write instead.
    """
    old_id, new_id = str(old_id), str(new_id)

    # Get the list of label files
    label_files = [f for f in os.listdir(labels_dir) if f.endswith('.txt')]

    for label_file in label_files:
        label_path = os.path.join(labels_dir, label_file)

        # Read the content of the label file
        with open(label_path, 'r') as file:
            lines = file.readlines()

        # Update the label ID from old_id to new_id
        updated_lines = []
        for line in lines:
            parts = line.split()
            if not parts:
                # Blank or whitespace-only line: there is no ID field to
                # inspect, so keep it as is instead of indexing into [].
                updated_lines.append(line)
                continue
            if parts[0] == old_id:
                parts[0] = new_id
            updated_lines.append(' '.join(parts) + '\n')

        # Write the updated content back to the file
        with open(label_path, 'w') as file:
            file.writelines(updated_lines)

    print("Label IDs updated.")

# Define the path to the labels directory
# NOTE: absolute, machine-specific path; edit it before running elsewhere.
labels_dir = "C:/Users/Anirudh/OneDrive/Desktop/dataaa/tyre/train/labels"


# Update label IDs
# Overwrites every .txt file in labels_dir in place, changing a leading
# class ID of 0 to 5 -- keep a backup if the original labels are needed.
update_label_ids(labels_dir)


Label IDs updated.


In [None]:
import os
import shutil
import random

# Set random seed for reproducibility
random.seed(42)

# Define the paths
dataset_path = "C:/Users/Anirudh/OneDrive/Desktop/dataaa/Dataset"
images_path = os.path.join(dataset_path, "images")
labels_path = os.path.join(dataset_path, "labels")

# Define the split ratios
train_ratio = 0.7
val_ratio = 0.2
# Informational only: the test split receives whatever remains after the
# train and validation counts have been taken.
test_ratio = 0.1

# Create the directories for the splits
split_paths = {
    "train": {"images": "C:/Users/Anirudh/OneDrive/Desktop/output/train_split/images", "labels": "C:/Users/Anirudh/OneDrive/Desktop/output/train_split/labels"},
    "val": {"images": "C:/Users/Anirudh/OneDrive/Desktop/output/val_split/images", "labels": "C:/Users/Anirudh/OneDrive/Desktop/output/val_split/labels"},
    "test": {"images": "C:/Users/Anirudh/OneDrive/Desktop/output/test_split/images", "labels": "C:/Users/Anirudh/OneDrive/Desktop/output/test_split/labels"}
}

for split in split_paths:
    os.makedirs(split_paths[split]["images"], exist_ok=True)
    os.makedirs(split_paths[split]["labels"], exist_ok=True)

# Get all image files. os.listdir returns names in arbitrary order, so sort
# them first; otherwise the seeded shuffle below is not reproducible.
image_files = sorted(f for f in os.listdir(images_path) if os.path.isfile(os.path.join(images_path, f)))

# Ensure image files have corresponding labels. The label folder is listed
# once into a set instead of being re-listed for every image.
label_names = set(os.listdir(labels_path))
image_files = [f for f in image_files if os.path.splitext(f)[0] + ".txt" in label_names]

# Shuffle the files
random.shuffle(image_files)

# Split the files
num_images = len(image_files)
num_train = int(train_ratio * num_images)
num_val = int(val_ratio * num_images)
num_test = num_images - num_train - num_val

train_files = image_files[:num_train]
val_files = image_files[num_train:num_train + num_val]
test_files = image_files[num_train + num_val:]

def copy_files(file_list, split):
    """Copy every image in ``file_list`` and its ``.txt`` label into one split.

    Sources are taken from the module-level ``images_path`` and ``labels_path``;
    destinations come from ``split_paths[split]``.

    Args:
        file_list: Image file names to copy.
        split: Key into ``split_paths`` ("train", "val" or "test").
    """
    for image_name in file_list:
        targets = split_paths[split]

        # Image: same file name on both sides
        shutil.copy(
            os.path.join(images_path, image_name),
            os.path.join(targets["images"], image_name),
        )

        # Label: image base name with a .txt extension
        stem, _ = os.path.splitext(image_name)
        label_name = stem + ".txt"
        shutil.copy(
            os.path.join(labels_path, label_name),
            os.path.join(targets["labels"], label_name),
        )

# Copy the files to the respective directories
# Each call copies the listed images plus their same-named .txt labels into
# the "images" and "labels" folders configured for that split in split_paths.
copy_files(train_files, "train")
copy_files(val_files, "val")
copy_files(test_files, "test")

print("Dataset split completed successfully.")
