In [14]:
import os
import shutil
import random

# Source folders (tiled validation images and their label files) and the
# root folder of the dataset that is assembled from them.
images_folder = '../../../Downloads/yolo-tiling-main/yolo_tiler/tiled_data/valid/images'
labels_folder = '../../../Downloads/yolo-tiling-main/yolo_tiler/tiled_data/valid/labels'
new_dataset_folder = './tiled_data/valid/'

# Make sure the destination root and its 'images' / 'labels' subfolders
# exist; exist_ok=True keeps re-running this cell harmless.
os.makedirs(new_dataset_folder, exist_ok=True)
for _subfolder in ('images', 'labels'):
    os.makedirs(os.path.join(new_dataset_folder, _subfolder), exist_ok=True)

# Base names (file name without the '.txt' extension) of the label files,
# split by whether the file contains at least one annotation line.
images_with_boxes = []
images_without_boxes = []

# Read the annotation files
for label_file in os.listdir(labels_folder):
    if not label_file.endswith('.txt'):
        continue

    # Strip only the trailing extension. str.replace('.txt', '') would also
    # remove any '.txt' that happens to occur earlier in the file name and
    # produce a base name that matches no image.
    image_name = label_file[:-len('.txt')]

    label_path = os.path.join(labels_folder, label_file)
    with open(label_path, 'r') as file:
        # A file holding nothing but blank lines carries no annotations, so
        # it is classified the same way as a completely empty file.
        has_boxes = any(line.strip() for line in file)

    if has_boxes:
        images_with_boxes.append(image_name)
    else:
        images_without_boxes.append(image_name)

# Randomly sample the images without bounding boxes: shuffle the list in
# place, then keep its leading entries.
random.shuffle(images_without_boxes)
selected_images_without_boxes = images_without_boxes[0:150]  # Change 150 to the number of examples you want

# Final selection: every image with boxes, followed by the sampled ones
# without boxes.
selected_images = list(images_with_boxes)
selected_images.extend(selected_images_without_boxes)
# Counters for the summary printed after all pairs have been processed.
successful_copies = 0
failed_copies = 0

# Copy every selected image together with its label file into the new
# dataset. A pair counts as successful only when both files were copied.
for image_name in selected_images:
    src_image_path = os.path.join(images_folder, image_name + '.jpg')
    src_label_path = os.path.join(labels_folder, image_name + '.txt')

    # Check if image exists; a label whose image is missing is skipped.
    if not os.path.exists(src_image_path):
        print(f"Warning: Image not found: {src_image_path}")
        failed_copies += 1
        continue

    dst_image_path = os.path.join(new_dataset_folder, 'images', image_name + '.jpg')
    dst_label_path = os.path.join(new_dataset_folder, 'labels', image_name + '.txt')

    # Tracks whether this iteration wrote the destination image, so that the
    # rollback below never touches a file it did not create (for example
    # when shutil.copy refuses because source and destination are the same).
    image_copied = False
    try:
        shutil.copy(src_image_path, dst_image_path)
        image_copied = True
        shutil.copy(src_label_path, dst_label_path)
    except OSError as e:
        # shutil.copy reports failures as OSError (including its own
        # shutil.Error / SameFileError subclasses).
        print(f"Error processing {image_name}: {e}")
        failed_copies += 1
        if image_copied:
            # The label copy failed after the image was written. Remove the
            # image so the new dataset holds no image without its label for
            # a pair that is reported as failed.
            try:
                os.remove(dst_image_path)
            except OSError as cleanup_error:
                print(f"Warning: Could not remove orphaned image {dst_image_path}: {cleanup_error}")
        continue

    successful_copies += 1

print("Dataset creation completed:")
print(f"Successfully copied: {successful_copies} image-label pairs")
print(f"Failed to copy: {failed_copies} image-label pairs")

Dataset creation completed:
Successfully copied: 647 image-label pairs
Failed to copy: 0 image-label pairs
