In [2]:
import os
import random
import shutil

# Source repositories and the working directories that hold processed data
images_repo_dir = "images_repo"
masks_repo_dir = "masks_repo"
images_dir = "images"
masks_dir = "masks"

# How many not-yet-processed images to draw on this run
num_new_images = 1

# Create the working directories if they are missing
for out_dir in (images_dir, masks_dir):
    os.makedirs(out_dir, exist_ok=True)

# Regular files already present in images_dir count as processed; they are
# identified by file name without extension.
processed_images = [
    entry
    for entry in os.listdir(images_dir)
    if os.path.isfile(os.path.join(images_dir, entry))
]
processed_basenames = {stem for stem, _ext in map(os.path.splitext, processed_images)}

# Regular files available in the image repository
repo_images = [
    entry
    for entry in os.listdir(images_repo_dir)
    if os.path.isfile(os.path.join(images_repo_dir, entry))
]

# Keep only repository images whose base name has not been processed yet
unprocessed_images = [
    candidate
    for candidate in repo_images
    if os.path.splitext(candidate)[0] not in processed_basenames
]

# Refuse to continue when the repository cannot supply the requested amount
if num_new_images > len(unprocessed_images):
    raise ValueError(f"Not enough unprocessed images in {images_repo_dir} to select {num_new_images} samples.")

# Draw num_new_images distinct images at random from the unprocessed ones
selected_images = random.sample(unprocessed_images, num_new_images)

# Index the mask repository once, grouping mask file names by base name
# (file name without extension), instead of re-listing the directory for every
# selected image. Only regular files are considered, mirroring how the image
# directories are listed; a sub-directory can therefore never be mistaken for
# a mask. The repository is only read when there is something to copy.
masks_by_basename = {}
if selected_images:
    for mask_name in os.listdir(masks_repo_dir):
        if os.path.isfile(os.path.join(masks_repo_dir, mask_name)):
            mask_base = os.path.splitext(mask_name)[0]
            masks_by_basename.setdefault(mask_base, []).append(mask_name)

for img in selected_images:
    base_name = os.path.splitext(img)[0]

    # Copy the image to images_dir. This happens whether or not a mask is
    # found below, so an image with a missing or ambiguous mask still ends up
    # in images_dir (only a warning is printed for it).
    src_img = os.path.join(images_repo_dir, img)
    dst_img = os.path.join(images_dir, img)
    shutil.copy(src_img, dst_img)

    # Look up the mask(s) sharing this image's base name; exactly one is
    # expected.
    possible_masks = masks_by_basename.get(base_name, [])

    if len(possible_masks) == 1:
        mask_file = possible_masks[0]
        src_mask = os.path.join(masks_repo_dir, mask_file)
        dst_mask = os.path.join(masks_dir, mask_file)
        shutil.copy(src_mask, dst_mask)
    elif len(possible_masks) == 0:
        print(f"Warning: No corresponding mask found for {img}")
    else:
        # Ambiguous: copy none of them rather than guess which one is right.
        print(f"Warning: Multiple masks found for {img}: {possible_masks}")

print(f"Finished selecting and copying {num_new_images} new images and their masks.")

Finished selecting and copying 1 new images and their masks.
