In [19]:
import os
import shutil
import random
from PIL import Image
from tqdm import tqdm # A library for nice progress bars

# --- CONFIGURATION - YOU MUST EDIT THIS! ---

# 1. Full paths to the original image folders, one folder per class.
#    The last path component of each folder is used as the class name.
SOURCE_DIRECTORIES = [
    r"/mnt/c/Users/Pavelishko/Pictures/Хвоя/OriginalSize/kuusi",
    r"/mnt/c/Users/Pavelishko/Pictures/Хвоя/OriginalSize/mänty",
    r"/mnt/c/Users/Pavelishko/Pictures/Хвоя/OriginalSize/marjakuusi",
    r"/mnt/c/Users/Pavelishko/Pictures/Хвоя/OriginalSize/thuja"
]

# 2. Root folder that receives the NEW processed dataset (one sub-folder per class).
OUTPUT_DIRECTORY = r"/mnt/c/Users/Pavelishko/Pictures/Хвоя/NewSet_Patches_for_Culling"

# 3. Number of *original high-res images* per class intended for each split.
SPLITS = {
    'train': 104,
    'validation': 23,
    'test': 23
}

# 4. Patching parameters (both values are doubled for the manual-culling pass).
PATCH_SIZE = 512  # Side length, in pixels, of the square crop taken from the source image
OVERLAP = 128     # Pixels shared between neighbouring patches

# ------------------------------------------------

# Fail fast on an impossible geometry: a step of zero or less would either make
# every image fail or silently produce a single patch per image.
if PATCH_SIZE <= 0 or not 0 <= OVERLAP < PATCH_SIZE:
    raise ValueError(
        f"Invalid patch geometry: PATCH_SIZE={PATCH_SIZE}, OVERLAP={OVERLAP} "
        "(need PATCH_SIZE > 0 and 0 <= OVERLAP < PATCH_SIZE)."
    )

# Step size: how far the crop window moves between neighbouring patches.
# A 512 px patch with 128 px overlap moves 384 px per step.
STEP_SIZE = PATCH_SIZE - OVERLAP

# File extensions (lower-case, with leading dot) treated as images.
IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp', '.gif']

print("Configuration loaded.")
print(f"Creating {PATCH_SIZE}x{PATCH_SIZE} patches with a {STEP_SIZE}px step.")

Configuration loaded.
Creating 512x512 patches with a 384px step.


In [20]:
from PIL import Image

def _patch_origins(length, patch_size, step):
    """
    Return the top-left offsets of the patches along one image axis.

    Offsets advance by `step` starting at 0, and a final offset of
    `length - patch_size` is always included so the last patch sits flush
    with the far edge. Requires `length >= patch_size` and `step > 0`.
    """
    last_origin = length - patch_size
    # range() stops before last_origin, so appending it never creates a duplicate.
    return [*range(0, last_origin, step), last_origin]


def create_and_save_patches(src_path, dest_dir, base_filename, patch_size, step,
                            final_size=(256, 256)):
    """
    Tile one source image into square patches, downscale each one, and save it as PNG.

    WARNING: downscaling from `patch_size` to `final_size` discards fine-grained detail.

    Args:
        src_path: Path of the source image to open.
        dest_dir: Existing directory that receives the patch files.
        base_filename: Prefix of every patch file name; files are written as
            "<base_filename>__patch_x<X>_y<Y>.png", where X/Y is the top-left
            corner of the crop in source-image pixels.
        patch_size: Side length, in pixels, of the square crop.
        step: Distance, in pixels, between neighbouring crop origins.
        final_size: (width, height) each crop is resized to before saving.

    Returns:
        A tuple (src_path, number_of_patches_written). Any failure is reported
        on stdout and yields (src_path, 0) instead of raising, so one bad image
        cannot abort a whole batch.
    """
    try:
        if patch_size <= 0 or step <= 0:
            raise ValueError(
                f"patch_size and step must be positive (got {patch_size} and {step})"
            )

        patch_counter = 0

        with Image.open(src_path) as img:
            img_width, img_height = img.size

            # A source smaller than the patch would give negative crop origins,
            # which Pillow fills with black padding; reject it instead.
            if img_width < patch_size or img_height < patch_size:
                raise ValueError(
                    f"image is {img_width}x{img_height}, smaller than the "
                    f"{patch_size}x{patch_size} patch size"
                )

            if img.mode != 'RGB':
                img = img.convert('RGB')

            x_coords = _patch_origins(img_width, patch_size, step)
            y_coords = _patch_origins(img_height, patch_size, step)

            for y in y_coords:
                for x in x_coords:
                    # 1. Crop the full-resolution patch_size x patch_size window.
                    box = (x, y, x + patch_size, y + patch_size)
                    patch = img.crop(box)

                    # 2. Downscale the window to the requested output size.
                    patch_resized = patch.resize(final_size, Image.Resampling.LANCZOS)

                    patch_filename = f"{base_filename}__patch_x{x}_y{y}.png"
                    dest_path = os.path.join(dest_dir, patch_filename)

                    # 3. Save the *resized* patch; compress_level=1 trades file size for speed.
                    patch_resized.save(dest_path, "PNG", compress_level=1)

                    patch_counter += 1

        return (src_path, patch_counter) # Return a tuple to track success

    except Exception as e:
        print(f"Error processing {src_path}: {e}")
        return (src_path, 0) # Return 0 patches on failure

print("Patching helper function defined (with CROP and RESIZE).")

Patching helper function defined (with CROP and RESIZE).


In [21]:
import multiprocessing
from tqdm import tqdm

# --- Helper function for multiprocessing ---
def process_task_wrapper(task_args):
    """
    Adapter for pool.imap_unordered, which passes each worker one argument.

    Expands the packed argument tuple, forwards it to create_and_save_patches,
    and hands that function's return value back unchanged.
    """
    patch_result = create_and_save_patches(*task_args)
    return patch_result
# -------------------------------------------


print("Starting patch dataset creation...")

# --- 1. Build the 'To-Do List' (all tasks) ---
# One (src_path, dest_dir, base_filename, patch_size, step) tuple per source image.
tasks = []
print("Scanning source directories...")

# str.endswith accepts a tuple, so every extension is matched in a single call.
image_suffixes = tuple(ext.lower() for ext in IMAGE_EXTENSIONS)

# Patch-name prefixes already handed out, per output folder (lower-cased,
# because the output may live on a case-insensitive file system).
used_names_by_dir = {}

for source_folder in SOURCE_DIRECTORIES:
    # normpath drops a trailing slash; without it basename() returns '' and
    # every class would be written straight into OUTPUT_DIRECTORY.
    class_name = os.path.basename(os.path.normpath(source_folder))
    print(f"\nScanning class: {class_name}...")

    # Create the single output directory for this class
    # e.g., .../NewSet_Patches_for_Culling/kuusi
    dest_dir = os.path.join(OUTPUT_DIRECTORY, class_name)
    os.makedirs(dest_dir, exist_ok=True)

    # Collect regular files with an image extension. Sorting makes the task
    # order (and the collision handling below) deterministic between runs.
    with os.scandir(source_folder) as entries:
        all_images = sorted(
            entry.path
            for entry in entries
            if entry.is_file() and entry.name.lower().endswith(image_suffixes)
        )

    print(f"  Found {len(all_images)} original images to process.")

    used_names = used_names_by_dir.setdefault(dest_dir, set())

    for src_path in all_images:
        # Use the original image filename as the base for the patch
        # e.g., kuusi_148  ->  kuusi_148__patch_x0_y0.png
        stem, ext = os.path.splitext(os.path.basename(src_path))
        base_filename = stem

        # Two sources that share a stem (photo.jpg / photo.png) would overwrite
        # each other's patches; the later one gets its extension appended.
        if base_filename.lower() in used_names:
            base_filename = f"{stem}_{ext[1:].lower()}"
        used_names.add(base_filename.lower())

        tasks.append((src_path, dest_dir, base_filename, PATCH_SIZE, STEP_SIZE))

print(f"\n--- Built a 'to-do list' of {len(tasks)} total images. ---")


# --- 2. Execute tasks in parallel (with live progress) ---
print("Starting parallel processing pool...")

# Pool() uses all available CPU cores; 'with' tears the workers down afterwards.
# imap_unordered yields each result as soon as its worker finishes, and tqdm
# turns that stream into a live progress bar.
with multiprocessing.Pool() as pool:
    results = list(
        tqdm(
            pool.imap_unordered(process_task_wrapper, tasks),
            total=len(tasks),
            desc="Processing images",
        )
    )

# --- 3. Summarise (a patch count of 0 marks an image that failed) ---
total_patches_generated = sum(patch_count for _, patch_count in results)
failed_sources = [src_path for src_path, patch_count in results if patch_count == 0]

for src_path in failed_sources:
    print(f"Warning: Failed to process {src_path}")


print("\n\n✅ Dataset processing complete!")
if failed_sources:
    print(f"{len(failed_sources)} of {len(results)} images produced no patches (see warnings above).")
print(f"Created a total of {total_patches_generated} patches.")
print(f"Your new dataset is ready in: {OUTPUT_DIRECTORY}")
print("You can now open this folder in ImageGlass or FastStone to start deleting bad patches.")

Starting patch dataset creation...
Scanning source directories...

Scanning class: kuusi...
  Found 154 original images to process.

Scanning class: mänty...
  Found 150 original images to process.

Scanning class: marjakuusi...
  Found 158 original images to process.

Scanning class: thuja...
  Found 164 original images to process.

--- Built a 'to-do list' of 626 total images. ---
Starting parallel processing pool...


Processing images: 100%|██████████| 626/626 [01:22<00:00,  7.58it/s]



✅ Dataset processing complete!
Created a total of 40064 patches.
Your new dataset is ready in: /mnt/c/Users/Pavelishko/Pictures/Хвоя/NewSet_Patches_for_Culling
You can now open this folder in ImageGlass or FastStone to start deleting bad patches.



