In [1]:
import os
import shutil
import random

def copy_images_for_oversampling(input_folder, output_folder, images_to_add, *,
                                 max_copies_per_image=5,
                                 image_extensions=('.png', '.jpg', '.jpeg', '.bmp')):
    """
    Oversample image classes by copying randomly chosen images.

    For every class folder listed in ``images_to_add``, images are drawn at
    random (with replacement) from ``input_folder/<class>`` and copied to
    ``output_folder/<class>`` under the name ``<stem>_copy_<i><ext>``, where
    ``i`` is the running index of the copy within that class.

    Args:
        input_folder: Path to the folder containing the original images,
            one subfolder per class.
        output_folder: Path to the folder where the copied images will be
            stored. Created (together with the class subfolders) if missing.
        images_to_add: A dictionary where keys are folder names
            (e.g., "oa_doubtful") and values are the number of images to add
            to each folder.
        max_copies_per_image: Upper bound on the oversampling ratio. The
            number of copies made for a class never exceeds
            ``len(available images) * max_copies_per_image``.
        image_extensions: File extensions (case-insensitive) that count as
            images.

    Returns:
        None. Progress and warnings are reported via ``print``.
    """
    # File names are lower-cased before matching, so the suffixes must be too.
    extensions = tuple(ext.lower() for ext in image_extensions)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    for folder_name, num_images_to_add in images_to_add.items():
        input_subfolder = os.path.join(input_folder, folder_name)

        # isdir (not exists): a regular file that happens to carry the class
        # name would pass an existence check and then make os.listdir raise.
        if not os.path.isdir(input_subfolder):
            print(f"Warning: Input subfolder '{input_subfolder}' does not exist. Skipping.")
            continue

        output_subfolder = os.path.join(output_folder, folder_name)
        os.makedirs(output_subfolder, exist_ok=True)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # random draws reproducible when the caller seeds `random`.
        image_files = sorted(
            f for f in os.listdir(input_subfolder)
            if f.lower().endswith(extensions)
            and os.path.isfile(os.path.join(input_subfolder, f))
        )

        if not image_files:
            print(f"Warning: No images found in '{input_subfolder}'. Skipping.")
            continue

        # Cap the request so a class is never inflated beyond
        # max_copies_per_image duplicates per original, and say so instead of
        # truncating silently. Negative requests are treated as zero.
        max_allowed = len(image_files) * max_copies_per_image
        if num_images_to_add > max_allowed:
            print(
                f"Warning: Requested {num_images_to_add} images for '{folder_name}' "
                f"but only {len(image_files)} originals are available; "
                f"limiting to {max_allowed}."
            )
        num_images_to_add = max(0, min(num_images_to_add, max_allowed))

        print(f"Adding {num_images_to_add} images to '{folder_name}' folder.")

        for i in range(num_images_to_add):
            # Sampling is with replacement, so one original may be copied
            # several times; the index suffix keeps destination names unique.
            random_image_name = random.choice(image_files)
            source_path = os.path.join(input_subfolder, random_image_name)

            base_name, extension = os.path.splitext(random_image_name)
            destination_name = f"{base_name}_copy_{i}{extension}"
            destination_path = os.path.join(output_subfolder, destination_name)

            # copy2 preserves file metadata (timestamps) along with the data
            shutil.copy2(source_path, destination_path)

    print("Image copying completed.")


# Source tree (one subfolder per class) and the destination tree that
# receives the duplicated images.
input_folder = r"E:\DL-for-Knee-decease-diagnosis\output_clahe"
output_folder = r"E:\DL-for-Knee-decease-diagnosis\oversamplingClahe"

# Number of extra copies requested per class folder; a value of 0 requests
# no copies for that class.
images_to_add = dict(
    normal=0,
    oa_doubtful=1089,
    oa_mild=920,
    oa_moderate=849,
    oa_severe=501,
)

# Run the oversampling with the configuration above.
copy_images_for_oversampling(
    input_folder=input_folder,
    output_folder=output_folder,
    images_to_add=images_to_add,
)


Adding 0 images to 'normal' folder.
Adding 1089 images to 'oa_doubtful' folder.
Adding 920 images to 'oa_mild' folder.
Adding 849 images to 'oa_moderate' folder.
Adding 501 images to 'oa_severe' folder.
Image copying completed.
