In [None]:
pip install augmentor

Collecting augmentor
  Downloading Augmentor-0.2.12-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading Augmentor-0.2.12-py2.py3-none-any.whl (38 kB)
Installing collected packages: augmentor
Successfully installed augmentor-0.2.12


In [21]:
import os
import cv2
from pathlib import Path
import Augmentor


def resize_image(image_path, size=(224, 224)):
    """
    Resizes an image to the specified size.

    Args:
        image_path (str): Path to the image.
        size (tuple): Desired size as (width, height).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Image not found or invalid: {image_path}")
        resized_img = cv2.resize(img, size)
        cv2.imwrite(image_path, resized_img)
    except Exception as e:
        print(f"Error resizing image {image_path}: {e}")


def augment_and_resize_images(class_path, current_count, target_count, resize_to):
    """
    Augments and resizes images to meet the target count.

    Args:
        class_path (str): Path to the class folder.
        current_count (int): Current number of images in the class.
        target_count (int): Desired number of images.
        resize_to (tuple): Desired size for all images as (width, height).
    """
    # Augmentor to generate additional images
    p = Augmentor.Pipeline(class_path)
    p.set_save_format("JPEG")
    p.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10)
    p.flip_left_right(probability=0.5)
    p.zoom_random(probability=0.5, percentage_area=0.8)

    # Number of new images to generate
    num_to_generate = target_count - current_count
    p.sample(num_to_generate)

    # Move augmented images from "output" folder to the original folder
    output_dir = os.path.join(class_path, "output")
    if os.path.exists(output_dir):
        for augmented_img in os.listdir(output_dir):
            augmented_img_path = os.path.join(output_dir, augmented_img)
            resized_augmented_img_path = os.path.join(class_path, augmented_img)
            # Resize the augmented image
            resize_image(augmented_img_path, size=resize_to)
            # Move resized image to the class folder
            os.rename(augmented_img_path, resized_augmented_img_path)
        os.rmdir(output_dir)  # Remove the now-empty "output" folder

    print(f"Class at '{class_path}' augmented with {num_to_generate} new images.")


def balance_dataset_to_500_images(base_dir, target_count=500, resize_to=(224, 224)):
    """
    Balances the dataset by oversampling to the target count and resizing images.

    Args:
        base_dir (str): Path to the folder containing class subdirectories.
        target_count (int): Desired number of images per class.
        resize_to (tuple): Desired size for all images as (width, height).
    """
    print("Function started")
    # Iterate through each class folder
    for class_name in os.listdir(base_dir):
        class_path = os.path.join(base_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Get all images in the class directory
        images = [str(p) for p in Path(class_path).glob("*") if p.is_file()]
        num_images = len(images)
        print(f"Class '{class_name}' has {num_images} images.")

        # Resize all existing images
        print(f"Resizing images in class '{class_name}' to {resize_to}...")
        for img_path in images:
            resize_image(img_path, size=resize_to)

        # Augment images to reach the target count
        if num_images < target_count:
            augment_and_resize_images(class_path, num_images, target_count, resize_to)
        else:
            print(f"Class '{class_name}' already has {num_images} images, which meets or exceeds the target.")
    print("Function Exiting")


# Set directory
base_dir = r"G:\My Drive\sinusities\dataset\validate"

print("Path exists:", os.path.exists(base_dir))

target_count = 200  # Updated target number of images per class
resize_to = (224, 224)  # image size

# Run the script
print("Calling the function...")
balance_dataset_to_500_images(base_dir, target_count, resize_to)
print("Function call complete")


Path exists: True
Calling the function...
Function started
Class 'invalid' has 114 images.
Resizing images in class 'invalid' to (224, 224)...
Error resizing image G:\My Drive\sinusities\dataset\validate\invalid\head-mri_thumb-1-732x549.avif: Image not found or invalid: G:\My Drive\sinusities\dataset\validate\invalid\head-mri_thumb-1-732x549.avif
Error resizing image G:\My Drive\sinusities\dataset\validate\invalid\desktop.ini: Image not found or invalid: G:\My Drive\sinusities\dataset\validate\invalid\desktop.ini
Initialised with 112 image(s) found.
Output directory set to G:\My Drive\sinusities\dataset\validate\invalid\output.

Processing <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=224x224 at 0x23747392770>: 100%|██████████| 86/86 [00:00<00:00, 100.34 Samples/s]


Class at 'G:\My Drive\sinusities\dataset\validate\invalid' augmented with 86 new images.
Class 'valid' has 73 images.
Resizing images in class 'valid' to (224, 224)...
Error resizing image G:\My Drive\sinusities\dataset\validate\valid\desktop.ini: Image not found or invalid: G:\My Drive\sinusities\dataset\validate\valid\desktop.ini
Initialised with 69 image(s) found.
Output directory set to G:\My Drive\sinusities\dataset\validate\valid\output.

Processing <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=224x224 at 0x2374737B9A0>: 100%|██████████| 127/127 [00:01<00:00, 116.56 Samples/s]


Class at 'G:\My Drive\sinusities\dataset\validate\valid' augmented with 127 new images.
Function Exiting
Function call complete
