In [1]:
import os
import random
import numpy as np
from PIL import Image
import imgaug.augmenters as iaa

In [2]:
# Compatibility shim: (re)create the `np.bool` alias and point it at `np.bool_`.
# Per the original note, recent NumPy versions raise an error without this line
# (presumably because imgaug still references `np.bool` -- TODO confirm).
# Comment this line out if your NumPy version does not need it.
np.bool = np.bool_

def save_augmented_images(folder_path, images, augmenter, target, max_consecutive_failures=100):
    """Write augmented copies of ``images`` into ``folder_path`` until ``target`` is reached.

    Source images are drawn at random (with replacement) from ``images``; each
    draw is converted to RGB, passed through ``augmenter`` and saved as
    ``aug_<n>.jpg`` using the first index ``n`` that is not already taken on
    disk. The running count starts at ``len(images)`` and only increases when
    an augmented file has been saved successfully.

    Args:
        folder_path: Directory the augmented files are written to.
        images: Paths of the source images to sample from.
        augmenter: Callable invoked as ``augmenter(image=<ndarray>)`` that
            returns the augmented array.
        target: Desired total number of images (sources + augmented copies).
        max_consecutive_failures: Give up after this many failed attempts in a
            row, so a folder whose sources are all unreadable cannot make the
            loop spin forever.

    Returns:
        None. Progress problems are reported with ``print`` (best-effort).
    """
    current_count = len(images)
    if current_count >= target:
        return

    # random.choice() cannot draw from an empty sequence; bail out explicitly
    # instead of failing inside the error handler.
    if not images:
        print(f"No source images found in {folder_path}; skipping augmentation.")
        return

    image_index = 0  # Next candidate suffix for a unique filename
    consecutive_failures = 0

    while current_count < target:
        # Chosen outside the try block so the handler can always name the file.
        img_path = random.choice(images)
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been copied into the array.
            with Image.open(img_path) as img:
                img_array = np.array(img.convert("RGB"))  # Ensure RGB mode

            # Apply augmentation
            augmented_image = Image.fromarray(augmenter(image=img_array))

            # Skip over indices already used by earlier runs or iterations.
            new_filename = os.path.join(folder_path, f"aug_{image_index}.jpg")
            while os.path.exists(new_filename):
                image_index += 1
                new_filename = os.path.join(folder_path, f"aug_{image_index}.jpg")

            augmented_image.save(new_filename)
        except Exception as e:
            # Best-effort: report the failure and try another random image,
            # but stop once failures pile up without any success in between.
            print(f"Error processing image {img_path}: {e}")
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                print(
                    f"Giving up on {folder_path} after {consecutive_failures} "
                    f"consecutive failures ({current_count}/{target} images)."
                )
                return
            continue

        current_count += 1
        image_index += 1  # Start the next filename search after this one
        consecutive_failures = 0

def augment_data(path, size):
    """Top up every class folder under ``path`` to at least ``size`` images.

    Each immediate sub-directory of ``path`` is treated as one class. Image
    files (``.png``, ``.jpg``, ``.jpeg``, matched case-insensitively) directly
    inside it are counted; folders with fewer than ``size`` images get
    augmented copies written next to the originals via
    ``save_augmented_images``. Folders that already hold ``size`` or more
    images are left untouched (they are not down-sampled).

    Note that previously generated ``aug_*.jpg`` files match the extension
    filter too, so on a re-run they count toward ``size`` and may be picked as
    augmentation sources.

    Args:
        path: Dataset root containing one sub-directory per class.
        size: Target number of images per class folder.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # NOTE(review): random_order=True is set on the outer Sequential, which has
    # a single child here; if shuffling the selected augmenters was intended it
    # likely belongs on SomeOf instead -- confirm against the imgaug docs.
    augmenter = iaa.Sequential([
        iaa.SomeOf((3, 5), [
            iaa.Sharpen(alpha=(0, 0.5), lightness=(0.75, 1.25)),  # Sharpening
            iaa.Crop(percent=(0, 0.1)),  # Slight cropping
            iaa.Sometimes(0.5, iaa.Affine(rotate=(-10, 10))),  # Mild rotation
            iaa.Sometimes(0.5, iaa.LinearContrast((0.9, 1.1))),  # Contrast adjustment
            iaa.Fliplr(0.5),  # Horizontal flip
            iaa.Flipud(0.3),  # Vertical flip
        ])
    ], random_order=True)

    # Class folders, sorted so processing/log order does not depend on the
    # arbitrary order os.listdir() returns.
    folders = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

    for folder in folders:
        folder_path = os.path.join(path, folder)
        # Lower-case the name before matching so files such as "IMG_1.JPG"
        # are counted instead of being silently ignored.
        images = [
            os.path.join(folder_path, name)
            for name in sorted(os.listdir(folder_path))
            if name.lower().endswith(image_extensions)
        ]

        # Perform augmentation if needed
        if len(images) < size:
            print(f"Augmenting {folder} from {len(images)} to {size} images.")
            save_augmented_images(folder_path, images, augmenter, size)
        else:
            print(f"{folder} already has {len(images)} images or more.")

    print("Data augmentation completed!")

In [3]:
# Top up every HAM10000 class folder to 6705 images -- the size of the largest
# class (nv) in the output below. Augmented files are written in place, inside
# each class folder of the dataset directory.
augment_data(r"HAM10000_organized", 6705)

Augmenting akiec from 327 to 6705 images.
Augmenting bcc from 514 to 6705 images.
Augmenting bkl from 1099 to 6705 images.
Augmenting df from 115 to 6705 images.
Augmenting mel from 1113 to 6705 images.
nv already has 6705 images or more.
Augmenting vasc from 142 to 6705 images.
Data augmentation completed!


In [10]:
# Top up every ISIC 2019 class folder to 8000 images. Classes that already
# exceed the target (NV, 15370 images in the output below) are left as they
# are; nothing is removed. Augmented files are written in place.
augment_data(r"ISIC_2019_organized", 8000)

Augmenting AK from 1241 to 8000 images.
Augmenting BCC from 4298 to 8000 images.
Augmenting BKL from 3284 to 8000 images.
Augmenting DF from 330 to 8000 images.
Augmenting MEL from 5849 to 8000 images.
NV already has 15370 images or more.
Augmenting SCC from 793 to 8000 images.
Augmenting UNK from 2047 to 8000 images.
Augmenting VASC from 357 to 8000 images.
Data augmentation completed!
