In [2]:
!python -m pip install imgaug

Collecting imgaug
  Downloading imgaug-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting scikit-image>=0.14.2 (from imgaug)
  Downloading scikit_image-0.25.0-cp310-cp310-win_amd64.whl.metadata (14 kB)
Collecting imageio (from imgaug)
  Downloading imageio-2.36.1-py3-none-any.whl.metadata (5.2 kB)
Collecting Shapely (from imgaug)
  Downloading shapely-2.0.6-cp310-cp310-win_amd64.whl.metadata (7.2 kB)
Collecting networkx>=3.0 (from scikit-image>=0.14.2->imgaug)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting tifffile>=2022.8.12 (from scikit-image>=0.14.2->imgaug)
  Downloading tifffile-2024.12.12-py3-none-any.whl.metadata (31 kB)
Collecting lazy-loader>=0.4 (from scikit-image>=0.14.2->imgaug)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
   ---------------------------------------- 0.0/948.0 kB ? eta -:--:--
   ---------------------------------------- 0.0/948.0 kB ? eta -:--:--
   --

In [1]:
import os
import random
import numpy as np
from PIL import Image
import imgaug.augmenters as iaa

# Path to your dataset folder
dataset_path = "normalized_output of Blood Cancer2 vahadane"
target_count = 2000  # Target number of images per folder
SEED = 42  # Fixed seed so a re-run picks the same images and the same transforms

# Seeds the stdlib RNG used for random.choice when picking source images.
random.seed(SEED)

# Mild geometric augmentations only: flips, a small affine jitter and a light crop.
augmenters = iaa.Sequential([
    iaa.Fliplr(0.5),  # Horizontal flip with probability 0.5
    iaa.Flipud(0.1),  # Vertical flip with probability 0.1
    iaa.Affine(
        scale=(0.95, 1.05),
        rotate=(-10, 10),
        shear=(-5, 5),
        # Fill newly exposed pixels by repeating the border pixels instead of black.
        # `cval` is deliberately not passed: it is only used with mode='constant'.
        mode='edge',
    ),
    iaa.Crop(percent=(0, 0.05), keep_size=True),  # Crop up to 5%, then resize back
])

# Seeds the pipeline and all of its child augmenters.
augmenters.seed_(SEED)

# Function to save augmented images
def save_augmented_images(folder_path, images, augmenter, target):
    """Top up a folder with augmented copies until it holds ``target`` images.

    Source images are picked at random (with replacement) from ``images``,
    passed through ``augmenter``, resized to the dimensions of ``images[0]``
    and written to ``folder_path`` as ``aug_<n>.jpg``.

    Args:
        folder_path: Directory the augmented JPEG files are written to.
        images: Paths of the images already in the folder. Its length is used
            as the current image count, and it is the pool sources are drawn from.
        augmenter: Callable invoked as ``augmenter(image=array)`` that returns
            the augmented image array.
        target: Desired total number of images in the folder.

    Raises:
        ValueError: If ``images`` is empty while ``target`` is positive, since
            there is nothing to augment from.
    """
    current_count = len(images)
    if current_count >= target:
        # Already at (or above) the target: nothing to generate, nothing to open.
        return
    if not images:
        raise ValueError(
            f"Cannot augment '{folder_path}': no source images were provided."
        )

    # Only the size of the first image is needed; the context manager closes the
    # file handle instead of leaving it open until garbage collection.
    with Image.open(images[0]) as reference_image:
        original_size = reference_image.size  # Every output is resized to this

    # The file index is tracked separately from the image count so that an
    # existing aug_<n>.jpg is never overwritten (which would leave the folder
    # short of the target without any error).
    file_index = current_count

    while current_count < target:
        # Randomly choose a source image
        img_path = random.choice(images)
        with Image.open(img_path) as img:
            # JPEG stores only grayscale or RGB here; palette/alpha/CMYK sources
            # are converted so the save below neither fails nor distorts colours.
            source = img if img.mode in ("L", "RGB") else img.convert("RGB")
            img_array = np.array(source, dtype=np.uint8)

        # Apply augmentation
        augmented_image_array = augmenter(image=img_array)
        augmented_image = Image.fromarray(augmented_image_array).resize(
            original_size, Image.Resampling.LANCZOS
        )

        # Find the next free output name
        new_filename = os.path.join(folder_path, f"aug_{file_index}.jpg")
        while os.path.exists(new_filename):
            file_index += 1
            new_filename = os.path.join(folder_path, f"aug_{file_index}.jpg")

        # quality=100 is the highest JPEG setting; the format is still lossy.
        augmented_image.save(new_filename, quality=100)

        current_count += 1
        file_index += 1

# Detect folders in the dataset directory. os.listdir returns entries in an
# unspecified order, so sort (case-insensitively) for a stable processing order.
folders = sorted(
    (d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))),
    key=str.lower,
)

# Extensions are compared in lower case so files such as "IMG_01.JPG" are counted too.
image_extensions = ('.png', '.jpg', '.jpeg')

# Augment images for each detected folder
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)
    # Sorted so the pool handed to the augmentation function has a stable order.
    images = sorted(
        os.path.join(folder_path, img)
        for img in os.listdir(folder_path)
        if img.lower().endswith(image_extensions)
    )

    # A folder without any images has nothing to augment from; skip it instead
    # of letting it abort the whole run.
    if not images:
        print(f"Skipping {folder}: no images found.")
        continue

    # Perform augmentation if needed
    if len(images) < target_count:
        print(f"Augmenting {folder} from {len(images)} to {target_count} images.")
        save_augmented_images(folder_path, images, augmenters, target_count)
    else:
        print(f"{folder} already has {len(images)} images or more.")

print("Data augmentation completed!")

Augmenting Benign from 512 to 2000 images.
Augmenting [Malignant] early Pre-B from 979 to 2000 images.
Augmenting [Malignant] Pre-B from 955 to 2000 images.
Augmenting [Malignant] Pro-B from 796 to 2000 images.
Data augmentation completed!
