# In [None]:
# ==============================================
# 02_generate_images.ipynb
# Augmentation-based medical image generator
# ==============================================

import os, glob
import cv2
import numpy as np
from tqdm import tqdm
import albumentations as A
from PIL import Image

# ------------------------------------------------
# Paths
# ------------------------------------------------
# Augmented copies are written to output_dir; originals are read from input_dir.
output_dir = "/workspace/Data/generated"
input_dir = "/workspace/Data/train"

# Create the destination up front; exist_ok=True keeps re-runs from failing
# when the directory is already there.
os.makedirs(output_dir, exist_ok=True)

# ------------------------------------------------
# Define augmentations
# ------------------------------------------------
# Steps are listed in the order they are handed to A.Compose; each carries its
# own probability `p` of being applied to a given image.
# NOTE(review): keyword names such as `alpha_affine`, `var_limit` and
# `height`/`width` depend on the installed albumentations version — confirm
# they match the version pinned for this project.
_augmentation_steps = [
    # Geometric flips / quarter-turn rotations.
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.RandomRotate90(p=0.4),
    # Non-rigid deformation.
    A.ElasticTransform(alpha=50, sigma=5, alpha_affine=10, p=0.3),
    # Intensity changes and additive noise.
    A.RandomBrightnessContrast(p=0.4),
    A.GaussNoise(var_limit=(5, 30), p=0.4),
    A.GridDistortion(p=0.3),
    # Crops 80-100% of the image and resizes to 256x256, but only with p=0.3,
    # so images on which it does not fire keep their incoming size.
    # NOTE(review): this yields mixed output sizes — confirm that is intended.
    A.RandomResizedCrop(height=256, width=256, scale=(0.8, 1.0), p=0.3),
]

augment = A.Compose(_augmentation_steps)

# ------------------------------------------------
# Generate augmented images
# ------------------------------------------------
num_variants_per_image = 10   # adjust as you wish (e.g. 5, 20, etc.)

# Collect every PNG/JPEG directly inside input_dir (non-recursive). Sorting
# makes the processing order reproducible instead of filesystem-dependent.
image_paths = sorted(
    path
    for pattern in ("*.png", "*.jpg", "*.jpeg")
    for path in glob.glob(os.path.join(input_dir, pattern))
)

print(f"Found {len(image_paths)} source images.")
print(f"Generating {num_variants_per_image} variants per image...")

# Files that OpenCV could not decode; reported at the end instead of aborting.
skipped_paths = []

for img_path in tqdm(image_paths):
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None (it does not raise) for missing, corrupt or
        # unsupported files; passing None on to cvtColor would crash the run.
        skipped_paths.append(img_path)
        tqdm.write(f"Skipping unreadable image: {img_path}")
        continue

    # OpenCV loads BGR; convert to RGB so PIL saves the channels correctly.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # The output stem depends only on the source file, so compute it once.
    # NOTE(review): sources sharing a stem (e.g. a.png and a.jpg) map to the
    # same output names, so the later one overwrites the earlier one.
    out_name = os.path.splitext(os.path.basename(img_path))[0]

    for variant_idx in range(1, num_variants_per_image + 1):
        augmented = augment(image=img)["image"]
        out_path = os.path.join(output_dir, f"{out_name}_aug_{variant_idx}.png")
        Image.fromarray(augmented).save(out_path)

if skipped_paths:
    print(f"Skipped {len(skipped_paths)} unreadable image(s).")
print(f"✅ Done! Augmented images saved to: {output_dir}")