In [None]:
import os
import cv2
import random
import numpy as np
import shutil

# --- Parameters ---
# Everything a reader might want to tune lives here.
DATASET_DIR = "../raw"       # original dataset: one sub-folder per class
OUTPUT_DIR = "../data"       # destination for originals + augmented images
TARGET_PER_CLASS = 3000      # number of images to produce for each class
ORIGINAL_FRACTION = 0.7      # 70% original, 30% augmented
SEED = 42                    # fixed seed for the `random` module

# Every stochastic step below (shuffling, seed-image choice, augmentation
# parameters) draws from the stdlib `random` module, so seeding it here makes
# a Restart & Run All produce the same selection of images each time.
random.seed(SEED)

In [4]:
# Number of untouched originals to copy per class.
# round() is used instead of int(): int() truncates, and binary floating point
# can land a hair below the intended integer (e.g. 0.29 * 100 == 28.999...),
# which would silently drop one original. For the current settings the
# result is unchanged (2100).
KEEP_ORIGINALS = round(TARGET_PER_CLASS * ORIGINAL_FRACTION)

# Create the output root up front; exist_ok makes the cell safe to re-run.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Augmentation Functions ---
def random_flip(img):
    """Mirror ``img`` along a randomly chosen axis.

    One of three OpenCV flip codes is picked uniformly:
    -1 flips both axes, 0 flips vertically, 1 flips horizontally.

    Returns the flipped image produced by ``cv2.flip``.
    """
    return cv2.flip(img, random.choice((-1, 0, 1)))

def random_rotate(img):
    """Rotate ``img`` by a random angle drawn uniformly from [-10, 10] degrees.

    The rotation is about the integer centre ``(w // 2, h // 2)`` with no
    scaling, and the output keeps the input's width and height. Pixels that
    fall outside the source are filled by reflecting the border
    (``cv2.BORDER_REFLECT_101``).
    """
    degrees = random.uniform(-10, 10)
    height, width = img.shape[:2]
    centre = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(centre, degrees, 1.0)
    return cv2.warpAffine(
        img,
        rotation,
        (width, height),
        borderMode=cv2.BORDER_REFLECT_101,
    )

def random_brightness(img):
    """Scale pixel intensities by a random gain in [0.85, 1.15].

    The multiplication is done in float32, then the result is clipped to
    [0, 255] and converted to uint8. A new array is returned; the input
    array is not modified.
    """
    gain = 1.0 + random.uniform(-0.15, 0.15)
    scaled = img.astype(np.float32) * gain
    clipped = np.clip(scaled, 0, 255)
    return clipped.astype(np.uint8)

def random_grayscale(img):
    """With 20% probability, strip the colour from ``img``.

    When triggered, the image is converted BGR -> gray -> BGR, so the result
    still has three channels. Otherwise the input object is returned as-is.
    """
    keep_colour = random.random() >= 0.2  # 80% of the time: leave untouched
    if keep_colour:
        return img
    mono = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.cvtColor(mono, cv2.COLOR_GRAY2BGR)

def augment_image(img):
    """Apply a random subset of the augmentations to ``img`` and return it.

    Flip, rotate and brightness are each applied independently with
    probability 0.5, in that order. ``random_grayscale`` is always called
    last and makes its own random decision.
    """
    coin_flip_steps = (random_flip, random_rotate, random_brightness)
    for step in coin_flip_steps:
        # Draw the coin first, then run the step, so the sequence of random
        # draws is: coin, step parameters, coin, step parameters, ...
        if random.random() < 0.5:
            img = step(img)
    return random_grayscale(img)

In [5]:
# --- Process Each Class ---
# For every class folder in DATASET_DIR:
#   1. copy up to KEEP_ORIGINALS randomly chosen originals unchanged,
#   2. top the class up to TARGET_PER_CLASS with augmented images.
# Class names and file lists are sorted before any random draw because
# os.listdir() order is arbitrary; a seeded RNG only gives repeatable results
# if it is fed the same ordering.
for class_name in sorted(os.listdir(DATASET_DIR)):
    class_path = os.path.join(DATASET_DIR, class_name)
    if not os.path.isdir(class_path):
        continue

    # List images
    images = sorted(
        os.path.join(class_path, f)
        for f in os.listdir(class_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    if not images:
        # Nothing to copy or to augment from; random.choice([]) would raise.
        print(f"[!] {class_name} → no images found, skipping")
        continue

    out_class_path = os.path.join(OUTPUT_DIR, class_name)
    os.makedirs(out_class_path, exist_ok=True)

    random.shuffle(images)

    # Keep originals. The file is copied byte-for-byte, so keep its real
    # extension instead of labelling a PNG as ".jpg".
    originals = images[:KEEP_ORIGINALS]
    for i, img_path in enumerate(originals):
        ext = os.path.splitext(img_path)[1].lower()
        shutil.copy(img_path, os.path.join(out_class_path, f"orig_{i:05d}{ext}"))

    # Augment until reaching target. Count from the originals actually
    # copied so classes with fewer than KEEP_ORIGINALS images still reach
    # TARGET_PER_CLASS.
    needed = TARGET_PER_CLASS - len(originals)
    candidates = list(images)  # seed images not yet found to be unreadable
    aug_count = 0
    while aug_count < needed and candidates:
        seed = random.choice(candidates)
        img = cv2.imread(seed)
        if img is None:
            # Unreadable file: drop it so a class made up only of corrupt
            # images cannot spin forever.
            candidates.remove(seed)
            continue
        aug_img = augment_image(img)
        out_path = os.path.join(out_class_path, f"aug_{aug_count:05d}.jpg")
        # cv2.imwrite signals failure through its return value, not an
        # exception; surface it instead of counting a file that is not there.
        if not cv2.imwrite(out_path, aug_img):
            raise OSError(f"could not write augmented image: {out_path}")
        aug_count += 1

    if aug_count < needed:
        print(f"[!] {class_name} → only {aug_count}/{needed} augmented images written (no readable source images left)")

    print(f"[✓] {class_name} → {len(os.listdir(out_class_path))} images")

print("all images augmented")

[✓] A → 100 images
[✓] B → 100 images
[✓] C → 100 images
[✓] D → 100 images
[✓] del → 100 images
[✓] E → 100 images
[✓] F → 100 images
[✓] G → 100 images
[✓] H → 100 images
[✓] I → 100 images
[✓] J → 100 images
[✓] K → 100 images
[✓] L → 100 images
[✓] M → 100 images
[✓] N → 100 images
[✓] nothing → 100 images
[✓] O → 100 images
[✓] P → 100 images
[✓] Q → 100 images
[✓] R → 100 images
[✓] S → 100 images
[✓] space → 100 images
[✓] T → 100 images
[✓] U → 100 images
[✓] V → 100 images
[✓] W → 100 images
[✓] X → 100 images
[✓] Y → 100 images
[✓] Z → 100 images
all images augmented
