In [1]:
import cv2
import numpy as np
import os
from tqdm import tqdm

## Global Parameters

In [None]:
# File-name suffixes treated as images when counting dataset files
# (matched against the lower-cased file name).
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")
# Angle, in degrees, passed to cv2.getRotationMatrix2D for the rotation augmentation.
ROTATION_ANGLE = 15
# Additive offset (beta) passed to cv2.convertScaleAbs for the brightness augmentation.
BRIGHTNESS_BETA = 40
# Standard deviation of the zero-mean Gaussian noise added to the pixel values.
NOISE_STD = 12
# (start, stop) pixel indices of the zoom crop; the same pair is applied to
# both the row and the column axis before resizing back to the original size.
ZOOM_CROP = (20, 220)

## Dataset Paths

In [None]:
# Dataset locations. The defaults are the original machine-specific folders;
# set the INPUT_DATASET_PATH / OUTPUT_DATASET_PATH environment variables to
# run the notebook on another machine without editing this cell.
INPUT_DATASET_PATH = os.environ.get(
    "INPUT_DATASET_PATH",
    r"C:\Users\pc\PycharmProjects\Project\dataset",
)
OUTPUT_DATASET_PATH = os.environ.get(
    "OUTPUT_DATASET_PATH",
    r"C:\Users\pc\PycharmProjects\Project\augmented_dataset",
)

## Image Augmentation Functions

In [None]:
def augment_image(img):
    """
    Apply multiple augmentation techniques to a single image.

    The augmentations are produced in a fixed order: rotation, horizontal
    flip, brightness adjustment, Gaussian noise, zoom (crop + resize).
    The zoom entry is omitted when the image is too small for ZOOM_CROP
    to select any pixels, so the list may hold four images instead of five.

    param img: Input image as a NumPy array whose first two axes are
        (height, width). The noise step clips to 0..255 and casts to uint8.
    Returns a list of augmented images.
    """
    augmented_images = []
    h, w = img.shape[:2]

    # 1. Rotation about the image centre, scale 1, output size unchanged
    M = cv2.getRotationMatrix2D((w // 2, h // 2), ROTATION_ANGLE, 1)
    rotated = cv2.warpAffine(img, M, (w, h))
    augmented_images.append(rotated)

    # 2. Horizontal flip
    flipped = cv2.flip(img, 1)
    augmented_images.append(flipped)

    # 3. Brightness adjustment (alpha=1 keeps contrast, beta shifts intensity)
    brightness = cv2.convertScaleAbs(img, alpha=1, beta=BRIGHTNESS_BETA)
    augmented_images.append(brightness)

    # 4. Gaussian noise; the sum is floating point, so clip before casting back
    noise = img + np.random.normal(0, NOISE_STD, img.shape)
    noise = np.clip(noise, 0, 255).astype(np.uint8)
    augmented_images.append(noise)

    # 5. Zoom (crop + resize)
    crop_start, crop_stop = ZOOM_CROP
    cropped = img[crop_start:crop_stop, crop_start:crop_stop]
    # Slicing silently clamps to the image bounds, so an image with a side of
    # crop_start pixels or fewer yields an empty crop. cv2.resize raises on an
    # empty source, which would abort the whole dataset run, so skip the zoom.
    if cropped.size > 0:
        zoomed = cv2.resize(cropped, (w, h))
        augmented_images.append(zoomed)

    return augmented_images

## Dataset Augmentation Pipeline

In [3]:
def augment_dataset(input_path, output_path):
    """
    Apply augmentation to all images in the dataset
    and save them into the output directory.

    Every sub-directory of input_path is treated as one class. For each
    file in it that cv2.imread can decode, the original image is written to
    the matching sub-directory of output_path, followed by one file per
    augmentation named "<stem>_aug<i><ext>". Files that cannot be decoded
    and non-directory entries of input_path are skipped.

    param input_path: Path to the input dataset
    param output_path: Path to the output dataset
    return: None
    """
    for category in os.listdir(input_path):
        input_folder = os.path.join(input_path, category)
        output_folder = os.path.join(output_path, category)

        if not os.path.isdir(input_folder):
            continue

        os.makedirs(output_folder, exist_ok=True)
        print(f"\nProcessing class: {category}")

        for img_name in tqdm(os.listdir(input_folder)):
            img_path = os.path.join(input_folder, img_name)
            img = cv2.imread(img_path)

            # cv2.imread returns None for anything it cannot decode
            if img is None:
                continue

            # Save original image
            cv2.imwrite(os.path.join(output_folder, img_name), img)

            # Split once so the suffix is inserted before the real extension.
            # A plain str.replace(".jpg", ...) leaves .png/.jpeg/.bmp/.JPG
            # names unchanged, which made every augmented image overwrite
            # the original file (and each other).
            stem, ext = os.path.splitext(img_name)

            # Generate augmented images
            augmented_images = augment_image(img)

            for i, aug in enumerate(augmented_images):
                new_name = f"{stem}_aug{i}{ext}"
                cv2.imwrite(os.path.join(output_folder, new_name), aug)

## Dataset Statistics


In [None]:
def count_images_in_folder(folder_path):
    """
    Tally the image files of a dataset folder, class by class.

    Each sub-directory of folder_path counts as one class; entries that are
    not directories are ignored. A file is counted when its lower-cased
    name ends with one of IMAGE_EXTENSIONS.

    param folder_path: Path to the dataset folder
    return: Tuple of (stats, total), where stats maps each class name to
        its image count and total is the sum over all classes
    """
    per_class = {}

    for entry in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry)

        # Only sub-directories represent classes.
        if os.path.isdir(entry_path):
            image_files = [
                name
                for name in os.listdir(entry_path)
                if name.lower().endswith(IMAGE_EXTENSIONS)
            ]
            per_class[entry] = len(image_files)

    return per_class, sum(per_class.values())


## Run Entry

In [None]:
if __name__ == "__main__":
    # Step 1: write the originals plus their augmented variants to the output tree.
    print(" Augmenting dataset...")
    augment_dataset(INPUT_DATASET_PATH, OUTPUT_DATASET_PATH)

    # Step 2: per-class and overall counts for the input dataset.
    print("\n Counting images before augmentation:")
    stats_before, total_before = count_images_in_folder(INPUT_DATASET_PATH)
    for class_name in stats_before:
        print(f"{class_name}: {stats_before[class_name]}")
    print(f"Total: {total_before}")

    # Step 3: the same report for the augmented output dataset.
    print("\n Counting images after augmentation:")
    stats_after, total_after = count_images_in_folder(OUTPUT_DATASET_PATH)
    for class_name in stats_after:
        print(f"{class_name}: {stats_after[class_name]}")
    print(f"Total: {total_after}")