# Introduction

This notebook implements a data augmentation pipeline so that we have a relatively large repertoire of sketches for FAISS to index. We currently have 8 original sketches; generating 10 augmentations per image yields 80 augmented sketches, for a total of 88 (80 + 8).

In [None]:
import albumentations as A
import cv2, os

def augment_sketches(input_folder: str, output_folder: str, augmentations_per_image: int = 5) -> None:
    """
    Copies every PNG sketch in a folder to an output folder together with randomly augmented variants.

    For each readable ``.png`` file (extension matched case-insensitively) the
    original image is re-saved under its own name, followed by
    ``augmentations_per_image`` augmented copies named ``<stem>_aug<i>.png``
    with ``i`` starting at 1. Files are processed in sorted name order.

    Args:
        input_folder (str): Folder containing original sketches.
        output_folder (str): Folder to save the originals and the augmented sketches.
            Created if it does not exist.
        augmentations_per_image (int): Number of augmented copies to create per image.
            Default value is 5. Values <= 0 produce no augmented copies.

    Raises:
        NotADirectoryError: If ``input_folder`` does not exist or is not a directory.
    """
    # Validate before touching the filesystem: `isdir` also rejects a path that
    # points at a regular file, so no output folder is created for a bad input.
    if not os.path.isdir(input_folder):
        raise NotADirectoryError(
            f"The input folder does not exist or is not a directory: '{input_folder}'"
        )
    os.makedirs(output_folder, exist_ok=True)

    # ** Defining augmentation policy
    # Mild geometric and photometric perturbations; `fit_output=False` keeps the
    # affine result on the original canvas instead of resizing to fit.
    augment = A.Compose(
        [
            A.Affine(scale=(0.98, 1.02), translate_percent=(0.01, 0.02), rotate=(-7, 7), shear=(-2, 2), fit_output=False, border_mode=cv2.BORDER_REFLECT, p=0.9),
            # At most one of brightness/contrast jitter or blur is applied per sample.
            A.OneOf(
                [
                    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.8),
                    A.GaussianBlur(blur_limit=(3, 5), p=0.5),
                ],
                p=0.8
            ),
            A.HorizontalFlip(p=0.5),
            A.Perspective(scale=(0.02, 0.05), p=0.3),
            A.ImageCompression(quality_range=(90, 100), p=0.2)
        ]
    )

    failed_writes = 0
    processed = 0

    def _save(path: str, image) -> None:
        """Write `image` to `path`, reporting (not raising on) a failed write."""
        nonlocal failed_writes
        # cv2.imwrite signals failure through its boolean return value.
        if not cv2.imwrite(path, image):
            failed_writes += 1
            print(f"Failed to write image: {path}")

    # ** Apply to each image
    # Sorted so the processing order does not depend on the filesystem.
    for fname in sorted(os.listdir(input_folder)):
        if not fname.lower().endswith(".png"):
            continue

        img_path = os.path.join(input_folder, fname)
        # NOTE(review): default imread flags load a 3-channel image, so an alpha
        # channel (if the sketches have one) is dropped — confirm this is intended.
        img = cv2.imread(img_path)

        if img is None:
            print(f"Skipping unreadable image: {fname}")
            continue

        processed += 1
        stem = os.path.splitext(fname)[0]

        # ** Saving the original image
        _save(os.path.join(output_folder, fname), img)

        for i in range(1, augmentations_per_image + 1):
            aug_img = augment(image=img)["image"]
            new_name = f"{stem}_aug{i}.png"
            _save(os.path.join(output_folder, new_name), aug_img)

    if processed == 0:
        print(f"No readable .png images found in '{input_folder}'")
    if failed_writes:
        print(f"Warning: {failed_writes} image(s) could not be written to '{output_folder}'")

    print(f"Augmentation completed. Results saved in '{output_folder}'")

In [4]:
# The project root can be overridden with the ARCHIMERA_ROOT environment variable so
# the notebook is not tied to one machine; the default keeps the original location.
project_root = os.environ.get("ARCHIMERA_ROOT", "/home/ayushkum/archimera")
input_folder = os.path.join(project_root, "inputs", "input_png")
output_folder = os.path.join(project_root, "augmented", "input_png")
# Number of augmented copies generated per original sketch.
augmentation_per_image = 10

augment_sketches(
    input_folder=input_folder,
    output_folder=output_folder,
    augmentations_per_image=augmentation_per_image
)

Augmentation completed. Results saved in '/home/ayushkum/archimera/augmented/input_png'
