In [6]:
import os
import random
from PIL import Image
import torchvision.transforms as transforms
from torchvision.utils import save_image
from tqdm import tqdm

In [11]:
# Paths (relative, so they resolve against the kernel's current working directory)
# source_directory: root whose entries are iterated as one folder per class.
source_directory = "../../dataset/curated-dataset"
# destination_directory: root under which a same-named folder is created per class.
destination_directory = "../../dataset/augmented"

# Ensure destination directory exists (exist_ok=True keeps re-running this cell harmless)
os.makedirs(destination_directory, exist_ok=True)
 

In [12]:
# Pool of single-step augmentations. One entry is picked at random for every
# augmented copy that gets written, so each copy receives exactly one of these.
#
# Experiments that are currently switched off (not part of the pool):
#   RandomVerticalFlip(p=1), RandomRotation(degrees=270),
#   RandomAffine(degrees=0, translate=(0.1, 0.1)),
#   RandomResizedCrop(size=(512, 512), scale=(0.8, 1.2)), RandomPerspective(),
#   ElasticTransform(alpha=250.0), RandomAdjustSharpness(sharpness_factor=2)
augmentation_transforms = [
    transforms.RandomHorizontalFlip(p=1),    # p=1: the flip is always applied
    transforms.ColorJitter(brightness=0.6),  # brightness jitter only
    transforms.ColorJitter(contrast=0.6),    # contrast jitter only
]

In [13]:
# Load and augment images
def _load_rgb(img_path):
    """Read an image fully into memory as RGB and release the underlying file handle."""
    # convert() loads the pixel data and returns a new image, so the file can be
    # closed as soon as the with-block exits.
    with Image.open(img_path) as opened:
        return opened.convert("RGB")


def _write_augmented_copy(img, dest_path, image_size):
    """Apply one randomly chosen augmentation followed by a resize, then save to ``dest_path``."""
    pipeline = transforms.Compose([
        random.choice(augmentation_transforms),
        transforms.Resize(image_size),
    ])
    save_image(transforms.ToTensor()(pipeline(img)), dest_path)


def augment_images(class_dir, dest_class_dir, required_count, image_size=(512, 512)):
    """Copy the images of one class and pad the class with augmented variants.

    Every regular file in ``class_dir`` is re-saved as ``<i>.jpg`` (``i`` is its
    position after shuffling), followed by an equal number of augmented copies
    per image named ``<i>_<j>.jpg``. If ``dest_class_dir`` then still holds
    fewer than ``required_count`` entries, more augmented copies are written as
    ``extra_<n>.jpg``, cycling through the shuffled sources, until it does.

    When the class already has ``required_count`` or more source images, all of
    them are still copied and no augmented copies are produced.

    Args:
        class_dir: Directory containing the source images of one class.
        dest_class_dir: Directory that receives the output; created if missing.
        required_count: Target number of entries in ``dest_class_dir``.
        image_size: Size handed to ``transforms.Resize`` for augmented copies.
            Defaults to ``(512, 512)``.

    Returns:
        None. Results are written to ``dest_class_dir``.
    """
    image_files = [
        name for name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, name))
    ]
    original_count = len(image_files)

    # An empty class folder would otherwise divide by zero below.
    if original_count == 0:
        print(f"No files found in {class_dir}; nothing to augment.")
        return

    os.makedirs(dest_class_dir, exist_ok=True)

    # Shuffle so the numbering of the outputs does not follow the listing order.
    random.shuffle(image_files)

    # Whole number of augmented copies every source image gets; the remainder
    # is topped up by the "extra" pass further down.
    augmentations_per_image = max(0, (required_count - original_count) // original_count)

    to_tensor = transforms.ToTensor()
    for i, image_file in enumerate(tqdm(image_files, desc=f"Processing {class_dir}")):
        img = _load_rgb(os.path.join(class_dir, image_file))

        # NOTE(review): the original is written at its native resolution while
        # the augmented copies are resized to image_size - confirm this is intended.
        save_image(to_tensor(img), os.path.join(dest_class_dir, f"{i}.jpg"))

        for j in range(augmentations_per_image):
            _write_augmented_copy(img, os.path.join(dest_class_dir, f"{i}_{j}.jpg"), image_size)

    # Top up to the required count. The count is taken from the directory
    # listing, so anything already present in dest_class_dir is included.
    augmented_count = len(os.listdir(dest_class_dir))
    shortfall = max(0, required_count - augmented_count)
    for offset in range(shortfall):
        image_file = image_files[offset % original_count]
        img = _load_rgb(os.path.join(class_dir, image_file))
        _write_augmented_copy(
            img,
            os.path.join(dest_class_dir, f"extra_{augmented_count + offset}.jpg"),
            image_size,
        )


In [None]:
 
# Process each class
# Target number of images every class folder should end up with.
TARGET_IMAGES_PER_CLASS = 2000

for class_name in os.listdir(source_directory):
    class_dir = os.path.join(source_directory, class_name)
    # Only sub-directories are classes; stray files in the dataset root
    # (e.g. .DS_Store) would otherwise get an empty output folder and then
    # crash the directory listing inside augment_images.
    if not os.path.isdir(class_dir):
        continue
    dest_class_dir = os.path.join(destination_directory, class_name)
    os.makedirs(dest_class_dir, exist_ok=True)
    augment_images(class_dir, dest_class_dir, TARGET_IMAGES_PER_CLASS)

print("Data augmentation completed.")

Processing ../../dataset/curated-dataset/minor_crack: 100%|████████████| 624/624 [00:24<00:00, 25.81it/s]
Processing ../../dataset/curated-dataset/peeling: 100%|████████████████| 520/520 [00:19<00:00, 26.64it/s]
Processing ../../dataset/curated-dataset/plain: 100%|██████████████████| 600/600 [00:22<00:00, 26.76it/s]
Processing ../../dataset/curated-dataset/stain: 100%|██████████████████| 521/521 [00:20<00:00, 25.86it/s]
Processing ../../dataset/curated-dataset/algae:  31%|█████▋            | 195/620 [00:08<00:18, 22.47it/s]