### This is code for data augmentation

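Use it to augment your image datasets: set the root folders (`dataset` and `augmented_dataset`) to match your own directory structure, then run the notebook from top to bottom. Note that the output folder is deleted and recreated on every run.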

In [1]:
%pip install albumentations

You should consider upgrading via the '/usr/local/bin/python3 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [13]:
import cv2
import os
import shutil
import albumentations as A
import numpy as np

# Running total of augmented images; the augmentation loop increments it
# once per augmented image it saves.
count_augmented_imaged = 0

# Root folders [ adjust both paths to match your own directory layout ]
augmented_dataset = "./datasets/AugmentedFruitDataset"  # output: copied originals + augmented images
dataset = "./datasets/ProcessedFruitDataset"  # input: images to augment

In [14]:
# Augmentation pipeline applied to every image.
# Each transform is applied with its own probability `p`, in the order listed.
# Additional transforms you may want to add are catalogued at:
# https://albumentations.ai/docs/3-basic-usage/choosing-augmentations/
transform = A.Compose(
    transforms=[
        A.HorizontalFlip(p=0.5),            # mirror left <-> right
        A.RandomBrightnessContrast(p=0.4),  # random brightness / contrast shift
        A.Rotate(limit=20, p=0.5),          # random rotation within +/- 20 degrees
    ]
)

In [15]:
num_augmentations = 1  # Number of augmented versions per image

# Reset the counter here so that re-running this cell alone does not keep
# adding to the total left over from a previous run.
count_augmented_imaged = 0

# Validate the folders BEFORE deleting anything: the output folder is wiped
# below, so a missing or overlapping input folder must stop the run first.
if not os.path.isdir(dataset):
    raise FileNotFoundError(f"Input dataset folder not found: {dataset}")

source_root = os.path.realpath(dataset)
target_root = os.path.realpath(augmented_dataset)
if (
    source_root == target_root
    or source_root.startswith(target_root + os.sep)
    or target_root.startswith(source_root + os.sep)
):
    raise ValueError(
        "dataset and augmented_dataset must be separate, non-nested folders: "
        f"{dataset!r} vs {augmented_dataset!r}"
    )

# This will delete augmented_dataset if it exists
if os.path.exists(augmented_dataset):
    shutil.rmtree(augmented_dataset)
os.makedirs(augmented_dataset, exist_ok=True)

for root, _, files in os.walk(dataset):
    for filename in files:
        # Only files with a known image extension are considered
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        input_path = os.path.join(root, filename)

        # Preserve folder structure
        relative_path = os.path.relpath(input_path, dataset)
        output_path = os.path.join(augmented_dataset, relative_path)
        output_dir = os.path.dirname(output_path)
        os.makedirs(output_dir, exist_ok=True)

        # Load image, and check if it is an image
        # (cv2.imread returns None instead of raising when it cannot decode the file)
        image = cv2.imread(input_path)
        if image is None:
            print(f"Skipping unreadable image: {input_path}")
            continue

        # Save original (copy)
        shutil.copy2(input_path, output_path)

        # OpenCV loads images as BGR; the pipeline is fed RGB, so convert once
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        for aug_index in range(num_augmentations):
            # Each call re-samples the random transforms, giving a new variant
            augmented = transform(image=image_rgb)["image"]
            augmented_bgr = cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR)

            # Save augmented image with prefix. A single augmentation keeps the
            # plain "aug_" prefix; several are numbered so they do not
            # overwrite each other.
            if num_augmentations == 1:
                aug_filename = f"aug_{filename}"
            else:
                aug_filename = f"aug{aug_index}_{filename}"
            aug_path = os.path.join(output_dir, aug_filename)

            # cv2.imwrite reports failure through its return value, so only
            # count images that were actually written.
            if cv2.imwrite(aug_path, augmented_bgr):
                count_augmented_imaged += 1
            else:
                print(f"Failed to write augmented image: {aug_path}")

print("Done augmenting: ", count_augmented_imaged, " images")

Done augmenting:  275  images
