In [None]:
# Download dataset using gdown

import gdown

bdd100k_url = 'https://drive.google.com/file/d/1b_WgEsT5uBwF6AypEbDsrBpKyWQu9m8p'
lol_url = 'https://drive.google.com/file/d/1yp0T3Zjk8p7UKm4Z8XqKhYRnsSgFcppw'
lol_synthetic url = 'https://drive.google.com/file/d/1eMj-eqe2JAh5lW5iucAO20YszlqfHNU0'

gdown.download(bdd100k_url, output='bdd100k.zip', quiet=False)
gdown.download(lol_url, output='lol.zip', quiet=False)
gdown.download(lol_synthetic_url, output='lol_synthetic.zip', quiet=False)

In [None]:
# unzip files to ./datasets folder
import zipfile

with zipfile.ZipFile("bdd100k.zip", "r") as zip_ref:
    zip_ref.extractall("datasets")

# with zipfile.ZipFile('lol.zip', 'r') as zip_ref:
#     zip_ref.extractall('datasets')

# with zipfile.ZipFile('lol_synthetic.zip', 'r') as zip_ref:
#     zip_ref.extractall('datasets')

In [4]:
# print the folder structure of ./datasets folder
import os
import sys


def print_folder_structure(folder, indent=0):
    for item in os.listdir(folder):
        if item.startswith("."):
            continue
        path = os.path.join(folder, item)
        if os.path.isdir(path):
            print(" " * indent + item + "/")
            print_folder_structure(path, indent + 2)
        else:
            # print(' ' * indent + item)
            pass


if __name__ == "__main__":
    print_folder_structure("./datasets")

dataset/
  trainB/
  trainA/
  testA/
  testB/


In [10]:
# The dataset folder has a file status.yaml that contains the status of the dataset
# set the status to 'augmented' to indicate that the dataset is already augmented
# set the status to 'original' to indicate that the dataset is the original dataset


# function to set the status of the dataset
def set_status(status):
    with open("./datasets/dataset/status.yaml", "w") as f:
        f.write("status: " + status + "\n")


# function to read the status of the dataset
def read_status():
    with open("./datasets/dataset/status.yaml", "r") as f:
        # if the file is empty, return 'original'
        if not f.read().strip():
            return "original"
        return f.read().strip().split(":")[-1].strip()

In [12]:
read_status()

'original'

In [15]:
import os
import random
from PIL import Image, ImageEnhance
from tqdm import tqdm


def augment_dataset(
    input_folder,
    output_folder,
    num_augmentations=5,
    rotation_range=(0, 360),
    scale_range=(0.8, 1.2),
    crop_size=(256, 256),
):
    """
    Augment dataset by rotations, scaling, cropping, and color jittering.

    Parameters:
        input_folder (str): Path to the input folder containing images.
        output_folder (str): Path to the output folder to save augmented images.
        num_augmentations (int): Number of augmented images to generate for each input image.
        rotation_range (tuple): Range of rotation angles in degrees.
        scale_range (tuple): Range of scaling factors.
        crop_size (tuple): Size of the cropped region (height, width).
    """
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # List all images in the input folder
    image_files = [f for f in os.listdir(input_folder) if f.endswith(".png")]

    for image_file in tqdm(image_files, desc="Augmenting dataset"):
        # Open image
        image_path = os.path.join(input_folder, image_file)
        with Image.open(image_path) as img:
            # Apply augmentations
            for i in range(num_augmentations):
                augmented_img = img.copy()

                # Rotation
                rotation_angle = random.uniform(rotation_range[0], rotation_range[1])
                augmented_img = augmented_img.rotate(rotation_angle)

                # Scaling
                scale_factor = random.uniform(scale_range[0], scale_range[1])
                new_size = (
                    int(augmented_img.width * scale_factor),
                    int(augmented_img.height * scale_factor),
                )
                augmented_img = augmented_img.resize(new_size, Image.ANTIALIAS)

                # Random cropping
                crop_left = random.randint(0, augmented_img.width - crop_size[1])
                crop_top = random.randint(0, augmented_img.height - crop_size[0])
                crop_box = (
                    crop_left,
                    crop_top,
                    crop_left + crop_size[1],
                    crop_top + crop_size[0],
                )
                augmented_img = augmented_img.crop(crop_box)

                # Color jittering
                enhancer = ImageEnhance.Color(augmented_img)
                enhanced_img = enhancer.enhance(random.uniform(0.5, 1.5))

                # Save augmented image
                augmented_filename = os.path.splitext(image_file)[0] + f"_aug_{i}.png"
                augmented_filepath = os.path.join(output_folder, augmented_filename)
                enhanced_img.save(augmented_filepath)

    print("Dataset augmentation completed.")

Augmenting dataset:   0%|          | 0/485 [00:00<?, ?it/s]

  augmented_img = augmented_img.resize(new_size, Image.ANTIALIAS)
Augmenting dataset: 100%|██████████| 485/485 [01:15<00:00,  6.39it/s]


Dataset augmentation completed.


Augmenting dataset: 100%|██████████| 485/485 [01:03<00:00,  7.68it/s]

Dataset augmentation completed.





In [None]:
# Example usage:
augment_dataset("./datasets/bdd100k/trainA", "./datasets/augmented_bdd100k/trainA")
augment_dataset("./datasets/bdd100k/trainB", "./datasets/augmented_bdd100k/trainB")