In [None]:
!pip install albumentations opencv-python numpy



In [None]:
import os
import cv2
import random
import numpy as np
from albumentations import (
    HorizontalFlip, Rotate, ShiftScaleRotate, RandomBrightnessContrast,
    Perspective, ColorJitter, Compose, BboxParams
)
from albumentations.core.composition import OneOf

# Define Paths
base_dir = "/content/drive/MyDrive/your_dataset_folder"  # Update with your dataset folder
output_dir = "/content/drive/MyDrive/augmented_dataset"  # Update with your output folder
# Dataset splits to process; each is expected to contain "images" and "labels" subfolders.
folders = ["train", "test", "valid"]

# Create the output root up front. exist_ok=True replaces the separate
# os.path.exists() check, which was racy (check-then-create) and redundant.
os.makedirs(output_dir, exist_ok=True)

# Parameters
# Per-split budget: augmentation stops once (target_image_count - number of
# original images) augmented images have been generated for that split.
target_image_count = 1000
# Upper bound on the number of augmented copies produced from one source image.
augmentations_per_image = 20

# Augmentation Pipeline
def get_augmentation_pipeline():
    """Build the albumentations pipeline used for every augmented sample.

    The pipeline has three stages:
      1. A ``OneOf`` group (p=0.8) holding three geometric transforms:
         rotation, shift/scale/rotate, and perspective warp.
      2. Random brightness/contrast adjustment (p=0.5).
      3. Color jitter on hue, saturation, brightness and contrast (p=0.5).

    Bounding boxes are declared in YOLO format, with their class ids passed
    through the ``class_labels`` label field.

    Returns:
        A ``Compose`` object callable as
        ``pipeline(image=..., bboxes=..., class_labels=...)``.
    """
    # Geometric stage: the candidate transforms share one OneOf group.
    geometric_choices = [
        Rotate(limit=20, p=1.0),
        ShiftScaleRotate(shift_limit=0.02, scale_limit=0.2, rotate_limit=20, p=1.0),
        Perspective(scale=(0.02, 0.06), p=1.0),
    ]
    geometric_stage = OneOf(geometric_choices, p=0.8)

    # Photometric stages.
    brightness_contrast_stage = RandomBrightnessContrast(
        brightness_limit=0.2, contrast_limit=0.2, p=0.5
    )
    color_jitter_stage = ColorJitter(
        hue=0.2, saturation=0.2, brightness=0.2, contrast=0.2, p=0.5
    )

    stages = [geometric_stage, brightness_contrast_stage, color_jitter_stage]
    yolo_bbox_params = BboxParams(format="yolo", label_fields=["class_labels"])
    return Compose(stages, bbox_params=yolo_bbox_params)

# Read Labels
def read_labels(label_path):
    """Parse a whitespace-separated label file into boxes and class ids.

    Each non-blank line is expected to look like
    ``<class_id> <v1> <v2> ...``: an integer class id followed by float
    values (for YOLO boxes: x_center, y_center, width, height).

    Blank or whitespace-only lines (e.g. a trailing newline at the end of
    the file) are skipped instead of raising ``IndexError``.

    Args:
        label_path: Path to the label text file.

    Returns:
        A tuple ``(bboxes, class_labels)`` where ``bboxes`` is a list of
        lists of floats and ``class_labels`` is the parallel list of int
        class ids.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as int/float.
    """
    bboxes = []
    class_labels = []
    with open(label_path, "r") as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank line: nothing to parse.
                continue
            class_id = int(values[0])
            bbox = [float(v) for v in values[1:]]
            bboxes.append(bbox)
            class_labels.append(class_id)
    return bboxes, class_labels

# Write Labels
def write_labels(label_path, bboxes, class_labels):
    """Write class ids and boxes to a label file, one object per line.

    Each output line has the form ``<class_id> <v1> <v2> ...`` where the
    box values are converted with ``str`` and joined by single spaces.
    ``class_labels`` and ``bboxes`` are paired positionally; pairing stops
    at the shorter of the two.

    Args:
        label_path: Destination file path (overwritten if it exists).
        bboxes: Iterable of box value sequences.
        class_labels: Iterable of class ids, parallel to ``bboxes``.
    """
    with open(label_path, "w") as out:
        out.writelines(
            f"{class_id} {' '.join(str(value) for value in bbox)}\n"
            for class_id, bbox in zip(class_labels, bboxes)
        )

# Augment Dataset
def augment_dataset(base_dir, output_dir, target_image_count, augmentations_per_image):
    """Generate augmented images and labels for every split in ``folders``.

    For each split, images are read from ``<base_dir>/<split>/images`` and
    labels from ``<base_dir>/<split>/labels``. Augmented results are written
    to the mirrored layout under ``output_dir``. Each source image yields at
    most ``augmentations_per_image`` augmented copies, and generation for a
    split stops as soon as ``target_image_count - <number of source images>``
    augmented images have been written.

    Compared with a naive nested loop, this implementation:
      * really stops at the budget (the check guards both loops, and a
        non-positive budget produces nothing);
      * skips splits whose images directory is missing, images that OpenCV
        cannot decode, and images without a label file, printing a notice
        instead of crashing;
      * derives the label file name with ``os.path.splitext`` rather than a
        substring replace;
      * names outputs with the per-split running counter, so two source
        images sharing a stem (``a.jpg`` / ``a.png``) cannot overwrite each
        other's augmentations.

    Args:
        base_dir: Root of the source dataset.
        output_dir: Root under which augmented data is written.
        target_image_count: Desired per-split total used to derive the
            augmentation budget.
        augmentations_per_image: Maximum augmented copies per source image.
    """
    pipeline = get_augmentation_pipeline()

    for folder in folders:  # Process train, test, and valid
        print(f"Processing folder: {folder}")

        input_image_dir = os.path.join(base_dir, folder, "images")
        input_label_dir = os.path.join(base_dir, folder, "labels")
        output_image_dir = os.path.join(output_dir, folder, "images")
        output_label_dir = os.path.join(output_dir, folder, "labels")

        if not os.path.isdir(input_image_dir):
            # Not every dataset ships all three splits.
            print(f"Skipping folder: {folder} (missing directory {input_image_dir})")
            continue

        # Create output directories if they do not exist
        os.makedirs(output_image_dir, exist_ok=True)
        os.makedirs(output_label_dir, exist_ok=True)

        # List all images in the current folder. Sorting makes the run
        # deterministic (os.listdir order is arbitrary); the extension check
        # is case-insensitive so ".JPG"/".PNG" files are not ignored.
        images = sorted(
            f for f in os.listdir(input_image_dir)
            if f.lower().endswith((".jpg", ".png"))
        )
        num_images = len(images)
        # Number of augmented images still allowed for this split.
        budget = target_image_count - num_images
        generated_count = 0

        for image_name in images:
            if generated_count >= budget:
                break  # Budget exhausted: stop processing this split.

            stem = os.path.splitext(image_name)[0]
            image_path = os.path.join(input_image_dir, image_name)
            label_path = os.path.join(input_label_dir, stem + ".txt")

            # Load the image and corresponding labels
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                print(f"Skipping unreadable image: {image_path}")
                continue
            if not os.path.isfile(label_path):
                print(f"Skipping image without label file: {image_path}")
                continue
            bboxes, class_labels = read_labels(label_path)

            for _ in range(augmentations_per_image):
                # Stop augmentation if the target count is reached
                if generated_count >= budget:
                    break

                augmented = pipeline(image=img, bboxes=bboxes, class_labels=class_labels)

                # The running counter keeps names unique within the split.
                aug_stem = f"{stem}_aug_{generated_count}"

                # Save augmented image
                cv2.imwrite(os.path.join(output_image_dir, aug_stem + ".jpg"), augmented["image"])

                # Save updated labels
                write_labels(
                    os.path.join(output_label_dir, aug_stem + ".txt"),
                    augmented["bboxes"],
                    augmented["class_labels"],
                )

                generated_count += 1

        print(f"Completed folder: {folder}, Total Augmented: {generated_count} images.")

# Run Augmentation: process every configured split using the module-level settings.
augment_dataset(
    base_dir=base_dir,
    output_dir=output_dir,
    target_image_count=target_image_count,
    augmentations_per_image=augmentations_per_image,
)
