# Data augmentation Experiments

For this project we will implement different types of data augmentation, as well as custom data augmentations.

## Imports


In [None]:
# GradScaler is used for mixed precision training in PyTorch.
from torch.amp import GradScaler
import sys
from pathlib import Path
# Make the project's "src" directory (one level above the current working
# directory) importable from this notebook.
src_dir = Path().resolve().parent / "src"
sys.path.append(str(src_dir))

Classes and augmentation functions

In [None]:
# Import necessary functions and classes
from un_detector.data.datasets import HazmatDataset
from un_detector.data.augmentation import (
    Compose,
    ToTensor,
    RandomHorizontalFlip,
    RandomBrightnessCont,
    RandomBlur,
    RandomRotate,
    RandomZoom,
    get_augmented_transform,
    visualize_augmentations
)
from un_detector.utils.file_io import save_json, tensor_to_list

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


# Faster R-CNN Data Augmentations

In [9]:
# Create the gradient scaler used for mixed precision training.
# NOTE(review): `scaler` is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed here.
scaler = GradScaler()

In [None]:
# `os` is imported here because this cell runs before the later cell that
# imports it; without it a top-to-bottom run fails with NameError.
import os

# Build the augmentation pipeline (training mode)
augmentations = get_augmented_transform(train=True)

# Output locations for the augmented images and their annotation file
base_dir = 'data/processed/prorail_coco_format/augmented_data'
augmented_images_dir = base_dir + '/images'
augmented_annotations_file = base_dir + '/annotations/instances_aug.json'

# Create both output directories up front. The annotations directory is
# created as well so that writing the annotation file later does not depend on
# the save helper creating missing parent directories.
os.makedirs(augmented_images_dir, exist_ok=True)
os.makedirs(os.path.dirname(augmented_annotations_file), exist_ok=True)

# Start from a clean image directory: remove files and symlinks, and remove
# sub-directories only when they are empty (non-empty ones are reported and kept).
for entry in os.listdir(augmented_images_dir):
    entry_path = os.path.join(augmented_images_dir, entry)
    try:
        if os.path.isfile(entry_path) or os.path.islink(entry_path):
            os.unlink(entry_path)  # Remove the file or symlink
        elif os.path.isdir(entry_path):
            os.rmdir(entry_path)  # Remove the directory (if empty)
    except OSError as e:
        # Best effort: report the problem and continue with the next entry
        print(f"Error deleting file {entry_path}: {e}")

# Remove a stale annotations file so that a fresh one is written later
if os.path.exists(augmented_annotations_file):
    os.remove(augmented_annotations_file)  # Delete the existing file
    print(f"Deleted existing annotations file: {augmented_annotations_file}")

In [None]:
# The augmented dataset contains a single category
categories_list = [{"id": 1, "name": "hazmat code"}]

# Load the source dataset. The train split is used here only as input for the
# augmentations, not for training; for this use case you may prefer to
# concatenate the train, validation and test datasets.
original_dataset = HazmatDataset(
    transforms=augmentations,
    train=False,
    root='data/processed/prorail_coco_format/formatted_data/train',
    ann_file='annotations/instances_train.json',
)

# Write an empty annotation skeleton so the annotations file exists from the start
augmented_annotations = dict(
    images=[],
    annotations=[],
    categories=categories_list,
)
save_json(augmented_annotations, augmented_annotations_file)
print(f"Created new annotations file: {augmented_annotations_file}")


# `os` and `torch` are used below but are not imported by any earlier cell.
import os

import torch

# NOTE(review): `tqdm` and `F` are also used below without a visible import;
# `F.to_pil_image` looks like torchvision.transforms.functional - make sure
# both are imported before running this cell.
# NOTE(review): `original_dataset` was constructed with
# transforms=augmentations and the same pipeline is applied again in the loop
# below - confirm the dataset does not already apply it, otherwise every
# sample is augmented twice.

# Counters for image and annotation IDs (continue after any existing entries)
img_id = len(augmented_annotations['images'])
ann_id = len(augmented_annotations['annotations'])

# Maximum number of augmented images to create
num_images_to_generate = 1000  # Set this to your desired limit

# One augmented image is produced per source sample, so the loop is bounded by
# whichever is smaller: the limit or the dataset size. Bounding the range
# (instead of breaking out of the loop) also gives the progress bar the
# correct total.
num_samples_to_process = min(num_images_to_generate, len(original_dataset))

generated_count = 0  # Counter for how many images we have generated
for idx in tqdm(range(num_samples_to_process), desc="Augmenting dataset"):
    img, target = original_dataset[idx]

    # Apply augmentations
    aug_img, aug_target = augmentations(img, target)

    # Save augmented image (tensors are converted to PIL images first)
    aug_img_path = os.path.join(augmented_images_dir, f'aug_{img_id}.jpg')
    if isinstance(aug_img, torch.Tensor):
        aug_img = F.to_pil_image(aug_img)
    aug_img.save(aug_img_path)

    # Register the image in the annotation structure
    augmented_annotations['images'].append({
        'id': img_id,
        'file_name': os.path.basename(aug_img_path),
        'width': aug_img.width,
        'height': aug_img.height
    })

    # Register one annotation per box. The stored bbox is
    # [x, y, width, height], derived from the first four box values
    # (assumes boxes are [x_min, y_min, x_max, y_max] - TODO confirm).
    for box, label in zip(aug_target['boxes'], aug_target['labels']):
        x_min, y_min, x_max, y_max = (box[i].item() for i in range(4))
        box_width = x_max - x_min
        box_height = y_max - y_min
        augmented_annotations['annotations'].append({
            'id': ann_id,
            'image_id': img_id,
            'category_id': label.item(),
            'bbox': [x_min, y_min, box_width, box_height],
            # Plain number (not a tensor), consistent with the bbox values
            'area': box_width * box_height,
            'iscrowd': 0
        })
        ann_id += 1

    # Update the image ID and the generated image counter
    img_id += 1
    generated_count += 1

# Convert any remaining tensors to lists before saving the annotations
augmented_annotations = tensor_to_list(augmented_annotations)

# Save augmented annotations
save_json(augmented_annotations, augmented_annotations_file)

print(f"Generated {generated_count} augmented images.")

In [None]:
# Visual check of the generated data: pass the augmented image directory and
# annotation file to the project's visualisation helper, requesting 5 images.
visualize_augmentations(augmented_images_dir, augmented_annotations_file, num_images=5)

# YOLOv11
For YOLOv11 we can use the standard Ultralytics library for data augmentation.

In [None]:
# Example of training a YOLOv11 model with the augmentations applied.
# NOTE(review): `model`, `path` and `device` are not defined in any cell
# visible in this notebook - define them before running this cell.
import os

results = model.train(
    # Build the path with os.path.join so it also works outside Windows
    data=os.path.join(path, 'yolo', 'dataset.yaml'),
    epochs=10,
    scale=0.5,
    shear=1.1,
    device=device,
    degrees=10.5,
    # NOTE(review): the Ultralytics docs list 0.0-0.001 as the range for
    # `perspective`; 0.5 is far outside it - confirm this is intended.
    perspective=0.5,
    mosaic=0.5,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    # The Ultralytics setting is named `multi_scale`; `multiscale` is not a
    # recognised train argument.
    multi_scale=True,
)
model.save(os.path.join("data", "yolo", "yolo11n_trained.pt"))

# Weather Augmentations
This code augments images with weather conditions. However, the annotation algorithm has not been written yet, so it can only be used for human evaluation (e.g. visualising how the model performs under weather augmentations).

In [None]:
import os
import random
from PIL import Image
from typing import Union
import cv2
def _default_weather_presets() -> dict:
    """Return the built-in weather presets, keyed by preset name."""
    # NOTE(review): newer Albumentations releases renamed some of these
    # arguments (e.g. `angle_range`, `num_shadows_limit`) - confirm the
    # parameter names against the installed version.
    return {
        'rain': A.RandomRain(
            brightness_coefficient=0.9,  # Slightly darker rain
            drop_width=3,  # Thicker rain drops
            blur_value=5,  # Stronger blur effect
            p=1,  # Always apply rain
            drop_length=20),  # Longer rain streaks
        'sun_flare': A.RandomSunFlare(
            flare_roi=(0, 0, 1, 0.5),  # Flare in the upper half of the image
            angle_lower=0.5,  # Lower bound of the flare angle (stronger sun flare)
            p=1),  # Always apply sun flare
        'shadow': A.RandomShadow(
            num_shadows_lower=10,  # More shadows
            num_shadows_upper=15,
            shadow_dimension=8,  # Larger and darker shadows
            shadow_roi=(0, 0, 1, 1),  # Shadows across the entire image
            p=1),  # Always apply shadows
        'fog': A.RandomFog(p=1),  # Always apply fog
        # A snow preset is intentionally left out: the result looks unrealistic.
    }


def generate_augmented_images(
    input_image_path: str,
    output_dir: str,
    augmentation: Union[str, A.Compose],
    num_augmentations: int = 5,
    seed: Union[int, None] = None,
    quality: int = 95,
    prefix: str = "aug",
    verbose: bool = True,
    augmentation_presets: Union[dict, None] = None
) -> list:
    """
    Generate and save augmented images to a specified directory.

    NOTE(review): `A` (used in the signature and for the presets) is not
    imported in any cell visible in this notebook; it looks like
    `albumentations` - make sure it is imported before defining this function.

    Parameters:
    - input_image_path: Path to source image (str)
    - output_dir: Output directory path (str) - will be created if not exists
    - augmentation: Albumentations transform or preset name ('rain', 'fog', etc.)
    - num_augmentations: Number of augmented versions to create (int)
    - seed: Optional random seed (int). Only Python's `random` module is
      seeded; whether that makes the transforms reproducible depends on the
      installed Albumentations version (TODO confirm).
    - quality: Output JPEG quality (1-100)
    - prefix: Filename prefix for output images
    - verbose: Print progress messages
    - augmentation_presets: Optional mapping of preset name -> transform used
      to resolve a string `augmentation`; the built-in weather presets are
      used when omitted

    Returns:
    List of saved file paths (list[str]). Augmentations that fail are reported
    on stdout and skipped, so the list can be shorter than `num_augmentations`.

    Raises:
    - FileNotFoundError: `input_image_path` does not exist
    - ValueError: the image cannot be decoded, or `augmentation` is an unknown
      preset name
    """
    # Validate inputs
    if not os.path.isfile(input_image_path):
        raise FileNotFoundError(f"Input image not found: {input_image_path}")

    if seed is not None:
        random.seed(seed)
        # Kept for backward compatibility. The running interpreter reads
        # PYTHONHASHSEED only at start-up, so this only affects child processes.
        os.environ['PYTHONHASHSEED'] = str(seed)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Load image. cv2.imread returns None (instead of raising) when the file
    # cannot be decoded, so check explicitly to give a clear error.
    bgr_image = cv2.imread(input_image_path)
    if bgr_image is None:
        raise ValueError(f"Could not decode image: {input_image_path}")
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    base_name = os.path.splitext(os.path.basename(input_image_path))[0]

    # Configure augmentation pipeline
    if isinstance(augmentation, str):
        # Presets are only needed (and only built) when resolving a name
        if augmentation_presets is None:
            augmentation_presets = _default_weather_presets()
        if augmentation not in augmentation_presets:
            raise ValueError(f"Unknown preset: {augmentation}. Available: {list(augmentation_presets.keys())}")
        transform = A.Compose([augmentation_presets[augmentation]])
        name_label = augmentation
    else:
        transform = augmentation
        # A pipeline object has no short name; embedding its repr would
        # produce an unusable filename, so use a fixed label instead.
        name_label = "custom"

    # Generate augmented images
    saved_paths = []
    for i in range(num_augmentations):
        try:
            augmented = transform(image=image)['image']
            output_path = os.path.join(output_dir, f"{name_label}-{i}-{prefix}_{base_name}.jpg")

            Image.fromarray(augmented).save(
                output_path,
                quality=quality,
                optimize=True,
                subsampling=0  # Keep highest chroma resolution
            )

            saved_paths.append(output_path)

            if verbose:
                print(f"Saved: {output_path}")

        except Exception as e:
            # Deliberate best effort: report the failure and continue with the
            # remaining augmentations.
            print(f"Error generating augmentation {i+1}: {str(e)}")

    return saved_paths