## Notebook for augmenting images used for the first-stage YOLO model of a 2-stage model pipeline

In [8]:
import os
import cv2
import numpy as np
import hashlib
import random
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Root of the source dataset: load_image_and_boxes reads
# <INPUT_DIR>/images/<name>.jpg and <INPUT_DIR>/labels/<name>.txt.
INPUT_DIR = '/path/to/input'
# Root for augmented output: save_augmented writes into
# <OUTPUT_DIR>/images and <OUTPUT_DIR>/labels, creating them if needed.
OUTPUT_DIR = '/path/to/output'


In [None]:
# Helper functions
def load_image_and_boxes(filename):
    """Load an image and its YOLO-format labels from INPUT_DIR.

    Args:
        filename: Base name without extension. The image is read from
            ``INPUT_DIR/images/<filename>.jpg`` and the labels from
            ``INPUT_DIR/labels/<filename>.txt``.

    Returns:
        A tuple ``(image, boxes, class_labels, width, height)`` where
        ``image`` is the array returned by ``cv2.imread``, ``boxes`` is a
        list of ``[x_min, y_min, x_max, y_max]`` in pixel coordinates,
        ``class_labels`` is the matching list of integer class ids, and
        ``width`` / ``height`` are the image dimensions in pixels.

    Raises:
        FileNotFoundError: If the image cannot be read, or the label file
            does not exist.
        ValueError: If a non-blank label line does not contain exactly
            five numeric fields.
    """
    img_path = os.path.join(INPUT_DIR, "images", filename + ".jpg")
    txt_path = os.path.join(INPUT_DIR, "labels", filename + ".txt")

    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    height, width = image.shape[:2]

    boxes = []
    class_labels = []
    with open(txt_path, 'r') as f:
        for line in f:
            fields = line.split()
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not fields:
                continue
            cls, x, y, w, h = map(float, fields)
            # Convert normalized (x_center, y_center, w, h) to pixel
            # [x_min, y_min, x_max, y_max].
            xmin = (x - w / 2) * width
            ymin = (y - h / 2) * height
            xmax = (x + w / 2) * width
            ymax = (y + h / 2) * height
            boxes.append([xmin, ymin, xmax, ymax])
            class_labels.append(int(cls))
    return image, boxes, class_labels, width, height

def save_augmented(image, boxes, class_labels, filename):
    """Write an augmented image and its YOLO-format labels under OUTPUT_DIR.

    The output base name is ``<filename stem>-<hash>``, where ``<hash>`` is
    the first four hex digits of the MD5 of the image bytes. With only four
    hex digits, two different augmentations of the same file can collide;
    the later write then overwrites the earlier one.

    Args:
        image: Image array to write as JPEG.
        boxes: Iterable of ``(x_min, y_min, x_max, y_max)`` in pixel
            coordinates of ``image``.
        class_labels: Class ids, paired positionally with ``boxes``.
        filename: Name of the source sample; any extension is stripped.

    Raises:
        OSError: If the image cannot be written (no label file is produced
            in that case).
    """
    hash_code = hashlib.md5(image.tobytes()).hexdigest()[:4]
    new_filename = f"{os.path.splitext(filename)[0]}-{hash_code}"

    image_out_path = os.path.join(OUTPUT_DIR, 'images', new_filename + ".jpg")
    label_out_path = os.path.join(OUTPUT_DIR, 'labels', new_filename + ".txt")

    os.makedirs(os.path.dirname(image_out_path), exist_ok=True)
    os.makedirs(os.path.dirname(label_out_path), exist_ok=True)

    height, width = image.shape[:2]
    # cv2.imwrite reports failure via its boolean return value; check it so
    # we never leave a label file without its image.
    if not cv2.imwrite(image_out_path, image):
        raise OSError(f"Failed to write image: {image_out_path}")

    with open(label_out_path, 'w') as f:
        for cls, box in zip(class_labels, boxes):
            x_min, y_min, x_max, y_max = box
            # Convert pixel corners back to normalized YOLO
            # (x_center, y_center, w, h).
            x_center = ((x_min + x_max) / 2) / width
            y_center = ((y_min + y_max) / 2) / height
            w = (x_max - x_min) / width
            h = (y_max - y_min) / height
            f.write(f"{cls} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")




In [None]:
# Add Gaussian noise
def add_gaussian_noise(filename, sigma=25):
    """Add zero-mean Gaussian noise to an image and save the result.

    Bounding boxes and class labels are passed through unchanged because
    the augmentation does not move any pixels.

    Args:
        filename: Base name of the sample, as accepted by
            ``load_image_and_boxes``.
        sigma: Standard deviation of the noise in pixel-intensity units.
    """
    image, boxes, class_labels, _, _ = load_image_and_boxes(filename)

    # Do the arithmetic in floating point. Casting the noise to uint8 first
    # would wrap negative samples, and a uint8 + uint8 sum overflows modulo
    # 256, so the clip below would never have anything to clip.
    noise = np.random.normal(0, sigma, image.shape)
    noisy = np.clip(image.astype(np.float32) + noise, 0, 255).astype(np.uint8)

    save_augmented(noisy, boxes, class_labels, filename)


In [None]:
# Adjust brightness
def adjust_random_brightness(filename):
    """Scale pixel intensities by a random gain and save the result.

    The gain is drawn with ``random.uniform(0.5, 1.5)``. The scaled values
    are clipped to [0, 255] and converted back to uint8. Boxes and class
    labels are saved unchanged.

    Args:
        filename: Base name of the sample, as accepted by
            ``load_image_and_boxes``.
    """
    image, boxes, class_labels, _width, _height = load_image_and_boxes(filename)

    gain = random.uniform(0.5, 1.5)
    # Work in float32 so the multiplication cannot overflow uint8.
    scaled = image.astype(np.float32) * gain
    brightened = np.clip(scaled, 0, 255).astype(np.uint8)

    save_augmented(brightened, boxes, class_labels, filename)


In [None]:
# Block out random chunk

def add_black_rect(filename):
    """Black out one random rectangle that avoids every bounding box.

    Up to 10 candidate rectangles are drawn, each spanning between 5% and
    20% of the image width and height. The first candidate that neither
    overlaps nor touches any box is filled with zeros. If every candidate
    is rejected, the image is saved unmodified.

    Args:
        filename: Base name of the sample, as accepted by
            ``load_image_and_boxes``.
    """
    image, boxes, class_labels, width, height = load_image_and_boxes(filename)

    attempts_left = 10
    while attempts_left > 0:
        attempts_left -= 1

        rect_w = random.randint(int(0.05 * width), int(0.2 * width))
        rect_h = random.randint(int(0.05 * height), int(0.2 * height))
        left = random.randint(0, width - rect_w)
        top = random.randint(0, height - rect_h)
        right = left + rect_w
        bottom = top + rect_h

        # A candidate is rejected unless it lies strictly to one side of
        # every box.
        hits_a_box = any(
            not (right < bx0 or left > bx1 or bottom < by0 or top > by1)
            for bx0, by0, bx1, by1 in boxes
        )
        if hits_a_box:
            continue

        image[top:bottom, left:right] = 0
        break

    save_augmented(image, boxes, class_labels, filename)


In [None]:
# Horizontal flip

def horizontal_flip(filename):
    """Mirror an image left-to-right, with its boxes, and save the result.

    The flip is always applied (``p=1.0``). Boxes are handed to
    Albumentations in ``pascal_voc`` (pixel corner) format so they are
    transformed together with the image.

    Args:
        filename: Base name of the sample, as accepted by
            ``load_image_and_boxes``.
    """
    image, boxes, class_labels, _width, _height = load_image_and_boxes(filename)

    bbox_config = A.BboxParams(format='pascal_voc', label_fields=['class_labels'])
    pipeline = A.Compose([A.HorizontalFlip(p=1.0)], bbox_params=bbox_config)

    result = pipeline(image=image, bboxes=boxes, class_labels=class_labels)

    save_augmented(
        result['image'],
        result['bboxes'],
        result['class_labels'],
        filename,
    )


In [None]:
# random rotate
def random_rotate(filename):
    """Rotate an image and its boxes by a random angle and save the result.

    Uses ``A.Rotate`` with ``limit=30`` and a constant border of value 0,
    always applied (``p=1.0``). Boxes are passed in ``pascal_voc`` (pixel
    corner) format and the transformed boxes are the ones saved.

    Args:
        filename: Base name of the sample, as accepted by
            ``load_image_and_boxes``.
    """
    image, boxes, class_labels, _width, _height = load_image_and_boxes(filename)

    rotation = A.Rotate(
        limit=30,
        border_mode=cv2.BORDER_CONSTANT,
        value=0,
        p=1.0,
    )
    bbox_config = A.BboxParams(format='pascal_voc', label_fields=['class_labels'])
    pipeline = A.Compose([rotation], bbox_params=bbox_config)

    result = pipeline(image=image, bboxes=boxes, class_labels=class_labels)

    save_augmented(
        result['image'],
        result['bboxes'],
        result['class_labels'],
        filename,
    )


In [None]:
# random scale
def random_scale_with_padding(filename):
    """Rescale an image by a random factor in [0.5, 1.5] and save the result.

    When the image is scaled down it is padded with black back up to at
    least its original size. When it is scaled up it is saved at the larger
    size. Boxes are passed in ``pascal_voc`` (pixel corner) format and
    transformed together with the image.

    Args:
        filename: Base name of the sample, as accepted by
            ``load_image_and_boxes``.
    """
    image, boxes, class_labels, w, h = load_image_and_boxes(filename)

    transform = A.Compose([
        # Albumentations biases scale_limit by 1, so (-0.5, 0.5) gives
        # factors in [0.5, 1.5]. The previous (0.5, 1.5) gave 1.5x-2.5x,
        # which always upscaled and left the padding step with nothing to do.
        A.RandomScale(scale_limit=(-0.5, 0.5), p=1.0),
        A.PadIfNeeded(min_height=h, min_width=w, border_mode=cv2.BORDER_CONSTANT, value=0, p=1.0),
    ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))

    augmented = transform(image=image, bboxes=boxes, class_labels=class_labels)

    save_augmented(augmented['image'], augmented['bboxes'], augmented['class_labels'], filename)
