In [1]:
import cv2
import os
import random
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from matplotlib import patches


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Augmentation pipeline shared by every cell below. Each transform carries its
# own probability `p`; bounding boxes are passed in YOLO format and their
# class labels travel alongside them in the `category_ids` field.
augmentation_pipeline = A.Compose(
    transforms=[
        # Geometric transforms (these move the boxes as well as the pixels).
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=30, p=0.7),
        # Photometric transforms (pixel values only).
        A.RandomBrightnessContrast(p=0.5),
        A.GaussianBlur(p=0.2),
        A.HueSaturationValue(p=0.3),
        A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.2),
    ],
    bbox_params=A.BboxParams(
        format='yolo',
        label_fields=['category_ids'],
    ),
)

In [None]:
def visualize(image, boxes):
    """Display ``image`` with each box drawn as a red, unfilled rectangle.

    Every entry of ``boxes`` is read as ``[label, xmin, ymin, xmax, ymax]``.
    The first element is skipped; the remaining four values are converted
    with ``int`` before drawing.
    """
    _, axes = plt.subplots(1, figsize=(10, 10))
    axes.imshow(image)
    for entry in boxes:
        left, top, right, bottom = (int(value) for value in entry[1:])
        outline = patches.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            linewidth=2,
            edgecolor='r',
            facecolor='none',
        )
        axes.add_patch(outline)
    plt.show()

In [None]:
def normalize_bboxes(image, boxes):
    """Scale pixel-space boxes to fractions of the image size.

    Args:
        image: Object whose ``shape`` starts with ``(height, width)``.
        boxes: Iterable of ``[cls, xmin, ymin, xmax, ymax]`` entries.

    Returns:
        A new list of ``[cls, xmin, ymin, xmax, ymax]`` lists in which x
        values are divided by the width and y values by the height.
    """
    img_h, img_w = image.shape[:2]
    return [
        [label, left / img_w, top / img_h, right / img_w, bottom / img_h]
        for label, left, top, right, bottom in boxes
    ]

In [None]:
def clip_normalized_bboxes(boxes):
    """Clamp all four coordinates of every box into the range [0.0, 1.0].

    Args:
        boxes: Iterable of ``[cls, xmin, ymin, xmax, ymax]`` entries.

    Returns:
        A new list of boxes with the class entry passed through unchanged.
    """
    def _clamp(value):
        return max(0.0, min(value, 1.0))

    clamped = []
    for label, left, top, right, bottom in boxes:
        clamped.append(
            [label, _clamp(left), _clamp(top), _clamp(right), _clamp(bottom)]
        )
    return clamped

In [None]:
def filter_invalid_bboxes(boxes):
    """Keep only boxes with strictly positive width and height.

    Args:
        boxes: Iterable of ``[cls, xmin, ymin, xmax, ymax]`` entries.

    Returns:
        A new list with a fresh copy of every box for which
        ``xmax > xmin`` and ``ymax > ymin``; input order is preserved.
    """
    return [
        [label, left, top, right, bottom]
        for label, left, top, right, bottom in boxes
        if right > left and bottom > top
    ]

In [None]:
def convert_to_yolo_format(boxes):
    """Turn corner-style boxes into ``[x_center, y_center, width, height]``.

    Args:
        boxes: Iterable of ``[cls, xmin, ymin, xmax, ymax]`` entries.

    Returns:
        A list with one four-element list per input box. The class entry is
        dropped, so callers that need labels must keep them separately.
    """
    converted = []
    for _, left, top, right, bottom in boxes:
        center = [(left + right) / 2, (top + bottom) / 2]
        extent = [right - left, bottom - top]
        converted.append(center + extent)
    return converted

In [None]:
def augment_image(image, boxes, return_category_ids=False):
    """Run the augmentation pipeline on one image and its boxes.

    The boxes are normalized by the image size, clipped to [0, 1], stripped
    of entries without positive width and height, and converted to YOLO
    format before being handed to ``augmentation_pipeline``.

    Args:
        image: Image array passed to the pipeline as ``image``.
        boxes: Iterable of ``[cls, xmin, ymin, xmax, ymax]`` entries in
            pixel coordinates.
        return_category_ids: When true, also return the class ids reported
            by the pipeline. Boxes can be removed by the filtering step
            above, so the class ids of the *input* ``boxes`` are not a
            reliable match for the returned boxes; use this flag when labels
            are needed.

    Returns:
        ``(transformed_image, transformed_bboxes)`` by default, or
        ``(transformed_image, transformed_bboxes, category_ids)`` when
        ``return_category_ids`` is true.
    """
    prepared_boxes = normalize_bboxes(image, boxes)
    prepared_boxes = clip_normalized_bboxes(prepared_boxes)
    prepared_boxes = filter_invalid_bboxes(prepared_boxes)
    transformed = augmentation_pipeline(
        image=image,
        bboxes=convert_to_yolo_format(prepared_boxes),
        # Labels are taken from the filtered list so they line up with bboxes.
        category_ids=[box[0] for box in prepared_boxes]
    )
    transformed_image = transformed['image']
    transformed_bboxes = transformed['bboxes']
    if return_category_ids:
        return transformed_image, transformed_bboxes, transformed['category_ids']
    return transformed_image, transformed_bboxes

In [None]:
# Placeholder locations: point these at the processed images and at the
# matching label files (one `<image stem>.txt` per image).
image_dir = 'PATH_TO_PROCESSED_IMAGE_DIR'
label_dir = 'PATH_TO_LABEL_DIR'

# Pick one random sample and derive its label path from the file stem, so the
# lookup does not depend on the image extension being exactly '.png'.
image_file = random.choice(os.listdir(image_dir))
image_path = os.path.join(image_dir, image_file)
label_path = os.path.join(label_dir, os.path.splitext(image_file)[0] + '.txt')

# cv2.imread signals failure by returning None instead of raising; fail here
# with a clear message rather than inside cvtColor.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Each label line is `cls xmin ymin xmax ymax`; blank lines are ignored.
boxes = []
with open(label_path, 'r') as label_file:
    for line in label_file:
        parts = line.split()
        if not parts:
            continue
        cls = int(parts[0])
        xmin, ymin, xmax, ymax = map(float, parts[1:])
        boxes.append([cls, xmin, ymin, xmax, ymax])

augmented_image, augmented_bboxes = augment_image(image, boxes)

In [None]:
def yolo_to_xyxy(bboxes, class_ids, img_width, img_height):
    """Convert normalized YOLO boxes to integer pixel corner boxes.

    Args:
        bboxes: Iterable of ``(x_center, y_center, width, height)`` values
            expressed as fractions of the image size.
        class_ids: Iterable of labels, paired with ``bboxes`` by position
            (pairing stops at the shorter of the two).
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        A list of ``[cls, xmin, ymin, xmax, ymax]`` lists whose coordinates
        are truncated with ``int``.
    """
    def _to_pixels(fraction, size):
        return int(fraction * size)

    converted = []
    for (x_c, y_c, w, h), label in zip(bboxes, class_ids):
        half_w = w / 2
        half_h = h / 2
        converted.append([
            label,
            _to_pixels(x_c - half_w, img_width),
            _to_pixels(y_c - half_h, img_height),
            _to_pixels(x_c + half_w, img_width),
            _to_pixels(y_c + half_h, img_height),
        ])
    return converted

In [None]:
# Augment one specific image and show it before and after.
image_file = "PATH_TO_IMAGE_FILE"
image_path = os.path.join(image_dir, image_file)
# Derive the label name from the file stem so any image extension works.
label_path = os.path.join(label_dir, os.path.splitext(image_file)[0] + '.txt')
# cv2.imread returns None on failure; stop with a clear message.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Each label line is `cls xmin ymin xmax ymax`; blank lines are ignored.
boxes = []
with open(label_path, 'r') as label_file:
    for line in label_file:
        parts = line.split()
        if not parts:
            continue
        cls = int(parts[0])
        xmin, ymin, xmax, ymax = map(float, parts[1:])
        boxes.append([cls, xmin, ymin, xmax, ymax])
augmented_image, augmented_bboxes = augment_image(image, boxes)
print("Augmented vis_bboxes:", augmented_bboxes)
h0, w0 = image.shape[:2]
orig_vis_bboxes = [[*box] for box in boxes]
visualize(image, orig_vis_bboxes)
# augment_image clips the boxes and drops degenerate ones before augmenting,
# so the labels must come from the same filtered list, not from raw `boxes`.
# NOTE(review): the pipeline itself may also drop boxes, which would still
# shift this pairing; harmless here because visualize() ignores the class.
aug_class_ids = [
    box[0]
    for box in filter_invalid_bboxes(
        clip_normalized_bboxes(normalize_bboxes(image, boxes))
    )
]
aug_vis_bboxes = yolo_to_xyxy(augmented_bboxes, aug_class_ids, w0, h0)
visualize(augmented_image, aug_vis_bboxes)

In [None]:
def save_augmented_image_and_labels(augmented_image, augmented_bboxes, class_ids, image_path, label_path, images_output_dir, labels_output_dir):
    """Write one augmented image and its YOLO label file to the output folders.

    Args:
        augmented_image: RGB image array; converted to BGR before saving.
        augmented_bboxes: Iterable of ``(x_center, y_center, width, height)``.
        class_ids: Labels paired with ``augmented_bboxes`` by position.
        image_path: Source image path; only its base name is reused.
        label_path: Source label path; only its base name is reused.
        images_output_dir: Existing directory that receives the image.
        labels_output_dir: Existing directory that receives the label file.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    augmented_image_path = os.path.join(images_output_dir, os.path.basename(image_path))
    augmented_image_bgr = cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its return value; check it so a
    # label file is never written for an image that was not saved.
    if not cv2.imwrite(augmented_image_path, augmented_image_bgr):
        raise OSError(f"Failed to write image: {augmented_image_path}")
    augmented_label_path = os.path.join(labels_output_dir, os.path.basename(label_path))
    with open(augmented_label_path, 'w') as f:
        for bbox, cls in zip(augmented_bboxes, class_ids):
            x_center, y_center, width, height = bbox
            # Write the class as an integer index; a float id would otherwise
            # be emitted as e.g. "1.0".
            f.write(f"{int(cls)} {x_center} {y_center} {width} {height}\n")
# Root folder for the augmented dataset, with one sub-folder for images and
# one for labels; both are created up front if they do not exist yet.
output_dir = 'PATH_TO_AUGMENTED_DATA'
images_output_dir, labels_output_dir = (
    os.path.join(output_dir, subdir) for subdir in ("images", "labels")
)
for target_dir in (images_output_dir, labels_output_dir):
    os.makedirs(target_dir, exist_ok=True)

In [None]:
# Augment every image in image_dir once and save the result with YOLO labels.
for image_file in os.listdir(image_dir):
    image_path = os.path.join(image_dir, image_file)
    # Derive the label name from the file stem so any image extension works.
    label_path = os.path.join(label_dir, os.path.splitext(image_file)[0] + '.txt')
    # cv2.imread returns None for anything it cannot decode (stray files,
    # sub-folders); skip those and unlabeled images instead of aborting the run.
    image = cv2.imread(image_path)
    if image is None or not os.path.isfile(label_path):
        print(f"Skipping {image_file}: unreadable image or missing label file")
        continue
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Each label line is `cls xmin ymin xmax ymax`; blank lines are ignored.
    boxes = []
    with open(label_path, 'r') as label_file:
        for line in label_file:
            parts = line.split()
            if not parts:
                continue
            cls = int(parts[0])
            xmin, ymin, xmax, ymax = map(float, parts[1:])
            boxes.append([cls, xmin, ymin, xmax, ymax])
    # Same preparation as augment_image(): normalize, clip to [0, 1] and drop
    # degenerate boxes, so out-of-range coordinates never reach the pipeline.
    prepared_boxes = filter_invalid_bboxes(
        clip_normalized_bboxes(normalize_bboxes(image, boxes))
    )
    transformed = augmentation_pipeline(
        image=image,
        bboxes=convert_to_yolo_format(prepared_boxes),
        # Labels come from the filtered list so they stay aligned with bboxes.
        category_ids=[box[0] for box in prepared_boxes]
    )
    augmented_image = transformed['image']
    augmented_bboxes = transformed['bboxes']
    augmented_class_ids = transformed['category_ids']
    save_augmented_image_and_labels(
        augmented_image, augmented_bboxes, augmented_class_ids,
        image_path, label_path, images_output_dir, labels_output_dir
    )

In [None]:
# Sanity check: reload one saved sample and draw its saved YOLO labels.
random_image_file = random.choice(os.listdir(os.path.join(output_dir, 'images')))
random_image_path = os.path.join(output_dir, 'images', random_image_file)
# Derive the label name from the file stem so any image extension works.
random_label_path = os.path.join(
    output_dir, 'labels', os.path.splitext(random_image_file)[0] + '.txt'
)
# cv2.imread returns None on failure; stop with a clear message.
aug_image = cv2.imread(random_image_path)
if aug_image is None:
    raise FileNotFoundError(f"Could not read image: {random_image_path}")
aug_image = cv2.cvtColor(aug_image, cv2.COLOR_BGR2RGB)
img_h, img_w = aug_image.shape[:2]
# Each saved line is `cls x_center y_center width height` (normalized).
aug_yolo_boxes = []
aug_labels = []
with open(random_label_path, 'r') as label_file:
    for line in label_file:
        parts = line.split()
        if not parts:
            continue
        # int(float(...)) tolerates class ids that were written as "1.0".
        aug_labels.append(int(float(parts[0])))
        aug_yolo_boxes.append([float(value) for value in parts[1:]])
# Reuse the shared converter instead of repeating the arithmetic inline.
aug_boxes = yolo_to_xyxy(aug_yolo_boxes, aug_labels, img_w, img_h)
visualize(aug_image, aug_boxes)