# Data augmentation

In [2]:
from ultralytics import YOLO
import os
import cv2
from cv2 import Mat
import numpy as np
import sys
# Project root: the current working directory; all dataset paths below are built from it.
HOME = os.getcwd()
# Echo the working directory and interpreter version so the run environment is visible in the output.
print(f"Directory: {HOME}")
print(f"Python version: {sys.version}")

Directory: c:\Users\Thomas\Pictures\YOLOv3
Python version: 3.10.4 (tags/v3.10.4:9d38120, Mar 23 2022, 23:13:41) [MSC v.1929 64 bit (AMD64)]


In [3]:
# Sample image and its label file from the training split, used by the demo cells further down.
example_image_path = os.path.join(HOME, "train", "images", "0.jpg")
example_label_path = os.path.join(HOME, "train", "labels", "0.txt")

Function to show an image in a separate window and wait for a key press

In [4]:
def show_image_cv2(img: Mat | str):
    """Display an image in an OpenCV window and block until a key is pressed.

    Args:
        img: An already-loaded image, or a path string; a string is read
            from disk with ``cv2.imread`` before being displayed.
    """
    frame = cv2.imread(img) if isinstance(img, str) else img
    cv2.imshow("Image", frame)
    # A delay of 0 makes waitKey block until any key is pressed.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Image manipulation functions

In [5]:
def change_brightness(img: Mat | str, value: float):
    """Brighten or darken an image by multiplying its pixel values.

    Args:
        img: An already-loaded image, or a path string that is read with
            ``cv2.imread``.
        value: Multiplicative gain passed as ``alpha`` to
            ``cv2.convertScaleAbs``; no offset is added (``beta=0``).

    Returns:
        The result of ``cv2.convertScaleAbs`` for the scaled image.
    """
    source = cv2.imread(img) if isinstance(img, str) else img
    scaled = cv2.convertScaleAbs(source, alpha=value, beta=0)
    return scaled

def change_contrast(img: Mat | str, value: float):
    """Add a constant offset to every pixel of an image.

    Args:
        img: An already-loaded image, or a path string that is read with
            ``cv2.imread``.
        value: Offset passed as ``beta`` to ``cv2.convertScaleAbs``; the
            gain is fixed at ``alpha=1.0``.

    Returns:
        The result of ``cv2.convertScaleAbs`` for the shifted image.
    """
    # NOTE(review): with alpha fixed at 1.0 this shifts all pixels uniformly
    # (a brightness offset) rather than stretching them around a mid-point;
    # confirm that this is the intended meaning of "contrast".
    source = cv2.imread(img) if isinstance(img, str) else img
    shifted = cv2.convertScaleAbs(source, alpha=1.0, beta=value)
    return shifted

def change_saturation(img: Mat | str, value: float):
    """Scale the saturation of a BGR image by ``value``.

    The image is converted to HSV, the S channel is multiplied by ``value``
    and the result is converted back to BGR.

    Args:
        img: An already-loaded BGR image, or a path string that is read with
            ``cv2.imread``.
        value: Saturation multiplier (``> 1`` increases, ``< 1`` decreases).

    Returns:
        A new BGR image with the adjusted saturation.
    """
    if isinstance(img, str):
        img = cv2.imread(img)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Do the multiplication in float so the product is not limited by the
    # channel's integer range while it is being computed.
    scaled = hsv[:, :, 1].astype(np.float32) * value
    if np.issubdtype(hsv.dtype, np.integer):
        # Writing an out-of-range float into an integer channel wraps around
        # (e.g. 300 -> 44 for uint8), which turns vivid pixels grey. Clamp to
        # the valid range of the channel before casting back.
        limits = np.iinfo(hsv.dtype)
        scaled = np.clip(scaled, limits.min, limits.max)
    hsv[:, :, 1] = scaled.astype(hsv.dtype)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

def blur_image(img: Mat | str, kernel_size: int):
    """Blur an image with ``cv2.blur`` using a square kernel.

    Args:
        img: An already-loaded image, or a path string that is read with
            ``cv2.imread``.
        kernel_size: Side length of the square kernel, in pixels.

    Returns:
        The blurred image returned by ``cv2.blur``.
    """
    source = cv2.imread(img) if isinstance(img, str) else img
    window = (kernel_size, kernel_size)
    return cv2.blur(source, window)

def sharpen_image(img: Mat | str, kernel_size: int = 20):
    """Sharpen an image with an unsharp-mask style convolution kernel.

    The kernel is ``2 * identity - box_mean``: every output pixel is twice
    the input pixel minus the mean of its ``kernel_size`` x ``kernel_size``
    neighbourhood. The kernel weights sum to 1.

    Args:
        img: An already-loaded image, or a path string that is read with
            ``cv2.imread``.
        kernel_size: Side length of the square neighbourhood used for the
            mean. Defaults to 20, the value this function always used.

    Returns:
        The filtered image, with the same depth as the input (``ddepth=-1``).
    """
    if isinstance(img, str):
        img = cv2.imread(img)
    # cv2.filter2D's default anchor is kernel_size // 2. The identity tap must
    # sit on the anchor; placing it at (kernel_size - 1) // 2 instead shifts
    # the sharpened image by one pixel whenever kernel_size is even.
    center = kernel_size // 2
    box_weight = 1.0 / float(kernel_size * kernel_size)
    kernel = np.full((kernel_size, kernel_size), -box_weight, np.float32)
    kernel[center, center] += 2.0
    return cv2.filter2D(img, -1, kernel)

def flip_image(img: Mat | str, flip_code: str):
    """Mirror an image with ``cv2.flip``.

    Args:
        img: An already-loaded image, or a path string that is read with
            ``cv2.imread``.
        flip_code: ``'horizontal'``, ``'vertical'`` or ``'both'``.

    Returns:
        The flipped image, or ``None`` when ``flip_code`` is not one of the
        three recognised names.
    """
    source = cv2.imread(img) if isinstance(img, str) else img
    # Names mapped to the numeric flip codes passed to cv2.flip.
    cv2_flip_codes = {'horizontal': 1, 'vertical': 0, 'both': -1}
    if flip_code not in cv2_flip_codes:
        return None
    return cv2.flip(source, cv2_flip_codes[flip_code])

Test image manipulation functions

In [6]:
# Load the sample image once; every preview below is derived from it.
# Each show_image_cv2 call blocks until a key is pressed in the window.
img = cv2.imread(example_image_path)

# Increase brightness (gain 1.5)
img_bright = change_brightness(img, 1.5)
show_image_cv2(img_bright)

# Decrease brightness (gain 0.5)
img_dark = change_brightness(img, 0.5)
show_image_cv2(img_dark)

# Blur with a 5x5 kernel
img_blur = blur_image(img, 5)
show_image_cv2(img_blur)

# Sharpen
img_sharp = sharpen_image(img)
show_image_cv2(img_sharp)

# Increase saturation (factor 1.5)
img_sat = change_saturation(img, 1.5)
show_image_cv2(img_sat)

# Decrease saturation (factor 0.5)
img_desat = change_saturation(img, 0.5)
show_image_cv2(img_desat)

# Flip around both axes
img_flip = flip_image(img, 'both')
show_image_cv2(img_flip)


Generate 4 random augmentation steps

In [7]:
def get_random_actions() -> list[str]:
    """Pick one random augmentation from each of the four categories.

    Returns:
        A four-element list in the fixed order brightness, blurriness,
        saturation, flip. Each entry is drawn with ``np.random.choice`` from
        that category's options.
    """
    # One draw per category, in this order.
    option_groups = (
        ['bright', 'dark'],
        ['blur', 'sharp'],
        ['sat', 'desat'],
        ['horizontal', 'vertical', 'both'],
    )
    return [np.random.choice(group, 1, replace=False)[0] for group in option_groups]

Apply augmentation effects to the image

In [8]:
def apply_actions(img: Mat, actions: list[str]):
    """Apply a sequence of named augmentations to an image, in order.

    Args:
        img: The image to augment.
        actions: Action names such as those produced by
            ``get_random_actions``. Names that are not recognised are
            skipped.

    Returns:
        The image after every recognised action has been applied.
    """
    # Each action name maps to the helper call that implements it.
    handlers = {
        'bright': lambda frame: change_brightness(frame, 1.25),
        'dark': lambda frame: change_brightness(frame, 0.75),
        'blur': lambda frame: blur_image(frame, 4),
        'sharp': lambda frame: sharpen_image(frame),
        'sat': lambda frame: change_saturation(frame, 1.25),
        'desat': lambda frame: change_saturation(frame, 0.75),
        'horizontal': lambda frame: flip_image(frame, 'horizontal'),
        'vertical': lambda frame: flip_image(frame, 'vertical'),
        'both': lambda frame: flip_image(frame, 'both'),
    }
    for action in actions:
        handler = handlers.get(action)
        if handler is not None:
            img = handler(img)
    return img

Visualize some random data augmentations

In [9]:
# Preview 20 random augmentation combinations of the sample image.
# No random seed is set in this notebook, so the printed combinations differ between runs.
img = cv2.imread(example_image_path)
for i in range(20):
    actions = get_random_actions()
    print(actions)
    # Always start from the same source image so the effects do not accumulate.
    img2 = apply_actions(img, actions)
    show_image_cv2(img2)

['dark', 'sharp', 'desat', 'both']
['bright', 'blur', 'sat', 'both']
['dark', 'blur', 'sat', 'horizontal']
['dark', 'blur', 'sat', 'vertical']
['bright', 'sharp', 'desat', 'vertical']
['bright', 'blur', 'desat', 'horizontal']
['dark', 'blur', 'desat', 'horizontal']
['bright', 'blur', 'sat', 'vertical']
['bright', 'sharp', 'desat', 'horizontal']
['bright', 'blur', 'sat', 'horizontal']
['bright', 'blur', 'sat', 'both']
['dark', 'sharp', 'desat', 'both']
['dark', 'blur', 'desat', 'vertical']
['dark', 'sharp', 'sat', 'horizontal']
['dark', 'sharp', 'sat', 'both']
['dark', 'blur', 'sat', 'horizontal']
['dark', 'blur', 'sat', 'horizontal']
['dark', 'sharp', 'sat', 'both']
['bright', 'sharp', 'sat', 'both']
['bright', 'sharp', 'sat', 'both']


Draw bounding boxes on the given image using their pixel coordinates

In [10]:
def draw_boxes(img: Mat | str, bounding_boxes: list, classes: list):
    """Draw labelled, lightly shaded bounding boxes on an image.

    Args:
        img: An already-loaded image, or a path string that is read with
            ``cv2.imread`` (the same convention as the other helpers in this
            notebook).
        bounding_boxes: Boxes as ``[x1, y1, x2, y2]`` in pixel coordinates.
        classes: One label per box, at the same index as the box.

    Returns:
        A new image with the boxes drawn. The array passed in is left
        untouched.
    """
    if isinstance(img, str):
        img = cv2.imread(img)
    else:
        # cv2.rectangle draws in place; work on a copy so the caller's array
        # does not end up with a stray outline of the first box.
        img = img.copy()
    accent = (255, 34, 134)
    for i, box in enumerate(bounding_boxes):
        label = classes[i]
        x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
        cv2.rectangle(img, (x1, y1), (x2, y2), accent, 1)
        # Shade the box interior: blend a black-filled copy at 20% opacity.
        overlay = img.copy()
        cv2.rectangle(overlay, (x1, y1), (x2, y2), (0, 0, 0), -1)
        img = cv2.addWeighted(overlay, 0.2, img, 0.8, 0)
        # 20x20 filled tag above the top-left corner, with the label on top.
        cv2.rectangle(img, (x1, y1 - 20), (x1 + 20, y1), accent, -1)
        # Format the label so non-string class ids (e.g. ints) also work.
        cv2.putText(img, f" {label}", (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
    return img

In [12]:
# Image and label directories of the training split.
images_path = os.path.join(HOME, 'train', 'images')
labels_path = os.path.join(HOME, 'train', 'labels')

Rename all files for simplicity

In [None]:
images_names = os.listdir(images_path)
# Remove the extension as a suffix. str.rstrip('.jpg') would instead strip any
# trailing '.', 'j', 'p' or 'g' characters, so "dog.jpg" would become "do".
names = [name.removesuffix('.jpg') for name in images_names]

In [None]:
# Rename every image/label pair to "<index>.jpg" / "<index>.txt".
for i, name in enumerate(names):
    img_path = os.path.join(images_path, name + '.jpg')
    lbl_path = os.path.join(labels_path, name + '.txt')
    img_path_new = os.path.join(images_path, str(i) + '.jpg')
    lbl_path_new = os.path.join(labels_path, str(i) + '.txt')
    if not (os.path.exists(img_path) and os.path.exists(lbl_path)):
        print('Error: ' + name)
        continue
    if name == str(i):
        # The pair already carries its final name.
        continue
    if os.path.exists(img_path_new) or os.path.exists(lbl_path_new):
        # os.rename silently replaces an existing target on POSIX and raises
        # on Windows. Refuse to clobber a file that has not been processed
        # yet and report it instead.
        print('Error: ' + name + ' (target ' + str(i) + ' already exists)')
        continue
    os.rename(img_path, img_path_new)
    os.rename(lbl_path, lbl_path_new)

Display top of the annotated dataset

In [13]:
names = os.listdir(images_path)
# Slice instead of range(10) so a directory with fewer than 10 files does not raise IndexError.
for image_name in names[:10]:
    img_path = os.path.join(images_path, image_name)
    # splitext removes the extension; rstrip('.jpg') strips a character set
    # and would mangle stems that end in '.', 'j', 'p' or 'g'.
    lbl_path = os.path.join(labels_path, os.path.splitext(image_name)[0] + '.txt')
    img = cv2.imread(img_path)
    if img is None or not os.path.exists(lbl_path):
        print('Error: ' + image_name)
        continue
    # Scale the normalised label coordinates by the real image size rather
    # than assuming every image is 640x480.
    img_height, img_width = img.shape[:2]
    with open(lbl_path, 'r') as f:
        lines = f.readlines()
    bounding_boxes = []
    classes = []
    for line in lines:
        line = line.split()
        if not line:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        classes.append(line[0])
        xc = float(line[1]); yc = float(line[2]); xw = float(line[3]); yw = float(line[4])
        # Label format: class, x-centre, y-centre, width, height (all normalised) -> pixel corners.
        x1 = int((xc - xw/2) * img_width); y1 = int((yc - yw/2) * img_height)
        x2 = int((xc + xw/2) * img_width); y2 = int((yc + yw/2) * img_height)
        bounding_boxes.append([x1, y1, x2, y2])
    img = draw_boxes(img, bounding_boxes, classes)
    show_image_cv2(img)

Test bounding box transformation when image is flipped

In [None]:
type_of_flip = 'both'

img = cv2.imread(example_image_path)
img = flip_image(img, type_of_flip)
# Scale the normalised label coordinates by the real image size rather than
# assuming the image is 640x480.
img_height, img_width = img.shape[:2]

with open(example_label_path, 'r') as f:
    lines = f.readlines()
bounding_boxes = []
classes = []
for line in lines:
    line = line.split()
    if not line:
        # Skip blank lines (e.g. a trailing newline at the end of the file).
        continue
    classes.append(line[0])
    xc = float(line[1]); yc = float(line[2]); xw = float(line[3]); yw = float(line[4])
    # Mirroring moves a normalised centre c to 1 - c on the flipped axis;
    # width and height are unchanged.
    if type_of_flip == 'horizontal':
        xc = 1 - xc
    elif type_of_flip == 'vertical':
        yc = 1 - yc
    elif type_of_flip == 'both':
        xc = 1 - xc; yc = 1 - yc
    x1 = int((xc - xw/2) * img_width); y1 = int((yc - yw/2) * img_height)
    x2 = int((xc + xw/2) * img_width); y2 = int((yc + yw/2) * img_height)
    bounding_boxes.append([x1, y1, x2, y2])
img = draw_boxes(img, bounding_boxes, classes)
show_image_cv2(img)

Generate 2 augmented copies of each image in the dataset

In [None]:
images_path = os.path.join(HOME, 'valid', 'images')
labels_path = os.path.join(HOME, 'valid', 'labels')

# The directory listing is taken once, before any augmented file is written.
# NOTE(review): the outputs land in the same folders, so running this cell a
# second time would also pick up (and re-augment) the "<i>_<j>" files.
images_names = os.listdir(images_path)
# Remove the extension as a suffix; rstrip('.jpg') strips a character set and
# would mangle stems that end in '.', 'j', 'p' or 'g'.
names = [name.removesuffix('.jpg') for name in images_names]

for i, name in enumerate(names):
    img_path = os.path.join(images_path, name + '.jpg')
    lbl_path = os.path.join(labels_path, name + '.txt')
    if not (os.path.exists(img_path) and os.path.exists(lbl_path)):
        print('Error: ' + name)
        continue
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode.
        print('Error: ' + name)
        continue
    with open(lbl_path, 'r') as f:
        lines = f.readlines()
    # Keep the labels in their normalised form (class, xc, yc, w, h). Going
    # through integer pixel coordinates and back would truncate them and tie
    # the result to a hard-coded 640x480 image size.
    classes = []
    boxes_normalized = []
    for line in lines:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        classes.append(fields[0])
        boxes_normalized.append((float(fields[1]), float(fields[2]), float(fields[3]), float(fields[4])))
    for j in range(2):
        actions = get_random_actions()
        img2 = apply_actions(img, actions)
        img2_path = os.path.join(images_path, f"{i}_{j}.jpg")
        # Do not rebind img2 to imwrite's boolean return value.
        cv2.imwrite(img2_path, img2)
        flip_x = 'horizontal' in actions or 'both' in actions
        flip_y = 'vertical' in actions or 'both' in actions
        with open(os.path.join(labels_path, f"{i}_{j}.txt"), 'w') as f:
            for k in range(len(classes)):
                xc, yc, xw, yw = boxes_normalized[k]
                # Mirroring moves a normalised centre c to 1 - c on the
                # flipped axis; width and height are unchanged.
                if flip_x:
                    xc = 1 - xc
                if flip_y:
                    yc = 1 - yc
                f.write(f"{classes[k]} {xc} {yc} {xw} {yw}\n")