<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Learn-to-Augment-Images-and-Multiple-Bounding-Boxes-for-Deep-Learning-in-4-Steps" data-toc-modified-id="Learn-to-Augment-Images-and-Multiple-Bounding-Boxes-for-Deep-Learning-in-4-Steps-1"><span class="toc-item-num">1&nbsp;&nbsp;</span><a href="https://medium.com/@a.karazhay/guide-augment-images-and-multiple-bounding-boxes-for-deep-learning-in-4-steps-with-the-notebook-9b263e414dac" target="_blank">Learn to Augment Images and Multiple Bounding Boxes for Deep Learning in 4 Steps</a></a></span></li><li><span><a href="#Convert-All-XML-to-one-CSV" data-toc-modified-id="Convert-All-XML-to-one-CSV-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Convert All XML to one CSV</a></span></li><li><span><a href="#Augment-images" data-toc-modified-id="Augment-images-3"><span class="toc-item-num">3&nbsp;&nbsp;</span><a href="https://imgaug.readthedocs.io/en/latest/source/examples_basics.html" target="_blank">Augment images</a></a></span></li><li><span><a href="#Merge-resized-and-augmented-images" data-toc-modified-id="Merge-resized-and-augmented-images-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Merge resized and augmented images</a></span></li></ul></div>

# [Learn to Augment Images and Multiple Bounding Boxes for Deep Learning in 4 Steps](https://medium.com/@a.karazhay/guide-augment-images-and-multiple-bounding-boxes-for-deep-learning-in-4-steps-with-the-notebook-9b263e414dac)

In [1]:
import imgaug as ia
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 

import imageio
import pandas as pd
import numpy as np
import re
import os
import glob

import xml.etree.ElementTree as ET
import shutil

# Seed imgaug's random number generator, then enable inline plots (imgaug uses the matplotlib backend for displaying images)
ia.seed(1)
%matplotlib inline

# Convert All XML to one CSV

In [2]:
# Function that extracts the column data for our CSV files from the XML annotations and returns it as a pandas DataFrame

def xml_to_csv(path):
    """Collect the object annotations of every ``.xml`` file in ``path``.

    Each ``<object>`` element of each file becomes one row. Fields are read
    by position: the first two children of ``<size>`` are taken as width and
    height, the first child of ``<object>`` as the class name, and the four
    children of the fifth child of ``<object>`` as xmin, ymin, xmax, ymax.
    The image name is the XML file name with its extension replaced by
    ``.jpg``.

    The numeric fields of a row are parsed as ``int``; if any of them is not
    an integer literal, the whole row is parsed as ``float`` instead.

    The current working directory is not changed by this function.

    Parameters
    ----------
    path : str
        Directory containing the XML annotation files.

    Returns
    -------
    pandas.DataFrame
        Columns ``filename, width, height, class, xmin, ymin, xmax, ymax``;
        files are processed in sorted name order.

    Raises
    ------
    ValueError
        If an annotation lacks one of the expected elements or holds a
        non-numeric size / box value. The message names the offending file.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    # Sorted so the row order does not depend on the filesystem's listing order.
    xml_files = sorted(file for file in os.listdir(path) if file.endswith(".xml"))
    for xml_file in xml_files:
        # Build the full path instead of os.chdir(path): the caller's working
        # directory stays intact even for nested paths or when parsing fails.
        root = ET.parse(os.path.join(path, xml_file)).getroot()
        name = os.path.splitext(xml_file)[0]
        size = root.find('size')
        for member in root.findall('object'):
            try:
                label = member[0].text
                raw_numbers = [size[0].text, size[1].text]
                raw_numbers += [member[4][i].text for i in range(4)]
                try:
                    numbers = [int(text) for text in raw_numbers]
                except ValueError:
                    # At least one value is not an integer literal:
                    # keep the whole row as floats.
                    numbers = [float(text) for text in raw_numbers]
            except (TypeError, IndexError, ValueError) as exc:
                # Surface the real problem (and the file) instead of masking it.
                raise ValueError(f"Malformed annotation in {xml_file!r}: {exc}") from exc
            xml_list.append([f"{name}.jpg", numbers[0], numbers[1], label, *numbers[2:]])
    return pd.DataFrame(xml_list, columns=column_name)

In [4]:
# Image sizes to process; each one selects the folder
# all_images/final_images_<size>/ below ROOT_DIR.
IMAGE_SIZES = [300, 320, 512, 640]

# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
ROOT_DIR = r'C:\Users\batman\Desktop\Main\Detection-and-Classification\Detection'
# Derived from ROOT_DIR so the project location is defined in exactly one place.
AFTER_DIR = os.path.join(ROOT_DIR, 'scripts')
SAVE_PATH = os.path.join(ROOT_DIR, 'annotations')

# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(SAVE_PATH, exist_ok=True)

In [5]:
for IMG_SIZE in IMAGE_SIZES:
    # Re-anchor at ROOT_DIR so the relative path below resolves
    # (the working directory may have changed since the last iteration).
    if os.getcwd() != ROOT_DIR:
        os.chdir(ROOT_DIR)
    # convert all XML files of this size's image folder into labels_<size>.csv
    labels_df = xml_to_csv(f'./all_images/final_images_{IMG_SIZE}/')
    # os.path.join instead of an embedded "\l", which is an invalid string escape
    labels_df.to_csv(os.path.join(SAVE_PATH, f'labels_{IMG_SIZE}.csv'), index=False)

# NOTE: labels_df now holds the annotations of the LAST size in IMAGE_SIZES only.
if os.getcwd() != AFTER_DIR:
    os.chdir(AFTER_DIR)
    
# labels_df

# [Augment images](https://imgaug.readthedocs.io/en/latest/source/examples_basics.html)

In [None]:
# aug = iaa.SomeOf(2, [    
#     iaa.Affine(scale=(0.1, 0.2)),
#     iaa.Affine(rotate=(-20, 20)),
#     iaa.Affine(translate_percent={"x":(-0.1, 0.1),"y":(-0.25, 0.25)}),
#     iaa.GaussianBlur(sigma=(1.0, 2.0)),
#     iaa.AdditiveGaussianNoise(scale=(0.09*255, 0.005*255))
# ])

In [None]:
def sometimes(aug):
    """Wrap ``aug`` in ``iaa.Sometimes`` with a probability of 0.5."""
    return iaa.Sometimes(0.5, aug)


# Augmentation pipeline: SomeOf((0, 5), ...) selects a random subset of the
# augmenters listed below and applies the selection in random order.
aug = iaa.SomeOf(
    (0, 5),
    [
        # Superpixel replacement, wrapped so it only fires part of the time.
        sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(50, 200))),
        # Geometric transform: scale, shift, rotate and shear in one step.
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.15, 0.15), "y": (-0.17, 0.17)},
            rotate=(-25, 25),
            shear=(-8, 8),
        ),
        # Gaussian pixel noise, wrapped so it only fires part of the time.
        sometimes(iaa.AdditiveGaussianNoise(scale=(0.09*255, 0.1*255))),
        # Exactly one of the two intensity changes.
        iaa.OneOf(
            [
                # Add a value of 0 to 5 to each pixel.
                iaa.Add((0, 5), per_channel=1),
                # Darken the image (30-60% of the original value).
                iaa.Multiply((0.3, 0.6), per_channel=1),
            ]
        ),
    ],
    random_order=True,
)

In [None]:
# function to convert BoundingBoxesOnImage object into DataFrame
def bbs_obj_to_df(bbs_object):
    """Return the boxes of ``bbs_object`` as a DataFrame.

    ``bbs_object`` must provide ``to_xyxy_array()``; each row of that array
    becomes one row with the columns ``xmin``, ``ymin``, ``xmax``, ``ymax``.
    """
    corner_columns = ['xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(bbs_object.to_xyxy_array(), columns=corner_columns)

In [None]:
def image_augmentation(df, images_path, aug_images_path, image_prefix, augmentor):
    """Augment every image listed in ``df`` together with its bounding boxes.

    For each distinct ``filename`` (in order of first appearance) the image is
    read from ``images_path``, passed with its boxes through ``augmentor``,
    and boxes that ended up fully outside the image are dropped while
    partially outside ones are clipped. Images that keep at least one box are
    written to ``aug_images_path`` as ``image_prefix + filename``; images
    without any remaining box are skipped entirely.

    Each box carries its class as the ``BoundingBox`` label, so a surviving
    box keeps its own class even when other boxes of the same image are
    removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotations with the columns ``filename, width, height, class, xmin,
        ymin, xmax, ymax``.
    images_path : str
        Directory holding the source images.
    aug_images_path : str
        Existing directory that receives the augmented images.
    image_prefix : str
        Prefix put in front of every augmented file name.
    augmentor : callable
        Called as ``augmentor(image=..., bounding_boxes=...)``; must return
        the augmented image and the augmented boxes.

    Returns
    -------
    pandas.DataFrame
        One row per surviving box with the same columns as ``df``;
        ``width`` / ``height`` are taken from the augmented image and the
        index runs from 0.
    """
    columns = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    # Rows are collected in a list and turned into a DataFrame once at the end.
    records = []

    # sort=False keeps the files in order of first appearance in df.
    for filename, group_df in df.groupby('filename', sort=False):
        # read the image
        image = imageio.imread(os.path.join(images_path, filename))
        # one labelled box per annotation row; the label travels with the box
        boxes = [
            BoundingBox(x1=xmin, y1=ymin, x2=xmax, y2=ymax, label=label)
            for xmin, ymin, xmax, ymax, label in group_df[
                ['xmin', 'ymin', 'xmax', 'ymax', 'class']
            ].itertuples(index=False, name=None)
        ]
        bbs = BoundingBoxesOnImage(boxes, shape=image.shape)
        # apply augmentation on image and on the bounding boxes
        image_aug, bbs_aug = augmentor(image=image, bounding_boxes=bbs)
        # disregard bounding boxes which have fallen out of the image pane,
        # then clip the ones which are partially outside of it
        bbs_aug = bbs_aug.remove_out_of_image().clip_out_of_image()

        # don't write or record the image if no bounding box is left in it
        if not bbs_aug.bounding_boxes:
            continue

        # rename the file by adding the predefined prefix and save it
        aug_filename = image_prefix + filename
        imageio.imwrite(os.path.join(aug_images_path, aug_filename), image_aug)

        # width and height come from the augmented image
        height, width = image_aug.shape[:2]
        records.extend(
            [aug_filename, width, height, box.label, box.x1, box.y1, box.x2, box.y2]
            for box in bbs_aug.bounding_boxes
        )

    # dataframe with updated images and bounding boxes annotations
    return pd.DataFrame(records, columns=columns)

In [None]:
# Folder (relative to the working directory) that receives the augmented images.
destination_folder = input("Enter destination folder name: ").strip()
if not destination_folder:
    raise ValueError("Destination folder name must not be empty.")

# The folder is wiped below, so refuse any path that is, or contains, the
# working directory or the source image folder.
_destination_abs = os.path.abspath(destination_folder)
for _protected in (os.getcwd(), os.path.abspath('resized_images')):
    try:
        _would_delete = os.path.commonpath([_destination_abs, _protected]) == _destination_abs
    except ValueError:
        # commonpath raises for paths on different drives; those cannot contain each other
        _would_delete = False
    if _would_delete:
        raise ValueError(
            f"Refusing to use {destination_folder!r}: it would delete {_protected!r}."
        )

# start from an empty destination folder
if os.path.isdir(destination_folder):
    shutil.rmtree(destination_folder)

os.makedirs(destination_folder)

augmented_images_df = image_augmentation(labels_df, 'resized_images/', f'{destination_folder}/', 'aug_', aug)

#  Merge resized and augmented images

In [None]:
# merging dataframe

# ignore_index=True renumbers the rows 0..n-1; without it both inputs keep
# their own 0-based labels and the combined index contains duplicates.
all_labels_df = pd.concat([labels_df, augmented_images_df], ignore_index=True)
all_labels_df.to_csv('all_labels.csv', index=False)

In [None]:
# merging folder

# Copy every augmented file next to the resized originals ...
for augmented_name in os.listdir(destination_folder):
    source_file = f'{destination_folder}/{augmented_name}'
    target_file = f'resized_images/{augmented_name}'
    shutil.copy(source_file, target_file)

# ... then remove the now redundant destination folder.
shutil.rmtree(destination_folder)

In [None]:
# Show the merged (original + augmented) annotation table as this cell's output.
all_labels_df