In [2]:
import imgaug as ia
ia.seed(1)
# imgaug uses matplotlib backend for displaying images
%matplotlib inline
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 
# imageio library will be used for image input/output
import imageio
import pandas as pd
import numpy as np
import re
import os
import glob
# this library is needed to read XML files for converting it into CSV
import xml.etree.ElementTree as ET
import shutil

In [3]:
# Function that will extract column data for our CSV file as pandas DataFrame
def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            try:
                value = (root.find('filename').text,
                         int(root.find('size')[0].text),
                         int(root.find('size')[1].text),
                         int(member[4][0].text),
                         int(member[4][1].text),
                         int(member[4][2].text),
                         int(member[4][3].text),
                         member[0].text
                         )
                xml_list.append(value)
            except:
                pass
    column_name = ['filename', 'width', 'height','xmin', 'ymin', 'xmax', 'ymax','class']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df
   
# apply the function to convert all XML files in images/ folder into labels.csv
labels_df = xml_to_csv('../../images/AGRI_LEG_LAB')
#labels_df = xml_to_csv('../../images/TRUE_LAB')
labels_df.to_csv(('labels.csv'), index=None)

In [4]:
#Resize
height_resize = iaa.Sequential([ 
    iaa.Resize({"height": 1000, "width": 'keep-aspect-ratio'})
])
width_resize = iaa.Sequential([ 
    iaa.Resize({"height": 'keep-aspect-ratio', "width": 1000})
])

In [5]:
# function to convert BoundingBoxesOnImage object into DataFrame
def bbs_obj_to_df(bbs_object):
#     convert BoundingBoxesOnImage object into array
    bbs_array = bbs_object.to_xyxy_array()
#     convert array into a DataFrame ['xmin', 'ymin', 'xmax', 'ymax'] columns
    df_bbs = pd.DataFrame(bbs_array, columns=['xmin', 'ymin', 'xmax', 'ymax'])
    return df_bbs

In [6]:
def resize_imgaug(df, images_path, aug_images_path, image_prefix):
    # create data frame which we're going to populate with augmented image info
    aug_bbs_xy = pd.DataFrame(columns=
                              ['filename', 'width', 'height', 'xmin', 'ymin', 'xmax', 'ymax','class']
                             )
    grouped = df.groupby('filename')    
    
    for filename in df['filename'].unique():
    #   Get separate data frame grouped by file name
        group_df = grouped.get_group(filename)
        group_df = group_df.reset_index()
        group_df = group_df.drop(['index'], axis=1)
        
    #   The only difference between if and elif statements below is the use of height_resize and width_resize augmentors
    #   deffined previously.

    #   If image height is greater than or equal to image width 
    #   AND greater than 600px perform resizing augmentation shrinking image height to 600px.
        if group_df['height'].unique()[0] >= group_df['width'].unique()[0] and group_df['height'].unique()[0] > 1000:
        #   read the image
            image = imageio.imread(images_path+filename)
        #   get bounding boxes coordinates and write into array        
            bb_array = group_df.drop(['filename', 'width', 'height', 'class'], axis=1).values
        #   pass the array of bounding boxes coordinates to the imgaug library
            bbs = BoundingBoxesOnImage.from_xyxy_array(bb_array, shape=image.shape)
        #   apply augmentation on image and on the bounding boxes
            image_aug, bbs_aug = height_resize(image=image, bounding_boxes=bbs)
        #   write augmented image to a file
            imageio.imwrite(aug_images_path+image_prefix+filename, image_aug)  
        #   create a data frame with augmented values of image width and height
            info_df = group_df.drop(['xmin', 'ymin', 'xmax', 'ymax'], axis=1)        
            for index, _ in info_df.iterrows():
                info_df.at[index, 'width'] = image_aug.shape[1]
                info_df.at[index, 'height'] = image_aug.shape[0]
        #   rename filenames by adding the predifined prefix
            info_df['filename'] = info_df['filename'].apply(lambda x: image_prefix+x)
        #   create a data frame with augmented bounding boxes coordinates using the function we created earlier
            bbs_df = bbs_obj_to_df(bbs_aug)
        #   concat all new augmented info into new data frame
            aug_df = pd.concat([info_df, bbs_df], axis=1)
        #   append rows to aug_bbs_xy data frame
            aug_bbs_xy = pd.concat([aug_bbs_xy, aug_df])
            
    #   if image width is greater than image height 
    #   AND greater than 600px perform resizing augmentation shrinking image width to 600px
        elif group_df['width'].unique()[0] > group_df['height'].unique()[0] and group_df['width'].unique()[0] > 1000:
        #   read the image
            image = imageio.imread(images_path+filename)
        #   get bounding boxes coordinates and write into array        
            bb_array = group_df.drop(['filename', 'width', 'height', 'class'], axis=1).values
        #   pass the array of bounding boxes coordinates to the imgaug library
            bbs = BoundingBoxesOnImage.from_xyxy_array(bb_array, shape=image.shape)
        #   apply augmentation on image and on the bounding boxes
            image_aug, bbs_aug = width_resize(image=image, bounding_boxes=bbs)
        #   write augmented image to a file
            imageio.imwrite(aug_images_path+image_prefix+filename, image_aug)  
        #   create a data frame with augmented values of image width and height
            info_df = group_df.drop(['xmin', 'ymin', 'xmax', 'ymax'], axis=1)        
            for index, _ in info_df.iterrows():
                info_df.at[index, 'width'] = image_aug.shape[1]
                info_df.at[index, 'height'] = image_aug.shape[0]
        #   rename filenames by adding the predifined prefix
            info_df['filename'] = info_df['filename'].apply(lambda x: image_prefix+x)
        #   create a data frame with augmented bounding boxes coordinates using the function we created earlier
            bbs_df = bbs_obj_to_df(bbs_aug)
        #   concat all new augmented info into new data frame
            aug_df = pd.concat([info_df, bbs_df], axis=1)
        #   append rows to aug_bbs_xy data frame
            aug_bbs_xy = pd.concat([aug_bbs_xy, aug_df])

    #     append image info without any changes if it's height and width are both less than 600px 
        else:
            aug_bbs_xy = pd.concat([aug_bbs_xy, group_df])
    # return dataframe with updated images and bounding boxes annotations 
    aug_bbs_xy = aug_bbs_xy.reset_index()
    aug_bbs_xy = aug_bbs_xy.drop(['index'], axis=1)
    return aug_bbs_xy

In [7]:
resized_images_df = resize_imgaug(labels_df, 'images/', 'images/', '')

In [8]:
resized_images_df

Unnamed: 0,filename,width,height,xmin,ymin,xmax,ymax,class
0,-56_16__-2_54_2020-09-06.jpg,1000,1000,1,1,219,323,legal
1,-56_16__-2_54_2020-09-06.jpg,1000,1000,1,209,869,1000,legal
2,-56_16__-2_58_2020-09-06.jpg,1000,1000,78,1,381,142,legal
3,-56_16__-2_58_2020-09-06.jpg,1000,1000,1,185,54,348,legal
4,-56_16__-2_58_2020-09-06.jpg,1000,1000,1,49,41,151,legal
...,...,...,...,...,...,...,...,...
414,-63_6__-9_96_2018-07-25.jpg,1000,1000,870,893,906,905,legal
415,-63_6__-9_96_2018-07-25.jpg,1000,1000,303,642,317,654,legal
416,-63_6__-9_96_2018-07-25.jpg,1000,1000,221,526,232,540,legal
417,-63_6__-9_96_2018-07-25.jpg,1000,1000,131,521,138,530,legal


In [None]:
#augmented

In [9]:
aug = iaa.SomeOf(2, [    
    iaa.Affine(scale=(0.5, 1.5)),
    iaa.Affine(rotate=(-60, 60)),
    #iaa.Affine(translate_percent={"x":(-0.3, 0.3),"y":(-0.3, 0.3)}),
    iaa.Fliplr(1),
    iaa.Multiply((0.5, 1.5)),
    #iaa.GaussianBlur(sigma=(1.0, 3.0)),
    #iaa.AdditiveGaussianNoise(scale=(0.03*255, 0.05*255))
])

In [10]:
  def image_aug(df, images_path, aug_images_path, image_prefix, augmentor):
    # create data frame which we're going to populate with augmented image info
    aug_bbs_xy = pd.DataFrame(columns=
                              ['filename', 'width', 'height','xmin' , 'ymin', 'xmax', 'ymax','class']
                             )
    grouped = df.groupby('filename')
    
    for filename in df['filename'].unique():
    #   get separate data frame grouped by file name
        group_df = grouped.get_group(filename)
        group_df = group_df.reset_index()
        group_df = group_df.drop(['index'], axis=1)   
    #   read the image
        image = imageio.imread(images_path+filename)
    #   get bounding boxes coordinates and write into array        
        bb_array = group_df.drop(['filename', 'width', 'height', 'class'], axis=1).values
    #   pass the array of bounding boxes coordinates to the imgaug library
        bbs = BoundingBoxesOnImage.from_xyxy_array(bb_array, shape=image.shape)
        
    #   apply augmentation on image and on the bounding boxes
        image_aug, bbs_aug = augmentor(image=image, bounding_boxes=bbs)
    #   disregard bounding boxes which have fallen out of image pane    
        bbs_aug = bbs_aug.remove_out_of_image()
    #   clip bounding boxes which are partially outside of image pane
        bbs_aug = bbs_aug.clip_out_of_image()

            
    #   don't perform any actions with the image if there are no bounding boxes left in it    
        if re.findall('Image...', str(bbs_aug)) == ['Image([]']:
            pass
        
    #   otherwise continue
        else:
        #   write augmented image to a file
            imageio.imwrite(aug_images_path+image_prefix+filename, image_aug)  
        #   create a data frame with augmented values of image width and height
            info_df = group_df.drop(['xmin', 'ymin', 'xmax', 'ymax'], axis=1)    
            for index, _ in info_df.iterrows():
                info_df.at[index, 'width'] = image_aug.shape[1]
                info_df.at[index, 'height'] = image_aug.shape[0]
        #   rename filenames by adding the predifined prefix
            info_df['filename'] = info_df['filename'].apply(lambda x: image_prefix+x)
        #   create a data frame with augmented bounding boxes coordinates using the function we created earlier
            bbs_df = bbs_obj_to_df(bbs_aug)
        #   concat all new augmented info into new data frame
            aug_df = pd.concat([info_df, bbs_df], axis=1)
        #   append rows to aug_bbs_xy data frame
            aug_bbs_xy = pd.concat([aug_bbs_xy, aug_df])            
    
    # return dataframe with updated images and bounding boxes annotations 
    aug_bbs_xy = aug_bbs_xy.reset_index()
    aug_bbs_xy = aug_bbs_xy.drop(['index'], axis=1)
    return aug_bbs_xy

In [11]:
augmented_images_df = image_aug(resized_images_df, 'images/', 'aug_images/', 'aug1_', aug)

In [13]:
print(augmented_images_df)

                              filename width height        xmin        ymin  \
0      aug1_-52_19-0_58_2019-09-24.jpg  1000   1000  799.715210    1.623033   
1      aug1_-52_19-0_62_2019-09-24.jpg  1000   1000  119.827866  779.044800   
2      aug1_-52_19-0_62_2019-09-24.jpg  1000   1000  349.523895  678.533875   
3      aug1_-52_19-0_62_2019-09-24.jpg  1000   1000  239.057907  776.914795   
4      aug1_-52_19-0_62_2019-09-24.jpg  1000   1000  617.377747  628.609558   
...                                ...   ...    ...         ...         ...   
2169  aug1_-73_82--3_91_2019-09-03.jpg  1000   1000  545.091919  658.686951   
2170  aug1_-73_82--3_91_2019-09-03.jpg  1000   1000  782.376587  607.955933   
2171  aug1_-73_82--3_91_2019-09-03.jpg  1000   1000  898.328369  538.729248   
2172  aug1_-73_82--3_91_2019-09-03.jpg  1000   1000  268.310516  618.545959   
2173  aug1_-73_82--3_91_2019-09-03.jpg  1000   1000  594.923584  999.624878   

             xmax         ymax class  
0      845.6

                              filename  width  height        xmin        ymin  \
0      aug1_-52_19-0_58_2019-09-24.jpg   1000    1000  922.301880  379.163879   
1      aug1_-52_19-0_62_2019-09-24.jpg   1000    1000    0.000000  571.196716   
2      aug1_-52_19-0_62_2019-09-24.jpg   1000    1000  222.103165  604.498413   
3      aug1_-52_19-0_62_2019-09-24.jpg   1000    1000   51.001396  632.058411   
4      aug1_-52_19-0_62_2019-09-24.jpg   1000    1000  500.000000  742.298462   
...                                ...    ...     ...         ...         ...   
2161  aug1_-73_82--3_91_2019-09-03.jpg   1000    1000  734.355164   81.221779   
2162  aug1_-73_82--3_91_2019-09-03.jpg   1000    1000  701.518127    0.000000   
2163  aug1_-73_82--3_91_2019-09-03.jpg   1000    1000  586.077454  413.756470   
2164  aug1_-73_82--3_91_2019-09-03.jpg   1000    1000    0.000000    0.000000   
2165  aug1_-73_82--3_91_2019-09-03.jpg   1000    1000    0.000000    0.000000   

             xmax        ym

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [12]:
#concat
all_labels_df = pd.concat([resized_images_df, augmented_images_df])
all_labels_df.to_csv('all_labels.csv', index=False)

In [12]:
#put all images together
for file in os.listdir('aug_images'):
    shutil.copy('aug_images/'+file, 'imagesAndAug/'+file)
for file in os.listdir('images'):
    shutil.copy('images/'+file, 'imagesAndAug/'+file)