In [1]:
import imgaug as ia

# imgaug uses matplotlib backend for displaying images
%matplotlib inline
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 
# imageio library will be used for image input/output
import imageio
import pandas as pd
import numpy as np
import re
import os
import glob
# this library is needed to read XML files for converting it into CSV
import xml.etree.ElementTree as ET
import shutil
import cv2
import random

In [9]:
# Function that will extract column data for our CSV file as pandas DataFrame
def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            try:
                value = (root.find('filename').text,
                         int(root.find('size')[0].text),
                         int(root.find('size')[1].text),
                         int(member[4][0].text),
                         int(member[4][1].text),
                         int(member[4][2].text),
                         int(member[4][3].text),
                         member[0].text
                         )
                xml_list.append(value)
            except:
                pass
    column_name = ['filename', 'width', 'height','xmin', 'ymin', 'xmax', 'ymax','class']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df
   
# apply the function to convert all XML files in images/ folder into labels.csv
labels_df = xml_to_csv('../../images/allImagesandLabels/AGRI_ILE_LAB/')
#labels_df = xml_to_csv('../../images/TRUE_LAB')
labels_df.to_csv(('labels.csv'), index=None)

In [10]:
#Resize
height_resize = iaa.Sequential([ 
    iaa.Resize({"height": 1000, "width": 'keep-aspect-ratio'})
])
width_resize = iaa.Sequential([ 
    iaa.Resize({"height": 'keep-aspect-ratio', "width": 1000})
])

In [11]:
# function to convert BoundingBoxesOnImage object into DataFrame
def bbs_obj_to_df(bbs_object):
#     convert BoundingBoxesOnImage object into array
    bbs_array = bbs_object.to_xyxy_array()
#     convert array into a DataFrame ['xmin', 'ymin', 'xmax', 'ymax'] columns
    df_bbs = pd.DataFrame(bbs_array, columns=['xmin', 'ymin', 'xmax', 'ymax'])
    return df_bbs

In [12]:
def resize_imgaug(df, images_path, aug_images_path, image_prefix):
    # create data frame which we're going to populate with augmented image info
    aug_bbs_xy = pd.DataFrame(columns=
                              ['filename', 'width', 'height', 'xmin', 'ymin', 'xmax', 'ymax','class']
                             )
    grouped = df.groupby('filename')    
    
    for filename in df['filename'].unique():
    #   Get separate data frame grouped by file name
        group_df = grouped.get_group(filename)
        group_df = group_df.reset_index()
        group_df = group_df.drop(['index'], axis=1)
        
    #   The only difference between if and elif statements below is the use of height_resize and width_resize augmentors
    #   deffined previously.

    #   If image height is greater than or equal to image width 
    #   AND greater than 600px perform resizing augmentation shrinking image height to 600px.
        if group_df['height'].unique()[0] >= group_df['width'].unique()[0] and group_df['height'].unique()[0] > 1000:
        #   read the image
            image = imageio.imread(images_path+filename)
        #   get bounding boxes coordinates and write into array        
            bb_array = group_df.drop(['filename', 'width', 'height', 'class'], axis=1).values
        #   pass the array of bounding boxes coordinates to the imgaug library
            bbs = BoundingBoxesOnImage.from_xyxy_array(bb_array, shape=image.shape)
        #   apply augmentation on image and on the bounding boxes
            image_aug, bbs_aug = height_resize(image=image, bounding_boxes=bbs)
        #   write augmented image to a file
            imageio.imwrite(aug_images_path+image_prefix+filename, image_aug)  
        #   create a data frame with augmented values of image width and height
            info_df = group_df.drop(['xmin', 'ymin', 'xmax', 'ymax'], axis=1)        
            for index, _ in info_df.iterrows():
                info_df.at[index, 'width'] = image_aug.shape[1]
                info_df.at[index, 'height'] = image_aug.shape[0]
        #   rename filenames by adding the predifined prefix
            info_df['filename'] = info_df['filename'].apply(lambda x: image_prefix+x)
        #   create a data frame with augmented bounding boxes coordinates using the function we created earlier
            bbs_df = bbs_obj_to_df(bbs_aug)
        #   concat all new augmented info into new data frame
            aug_df = pd.concat([info_df, bbs_df], axis=1)
        #   append rows to aug_bbs_xy data frame
            aug_bbs_xy = pd.concat([aug_bbs_xy, aug_df])
            
    #   if image width is greater than image height 
    #   AND greater than 600px perform resizing augmentation shrinking image width to 600px
        elif group_df['width'].unique()[0] > group_df['height'].unique()[0] and group_df['width'].unique()[0] > 1000:
        #   read the image
            image = imageio.imread(images_path+filename)
        #   get bounding boxes coordinates and write into array        
            bb_array = group_df.drop(['filename', 'width', 'height', 'class'], axis=1).values
        #   pass the array of bounding boxes coordinates to the imgaug library
            bbs = BoundingBoxesOnImage.from_xyxy_array(bb_array, shape=image.shape)
        #   apply augmentation on image and on the bounding boxes
            image_aug, bbs_aug = width_resize(image=image, bounding_boxes=bbs)
        #   write augmented image to a file
            imageio.imwrite(aug_images_path+image_prefix+filename, image_aug)  
        #   create a data frame with augmented values of image width and height
            info_df = group_df.drop(['xmin', 'ymin', 'xmax', 'ymax'], axis=1)        
            for index, _ in info_df.iterrows():
                info_df.at[index, 'width'] = image_aug.shape[1]
                info_df.at[index, 'height'] = image_aug.shape[0]
        #   rename filenames by adding the predifined prefix
            info_df['filename'] = info_df['filename'].apply(lambda x: image_prefix+x)
        #   create a data frame with augmented bounding boxes coordinates using the function we created earlier
            bbs_df = bbs_obj_to_df(bbs_aug)
        #   concat all new augmented info into new data frame
            aug_df = pd.concat([info_df, bbs_df], axis=1)
        #   append rows to aug_bbs_xy data frame
            aug_bbs_xy = pd.concat([aug_bbs_xy, aug_df])

    #     append image info without any changes if it's height and width are both less than 600px 
        else:
            aug_bbs_xy = pd.concat([aug_bbs_xy, group_df])
    # return dataframe with updated images and bounding boxes annotations 
    aug_bbs_xy = aug_bbs_xy.reset_index()
    aug_bbs_xy = aug_bbs_xy.drop(['index'], axis=1)
    return aug_bbs_xy

In [13]:
resized_images_df = resize_imgaug(labels_df, 'images/', 'images/', '')

In [14]:
resized_images_df

Unnamed: 0,filename,width,height,xmin,ymin,xmax,ymax,class
0,-52_19__0_58_2019-09-24.jpg,1000,1000,349,294,383,330,illegal
1,-52_19__0_58_2019-09-24.jpg,1000,1000,225,83,303,133,illegal
2,-52_19__0_58_2019-09-24.jpg,1000,1000,763,83,784,135,illegal
3,-52_19__0_62_2019-09-24.jpg,1000,1000,242,573,304,631,illegal
4,-52_19__0_62_2019-09-24.jpg,1000,1000,1,565,73,594,illegal
...,...,...,...,...,...,...,...,...
1126,-73_78__-3_91_2019-09-03.jpg,1000,1000,416,541,608,866,illegal
1127,-73_82__-3_91_2019-09-03.jpg,1000,1000,66,383,403,846,illegal
1128,-73_82__-3_91_2019-09-03.jpg,1000,1000,444,228,1000,870,illegal
1129,-73_82__-3_91_2019-09-03.jpg,1000,1000,331,640,494,791,illegal


In [None]:
#augmented

In [36]:
aug = iaa.SomeOf(4, [    
    iaa.Affine(scale=(0.8, 1.2)),
    #iaa.Affine(translate_percent={"x":(-0.3, 0.3),"y":(-0.3, 0.3)}),
    iaa.Fliplr(1),
    iaa.Flipud(1),
    iaa.Multiply((0.8, 1.2)),
    #iaa.GaussianBlur(sigma=(1.0, 3.0)),
    #iaa.AdditiveGaussianNoise(scale=(0.03*255, 0.05*255))
])

In [2]:
  def image_aug(images_path, mask_path, aug_images_path, image_prefix):
    # create data frame which we're going to populate with augmented image info
    
   
    
    for element in os.listdir(images_path):
        if os.path.isdir(element):
            print("'%s' un dossier" % element)
        else:
            #print("'%s' est un fichier" % element)
            # Load an example image (uint8, 128x128x3).
            #image = ia.quokka(size=(128, 128), extract="square")
            #imori = imageio.imread(images_path+element, as_gray=False, pilmode="RGB")
            #immask = imageio.imread(mask_path+element, as_gray=True, pilmode="L")
            images = np.zeros((1, 1000, 1000, 3),dtype=np.uint8)
            imagesmask = np.zeros((1, 1000, 1000,3),dtype=np.uint8 )
            scal = random.uniform(0.8, 1.2)
            mult = random.uniform(0.8, 1.2)
            elas = random.uniform(0.01, 0.05)
            elas2 = random.uniform(0.01, 0.1)
            

            aug = iaa.Sequential([
                iaa.Affine(scal, name = "test"),
                #iaa.Affine(translate_percent={"x":(-0.3, 0.3),"y":(-0.3, 0.3)}),
                iaa.Fliplr(1),
                iaa.Flipud(1),
                iaa.Multiply(mult),
                #iaa.PerspectiveTransform(elas,name = "test3"),
                iaa.PiecewiseAffine(elas2, name = "test4"),
                #iaa.GaussianBlur(sigma=(1.0, 3.0)),
                #iaa.AdditiveGaussianNoise(scale=(0.03*255, 0.05*255))
            ])
            aug2 = iaa.Sequential([
                iaa.Affine(scal, name = "test"),
                #iaa.Affine(translate_percent={"x":(-0.3, 0.3),"y":(-0.3, 0.3)}),
                iaa.Fliplr(1),
                iaa.Flipud(1),
                #iaa.Multiply(mult),
                iaa.PerspectiveTransform(elas,name = "test3"),
                iaa.PiecewiseAffine(elas2, name = "test4"),
                #iaa.GaussianBlur(sigma=(1.0, 3.0)),
                #iaa.AdditiveGaussianNoise(scale=(0.03*255, 0.05*255))
            ])
            aug = aug.localize_random_state()
            aug = aug.to_deterministic()
            aug2 = aug2.to_deterministic()
            
            aug2 = aug2.copy_random_state(aug, matching ="name")
            
            
            
            img = cv2.imread(images_path+element, 1)
            images[0, :, :, :] = img
            
            imgmask = cv2.imread(mask_path+element, 1)
            imagesmask[0, :, :, :] = imgmask
            
            images_aug= aug.augment_images(images)
            
            mask_aug= aug2.augment_images(imagesmask)
            

            
            
            x = 0
            for im in images_aug:
                cv2.imwrite(aug_images_path+"aug_"+element, im)
                x+=1
            x = 0
            for im in mask_aug:
                cv2.imwrite(aug_images_path+"aug_mask_"+element, im)
                x+=1
            #imori = cv2.imread(images_path+element,cv2.IMREAD_COLOR)
            #immask = cv2.imread(mask_path+element,cv2.IMREAD_GRAYSCALE)
    
    
            #imageio.imwrite(aug_images_path+image_prefix+element, images_aug)
            #imageio.imwrite(aug_images_path+image_prefix+element, segmaps_aug)
    
            
        

        #   write augmented image to a file
            #imageio.imwrite(aug_images_path+image_prefix+filename, image_aug)  
        #   create a data frame with augmented values of image width and height
           
        #images_aug, segmaps_aug = aug(images=images, segmentation_maps=segmaps)
    

    return

In [5]:
image_aug('./data/train/image/','./data/train/label/','./augtrain/','')
#augmented_images_df = image_aug(resized_images_df, 'images/', 'aug_images/', 'aug1_', aug)

In [38]:
print(images)

NameError: name 'images' is not defined

In [18]:
print(augmented_images_df)

                               filename width height        xmin        ymin  \
0      aug1_-52_19__0_58_2019-09-24.jpg  1000   1000  514.124451  247.682175   
1      aug1_-52_19__0_58_2019-09-24.jpg  1000   1000  566.432495   10.989367   
2      aug1_-52_19__0_58_2019-09-24.jpg  1000   1000  944.280090  392.566620   
3      aug1_-52_19__0_62_2019-09-24.jpg  1000   1000  203.729828  583.828369   
4      aug1_-52_19__0_62_2019-09-24.jpg  1000   1000    0.000000  574.641724   
...                                 ...   ...    ...         ...         ...   
1116  aug1_-73_78__-3_91_2019-09-03.jpg  1000   1000         NaN         NaN   
1117  aug1_-73_82__-3_91_2019-09-03.jpg  1000   1000  597.000000  383.000000   
1118  aug1_-73_82__-3_91_2019-09-03.jpg  1000   1000    0.000000  228.000000   
1119  aug1_-73_82__-3_91_2019-09-03.jpg  1000   1000  506.000000  640.000000   
1120  aug1_-73_82__-3_91_2019-09-03.jpg  1000   1000   58.000000  838.000000   

            xmax        ymax    class  

In [19]:
#concat
all_labels_df = pd.concat([resized_images_df, augmented_images_df])
all_labels_df.to_csv('all_labels.csv', index=False)

In [20]:
#put all images together
for file in os.listdir('aug_images'):
    shutil.copy('aug_images/'+file, 'imagesAndAug/'+file)
for file in os.listdir('images'):
    shutil.copy('images/'+file, 'imagesAndAug/'+file)