# Augment Training Dataset With Image Transformations

#### Load and process images

Remove transparency (flatten onto a white background; note that `mogrify` overwrites the files in place):
```
mogrify -background white -flatten ./*.png
```

Resize as necessary:
```
find . -name "*.png" -print0 | xargs -0 mogrify -resize 20%
```
or, to force exact pixel dimensions regardless of aspect ratio (the `!` flag):
```
mogrify -resize 3720x5260! ./*.png
```

In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

import os
import glob
import time
from tqdm import tqdm

import numpy as np
import pandas as pd
from PIL import Image as p_image
import imgaug.augmenters as ia

#### Parameters

In [2]:
# Augment images:
# NOTE: the HKT environment variable must be set; a missing variable raises
# KeyError on the next line.
PATH = os.path.join(os.environ['HKT'], 'Logos-Recognition-Training', 'data')
FORE_FILES = os.path.join(PATH, 'exemplars_100x100', '*.jpg')  # Glob pattern of source images.
BACK_FILES = ''  # Glob pattern of background images; an empty pattern matches no files.
AUGMENT_PATH = os.path.join(PATH, 'exemplars_100x100_aug')  # Output folder for augmented images.
# One entry per augmenter argument; every combination of the listed values is tried.
PARAMS = {
    'Multiply': [0.5, 1.5],
    'GaussianBlur': [0.4],
    'AdditiveGaussianNoise': [0.2*255],
    'AffineShear': [-25, 25],
    'AffineRotate': [-25, 25],
}
COLOR_SPACE = 'RGB'  # PIL mode every image is converted to on load and save.
# Upper bound on augmentations written per source file.
# `np.inf` is used because the `np.Inf` alias was removed in NumPy 2.0.
MAX_INST = np.inf
PROP = 1  # Probability with which each parameter combination is actually applied.

# Compose images:
COMPOSE_PATH = ''  # Output folder for composed images and label CSVs.
CSV_COLUMNS_TF = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
CSV_COLUMNS_KERAS = ['filename', 'xmin', 'ymin', 'xmax', 'ymax', 'class']
MARGIN = 100  # Pixels kept free around each quadrant border when placing a logo.

## Augment Images

In [3]:
def load_image(filename):
    """Read an image file and return it as a single-image batch.

    The file is opened with PIL and converted to ``COLOR_SPACE``; a new
    leading axis of length 1 is then added in front of the image data.

    Args:
        filename: Path of the image file to open.

    Returns:
        A numpy array holding the converted image with an extra first axis.
    """
    opened = p_image.open(filename)
    converted = opened.convert(COLOR_SPACE)
    batch = np.expand_dims(converted, axis=0)
    return batch

In [4]:
def save_image(array, filename):
    """Write an image array to disk.

    Length-1 axes are removed from ``array`` before it is turned into a PIL
    image, converted to ``COLOR_SPACE`` and saved under ``filename``.

    Args:
        array: Numpy array holding the image data.
        filename: Destination path handed to PIL's ``save``.
    """
    pixels = array.squeeze()
    p_image.fromarray(pixels).convert(COLOR_SPACE).save(filename)

In [5]:
def sequencer(mu, gabl, adga, afsh, afro):
    """Build the augmentation pipeline for one parameter combination.

    Args:
        mu: Value passed to ``ia.Multiply`` as ``mul``.
        gabl: Value passed to ``ia.GaussianBlur`` as ``sigma``.
        adga: Value passed to ``ia.AdditiveGaussianNoise`` as ``scale``.
        afsh: Value passed to ``ia.Affine`` as ``shear``.
        afro: Value passed to ``ia.Affine`` as ``rotate``.

    Returns:
        A ``(sequence, extension)`` pair: the ``ia.Sequential`` pipeline and a
        filename suffix that spells out the parameter values used.
    """
    steps = [
        ia.Multiply(mul=mu),
        ia.GaussianBlur(sigma=gabl),
        ia.AdditiveGaussianNoise(scale=adga),
        ia.Affine(rotate=afro, shear=afsh),
    ]
    pipeline = ia.Sequential(steps)

    # Pair each suffix label with its value, then glue everything together.
    labelled_values = (
        ('_mult', mu),
        ('_gabl', gabl),
        ('_adga', adga),
        ('_afsh', afsh),
        ('_afro', afro),
    )
    suffix = ''.join(label + str(value) for label, value in labelled_values)
    return pipeline, suffix

In [6]:
def augment_and_save(mu, gabl, adga, afsh, afro,
                     proportion, file, counter, max_inst,
                     save_path):
    """Maybe write one augmented copy of ``file`` and return the updated counter.

    The augmentation is performed only while ``counter`` is below ``max_inst``
    and a uniform random draw falls below ``proportion``; otherwise nothing is
    written and ``counter`` is returned unchanged.

    Args:
        mu, gabl, adga, afsh, afro: Augmentation values forwarded to ``sequencer``.
        proportion: Threshold the uniform random draw is compared against.
        file: Path of the source image.
        counter: Number of augmentations already written for this source.
        max_inst: Limit on ``counter`` above which nothing more is written.
        save_path: Directory the augmented image is saved into.

    Returns:
        ``counter`` incremented by one if an image was written, else ``counter``.
    """
    # The random draw only happens when the limit has not been reached yet.
    if not (counter < max_inst) or not (np.random.uniform() < proportion):
        return counter

    # Process image:
    pipeline, suffix = sequencer(mu, gabl, adga, afsh, afro)
    augmented = pipeline.augment_images(load_image(file))

    # Store image as <stem>_<NNN><suffix>_aug<original extension>:
    counter += 1
    stem, file_ext = os.path.splitext(os.path.basename(file))
    tag = '_' + str(counter).zfill(3) + suffix + '_aug'
    target = os.path.join(save_path, stem + tag + file_ext)
    save_image(augmented, target)
    return counter

In [7]:
# Source images to create augmentations from:
augment_files = sorted(glob.glob(FORE_FILES))

# Number of parameter combinations = product of the number of values per parameter.
n_variations = 1
for values in PARAMS.values():
    n_variations *= len(values)

print(len(augment_files), 'source images to process.', flush=True)
print('Approx.', int(n_variations * PROP), 'variations per file.', flush=True)
print('Limit set to max.', MAX_INST, 'variations per file.', flush=True)

# Saving into a directory that does not exist fails, so create it up front.
os.makedirs(AUGMENT_PATH, exist_ok=True)

for file in tqdm(augment_files):
    counter = 0  # Augmentations written so far for this source file.

    # For each combination:
    for mu in PARAMS['Multiply']:
        for gabl in PARAMS['GaussianBlur']:
            for adga in PARAMS['AdditiveGaussianNoise']:
                for afsh in PARAMS['AffineShear']:
                    for afro in PARAMS['AffineRotate']:

                        # Process image:
                        counter = augment_and_save(mu, gabl, adga, afsh, afro,
                                                   PROP, file, counter, MAX_INST,
                                                   AUGMENT_PATH)

352 source images to process.
Approx. 8 variations per file.
Limit set to max. inf variations per file.


100%|██████████| 352/352 [00:09<00:00, 35.47it/s]


## Compose Images

In [8]:
def compose_array_and_label(image_name, back_width, back_heigth, logo_class,
                            y_start, x_start, logo_width, logo_heigth,
                            images_list, back_arr, logo_arr):
    """Paste a logo into a background array and record its bounding box.

    Both ``images_list`` and ``back_arr`` are modified in place and also
    returned.

    Args:
        image_name: File name stored in the label record.
        back_width: Background width stored in the label record.
        back_heigth: Background height stored in the label record.
        logo_class: Class name stored in the label record.
        y_start: Row at which the top edge of the logo is placed.
        x_start: Column at which the left edge of the logo is placed.
        logo_width: Logo width, used to derive the right edge.
        logo_heigth: Logo height, used to derive the bottom edge.
        images_list: List that receives the new label tuple.
        back_arr: Background array the logo is written into.
        logo_arr: Logo pixels assigned to the target region.

    Returns:
        The ``(images_list, back_arr)`` pair after the update.
    """
    # Far corner of the box (exclusive slice bounds).
    y_stop = y_start + logo_heigth
    x_stop = x_start + logo_width

    # Label record: filename, width, height, class, xmin, ymin, xmax, ymax.
    record = (image_name, back_width, back_heigth, logo_class,
              x_start, y_start, x_stop, y_stop)
    images_list.append(record)

    # Overwrite the target region of the background with the logo.
    back_arr[y_start:y_stop, x_start:x_stop] = logo_arr

    return images_list, back_arr

In [9]:
def _class_and_padded_name(path):
    """Return ``(class, 'class_NNN_MMM')`` from the first three '_' tokens of a file name.

    The extension is removed before splitting, so that zero-padding also works
    when the third token is the last one in the name (e.g. ``'2.jpg'``).
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    tokens = stem.split('_')
    padded = '{}_{}_{}'.format(tokens[0], tokens[1].zfill(3), tokens[2].zfill(3))
    return tokens[0], padded


# NOTE(review): the comment below says "augmented" foregrounds, but the pattern
# read here is FORE_FILES, not the AUGMENT_PATH folder - confirm which is intended.
logo_files = sorted(glob.glob(FORE_FILES))  # Augmented foregrounds to combine.
back_files = sorted(glob.glob(BACK_FILES))  # Augmented backgrounds to combine.
print('Available augmented backgounds:', len(back_files))
print('Available augmented logos:     ', len(logo_files))

# Make sure the output folder exists (an empty COMPOSE_PATH means the working directory).
if COMPOSE_PATH:
    os.makedirs(COMPOSE_PATH, exist_ok=True)

images_list = []
for logo_file in logo_files:
    # Define array parameters:
    logo_arr = np.array(p_image.open(logo_file).convert(COLOR_SPACE))
    logo_heigth = np.shape(logo_arr)[0]  # Height
    logo_width = np.shape(logo_arr)[1]  # Width

    # Define label parameters:
    logo_class, logo_new_name = _class_and_padded_name(logo_file)

    for back_file in back_files:
        # Define array parameters (reloaded per pair because the array is modified in place):
        back_arr = np.array(p_image.open(back_file).convert(COLOR_SPACE))
        back_heigth = np.shape(back_arr)[0]  # Height
        back_width = np.shape(back_arr)[1]  # Width

        # Define label parameters:
        back_class, back_new_name = _class_and_padded_name(back_file)

        # NOTE: PNG creates files 3x larger than JPG.
        image_name = '{}_{}.jpg'.format(logo_new_name, back_new_name)

        # Allowed start ranges for the upper / lower halves (rows) ...
        min_u = int(0 + MARGIN)
        max_u = int(back_heigth / 2 - logo_heigth - MARGIN)
        min_d = int(back_heigth / 2 + MARGIN)
        max_d = int(back_heigth - logo_heigth - MARGIN)

        # ... and for the left / right halves (columns).
        min_l = int(0 + MARGIN)
        max_l = int(back_width / 2 - logo_width - MARGIN)
        min_r = int(back_width / 2 + MARGIN)
        max_r = int(back_width - logo_width - MARGIN)

        # Verify that the logo is not too big:
        assert (max_u - min_u) > 0, 'Logo too tall for the upper half of ' + back_file
        assert (max_d - min_d) > 0, 'Logo too tall for the lower half of ' + back_file
        assert (max_l - min_l) > 0, 'Logo too wide for the left half of ' + back_file
        assert (max_r - min_r) > 0, 'Logo too wide for the right half of ' + back_file

        # One copy of the logo per quadrant; the row is drawn before the column.
        # No random seed is set in this cell, so placements differ between runs.
        quadrants = (
            (min_u, max_u, min_l, max_l),  # UP and LEFT
            (min_u, max_u, min_r, max_r),  # UP and RIGHT
            (min_d, max_d, min_l, max_l),  # DOWN and LEFT
            (min_d, max_d, min_r, max_r),  # DOWN and RIGHT
        )
        for y_low, y_high, x_low, x_high in quadrants:
            y_start = np.random.randint(y_low, y_high)
            x_start = np.random.randint(x_low, x_high)
            images_list, back_arr = compose_array_and_label(image_name, back_width, back_heigth, logo_class,
                                                            y_start, x_start, logo_width, logo_heigth,
                                                            images_list, back_arr, logo_arr)

        # Save final image (joined with a path separator, like the CSV files below):
        save_image(back_arr, os.path.join(COMPOSE_PATH, image_name))

# The label files are written once, after all images have been composed.
# Compose TF labels CSV:
image_df = pd.DataFrame(images_list, columns=CSV_COLUMNS_TF)
image_df.to_csv(os.path.join(COMPOSE_PATH, 'train_labels_tf.csv'), index=False, mode='w')

# Compose KERAS labels CSV (`columns=` selects and orders the fields, so no drop is needed):
image_df.to_csv(os.path.join(COMPOSE_PATH, 'train_labels_keras.csv'), index=False, mode='w',
                columns=CSV_COLUMNS_KERAS, header=False)

Available augmented backgounds: 100
Available augmented logos:      60
