### Data Augmentation Techniques
* Scaling
* Translation
* Rotation (at 90 degrees)
* Rotation (at finer angles)
* Flipping
* Adding Salt and Pepper noise
* Lighting condition
* Perspective transform

In [1]:
!pip3 install -q tf-models-nightly --user

In [2]:
import tensorflow as tf
import tensorflow_models as tfm
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.gridspec as gridspec
import numpy as np
import os
import cv2
from math import floor, ceil, pi
import scipy
from PIL import Image
import glob

%matplotlib inline

## Change image size if needed


In [3]:
# Side length, in pixels, of the square images produced by the resize and
# augmentation helpers in this notebook.
IMAGE_SIZE = 128

# Output-path template read by new_img_name(). It starts empty and is
# reassigned to a format string with {0}/{1} placeholders by the processing
# loops at the end of the notebook.
path = ''

In [4]:
# Image Resizing
def tf_resize_images(X_img_file_paths):
    """Load image files and resize each to IMAGE_SIZE x IMAGE_SIZE.

    Args:
        X_img_file_paths: Iterable of image file paths readable by
            matplotlib.image.imread.

    Returns:
        A float32 numpy array holding one resized image per input path.
    """
    target_shape = (IMAGE_SIZE, IMAGE_SIZE)
    resized = []

    # Images are handled one at a time because their original sizes may differ.
    for file_path in X_img_file_paths:
        pixels = mpimg.imread(file_path)[:, :, :3]  # Keep at most three channels (drops alpha).
        resized.append(
            tf.image.resize(pixels, target_shape,
                            tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        )

    return np.array(resized, dtype = np.float32)

#### Scaling:
Having the object of interest appear at different scales is one of the most important aspects of image diversity. When your network is in the hands of real users, the object in the image can be tiny or large. Sometimes the object covers the entire image and still is not fully visible (i.e., it is cropped at its edges). The code below scales each image about its center.

In [5]:
def central_scale_images(X_imgs, scales):
    """Crop every image centrally at each scale and resize the crop back.

    Args:
        X_imgs: Iterable of images; each one is expanded to a batch of one
            before being handed to tf.image.crop_and_resize.
        scales: Sequence of scale factors; each defines a centred box whose
            normalised corners are 0.5 -/+ 0.5 * scale.

    Returns:
        A float32 numpy array with len(scales) crops per input image, each
        crop sized IMAGE_SIZE x IMAGE_SIZE.
    """
    n_scales = len(scales)

    # One [y1, x1, y2, x2] box per scale, centred on the image.
    crop_boxes = np.zeros((n_scales, 4), dtype = np.float32)
    for row, factor in enumerate(scales):
        near = 0.5 - 0.5 * factor
        far = 0.5 + 0.5 * factor
        crop_boxes[row] = np.array([near, near, far, far], dtype = np.float32)

    # Every box refers to batch entry 0 because images are processed singly.
    source_index = np.zeros(n_scales, dtype = np.int32)
    output_size = np.array([IMAGE_SIZE, IMAGE_SIZE], dtype = np.int32)

    scaled = []
    for image in X_imgs:
        single_batch = np.expand_dims(image, axis = 0)
        scaled.extend(
            tf.image.crop_and_resize(single_batch, crop_boxes,
                                     source_index, output_size)
        )

    return np.array(scaled, dtype = np.float32)

#### Translation:
We would like our network to recognize the object wherever it appears in the image. The object may also be only partially visible at a corner or edge of the image. For this reason, we shift the object to various parts of the image. This may also add background noise. The code snippet translates the image toward each of the four sides, retaining 80 percent (and, in a second pass, 90 percent) of the base image.

In [6]:
from math import ceil, floor

def get_translate_parameters(index):
    """Return the glimpse and paste parameters for one translation.

    Indices 0-3 shift left, right, top and bottom keeping 80 percent of the
    image; indices 4-7 do the same keeping 90 percent.

    Args:
        index: Translation selector in the range 0..7.

    Returns:
        A tuple (offset, size, w_start, w_end, h_start, h_end) where offset
        is a float32 [y, x] pair and size an int32 [height, width] pair (both
        are passed to tf.image.extract_glimpse by translate_images), and the
        remaining four integers bound the region of the output canvas that
        receives the glimpse.

    Raises:
        ValueError: If index is not one of the eight supported values.
    """
    # index -> (direction, shift fraction, retained fraction)
    specs = {
        0: ("left", 0.2, 0.8),
        1: ("right", 0.2, 0.8),
        2: ("top", 0.2, 0.8),
        3: ("bottom", 0.2, 0.8),
        4: ("left", 0.1, 0.9),
        5: ("right", 0.1, 0.9),
        6: ("top", 0.1, 0.9),
        7: ("bottom", 0.1, 0.9),
    }
    try:
        direction, shift, keep = specs[index]
    except KeyError:
        raise ValueError(
            "translation index must be in 0..7, got {0!r}".format(index)
        ) from None

    horizontal = direction in ("left", "right")
    toward_origin = direction in ("left", "top")

    # Length, in pixels, of the retained strip along the shifted axis.
    extent = int(ceil(keep * IMAGE_SIZE))
    # Deriving the start from the extent guarantees start + extent equals
    # IMAGE_SIZE; floor((1 - keep) * IMAGE_SIZE) can land one pixel short
    # because 1 - keep is not exactly representable in floating point.
    start = 0 if toward_origin else IMAGE_SIZE - extent
    end = extent if toward_origin else IMAGE_SIZE
    signed_shift = shift if toward_origin else -shift

    if horizontal:
        offset = np.array([0.0, signed_shift], dtype = np.float32)
        size = np.array([IMAGE_SIZE, extent], dtype = np.int32)
        w_start, w_end, h_start, h_end = start, end, 0, IMAGE_SIZE
    else:
        offset = np.array([signed_shift, 0.0], dtype = np.float32)
        size = np.array([extent, IMAGE_SIZE], dtype = np.int32)
        w_start, w_end, h_start, h_end = 0, IMAGE_SIZE, start, end

    return offset, size, w_start, w_end, h_start, h_end

def translate_images(X_imgs):
    """Produce eight translated copies of every image.

    For each of the eight parameter sets from get_translate_parameters, a
    glimpse is extracted from all images and pasted onto a canvas filled
    with 1.0.

    Args:
        X_imgs: Batch of images accepted by tf.image.extract_glimpse.
            Assumed to be IMAGE_SIZE x IMAGE_SIZE x 3 so that the glimpses
            fit the canvas -- confirm against the caller.

    Returns:
        A float32 numpy array containing, for translation 0 through 7 in
        turn, one translated image per input image.
    """
    n_images = len(X_imgs)
    collected = []

    for translation_index in range(8):
        base_offset, size, w_start, _, h_start, _ = \
            get_translate_parameters(translation_index)

        # The same offset is applied to every image in the batch.
        offsets = np.empty((n_images, 2), dtype = np.float32)
        offsets[:, :] = base_offset
        glimpses = tf.image.extract_glimpse(X_imgs, size, offsets)

        # Canvas pre-filled with the background colour (1.0).
        canvas = np.ones((n_images, IMAGE_SIZE, IMAGE_SIZE, 3),
                         dtype = np.float32)
        row_stop = h_start + size[0]
        col_stop = w_start + size[1]
        canvas[:, h_start:row_stop, w_start:col_stop, :] = glimpses
        collected.extend(canvas)

    return np.array(collected, dtype = np.float32)

#### Rotation (at 90 degrees):
The network has to recognize the object in any orientation. Assuming the image is square, rotating it by multiples of 90 degrees does not add any background noise to the image.

In [7]:
def rotate_images(X_imgs):
    """Rotate every image by one, two and three quarter turns.

    NOTE: a later cell in this file defines another function that is also
    named rotate_images (taking start/end angles); once that cell runs, this
    definition is no longer reachable under this name.

    Args:
        X_imgs: Iterable of images accepted by tf.image.rot90.

    Returns:
        A float32 numpy array with three rotated copies per input image,
        ordered k = 1, 2, 3 for each image in turn.
    """
    @tf.function
    def quarter_turns(image, turns):
        return tf.image.rot90(image, k = turns)

    rotated = [
        quarter_turns(image, turns)
        for image in X_imgs
        for turns in range(1, 4)  # k = 1, 2, 3 quarter turns
    ]
    return np.array(rotated, dtype = np.float32)

#### Rotation (at finer angles):
Depending on the requirement, it may be necessary to orient the object at finer angles. The problem with this approach is that it adds background noise. If the background of the image is a fixed color (say white or black), the newly added background can blend with the image. However, if the newly added background color doesn’t blend, the network may treat it as a feature and learn unnecessary features.

In [8]:
from math import pi

def rotate_images(X_imgs, start_angle, end_angle, n_images):
    """Rotate a batch of images at evenly spaced angles.

    Args:
        X_imgs: Images passed unchanged to tfm.vision.augment.rotate.
        start_angle: First angle of the sweep (presumably degrees, as the
            value is handed to the augment helper's angle argument).
        end_angle: Last angle of the sweep, reached when n_images > 1.
        n_images: Number of angles to generate. With 1, only start_angle is
            used; with 0 or less, nothing is generated.

    Returns:
        A float32 numpy array containing the rotated images for each angle
        in turn.
    """
    # A single angle has no spacing to compute; dividing by (n_images - 1)
    # would raise ZeroDivisionError.
    step = (end_angle - start_angle) / (n_images - 1) if n_images > 1 else 0.0

    rotated = []
    for index in range(n_images):
        angle = start_angle + index * step
        rotated.extend(tfm.vision.augment.rotate(X_imgs, angle))

    return np.array(rotated, dtype = np.float32)

In [9]:
from math import pi

def shear_images(X_imgs, start_angle, end_angle, n_images):
    """Shear a batch of images along x at evenly spaced levels.

    Args:
        X_imgs: Images passed unchanged to tfm.vision.augment.shear_x.
        start_angle: First shear level of the sweep.
        end_angle: Last shear level of the sweep, reached when n_images > 1.
        n_images: Number of levels to generate. With 1, only start_angle is
            used; with 0 or less, nothing is generated.

    Returns:
        A float32 numpy array containing the sheared images for each level
        in turn.
    """
    # A single level has no spacing to compute; dividing by (n_images - 1)
    # would raise ZeroDivisionError.
    step = (end_angle - start_angle) / (n_images - 1) if n_images > 1 else 0.0

    sheared = []
    for index in range(n_images):
        level = start_angle + index * step
        # [0, 0, 0] is forwarded as the helper's third argument (presumably
        # the fill value for exposed pixels -- confirm in the tf-models docs).
        sheared.extend(tfm.vision.augment.shear_x(X_imgs, level, [0,0,0]))

    return np.array(sheared, dtype = np.float32)

In [10]:
from math import pi

def gaussian_noise_images(X_imgs, start_angle, end_angle, n_images):
    """Apply tfm.vision.augment.gaussian_noise at evenly spaced settings.

    Despite their names, start_angle and end_angle are not angles here: each
    generated value is forwarded as the third positional argument of
    tfm.vision.augment.gaussian_noise, with 0.1 as the second (presumably the
    lower and upper bounds of the noise strength -- confirm in the tf-models
    docs).

    Args:
        X_imgs: Images passed unchanged to the augment helper.
        start_angle: First value of the sweep.
        end_angle: Last value of the sweep, reached when n_images > 1.
        n_images: Number of values to generate. With 1, only start_angle is
            used; with 0 or less, nothing is generated.

    Returns:
        A float32 numpy array containing the noised images for each value in
        turn.
    """
    # A single value has no spacing to compute; dividing by (n_images - 1)
    # would raise ZeroDivisionError.
    step = (end_angle - start_angle) / (n_images - 1) if n_images > 1 else 0.0

    noised = []
    for index in range(n_images):
        upper = start_angle + index * step
        noised.extend(tfm.vision.augment.gaussian_noise(X_imgs, 0.1, upper))

    return np.array(noised, dtype = np.float32)

#### Flipping:
This augmentation matters because it removes the network's bias toward assuming that certain features of the object appear only on one particular side. Consider the case shown in the image example: you don’t want the network to learn that the tilt of a banana happens only on the right side, as observed in the base image. Also notice that flipping produces a different set of images from rotation at multiples of 90 degrees. An additional open question: has anyone studied the maximum number of classes for which this still gives good performance, assuming that data with a good amount of diversity can be generated for each class and that training time is not a factor?

In [11]:
def flip_images(X_imgs):
    """Create three mirrored variants of every image.

    Args:
        X_imgs: Iterable of images accepted by the tf.image flip helpers.

    Returns:
        A float32 numpy array holding, for each input image in turn, its
        left-right flip, its up-down flip and its transpose.
    """
    @tf.function
    def mirror_variants(image):
        return [
            tf.image.flip_left_right(image),
            tf.image.flip_up_down(image),
            tf.image.transpose(image),
        ]

    mirrored = []
    for image in X_imgs:
        mirrored.extend(mirror_variants(image))

    return np.array(mirrored, dtype = np.float32)

#### Adding Salt and Pepper noise:
Salt and pepper noise refers to the addition of white and black dots to the image. Though this may seem unnecessary, it is important to remember that a general user who takes an image to feed into your network may not be a professional photographer. Their camera can produce blurry images with lots of white and black dots. This augmentation helps the network handle images from such users.


In [12]:
def add_salt_pepper_noise(X_imgs, salt_vs_pepper = 0.2, amount = 0.004):
    """Return a copy of the images with salt (1) and pepper (0) pixels added.

    Args:
        X_imgs: Batch of equally sized images; the input is never modified.
        salt_vs_pepper: Fraction of the noise points that are salt.
        amount: Fraction of the first image's element count that is turned
            into noise points for each image.

    Returns:
        A numpy array with the same shape as the input in which randomly
        chosen pixels are set to 1 (salt) and then to 0 (pepper); pepper is
        applied last and therefore wins where both hit the same pixel.
    """
    # np.array always copies, so the caller's data stays untouched even when
    # a list of arrays is passed (list.copy() would only be shallow).
    noisy = np.array(X_imgs)
    if len(noisy) == 0:
        return noisy

    rows, cols = noisy[0].shape[:2]
    n_values = noisy[0].size
    num_salt = int(np.ceil(amount * n_values * salt_vs_pepper))
    num_pepper = int(np.ceil(amount * n_values * (1.0 - salt_vs_pepper)))

    for image in noisy:
        # randint's upper bound is exclusive, so passing the dimension itself
        # lets every row and column (including the last) receive noise.
        salt_rows = np.random.randint(0, rows, num_salt)
        salt_cols = np.random.randint(0, cols, num_salt)
        image[salt_rows, salt_cols] = 1

        pepper_rows = np.random.randint(0, rows, num_pepper)
        pepper_cols = np.random.randint(0, cols, num_pepper)
        image[pepper_rows, pepper_cols] = 0

    return noisy

#### Lighting condition:
This is a very important type of diversity needed in the image dataset, not only for the network to properly learn the object of interest but also to simulate the practical scenario of images being taken by the user. The lighting condition of the images is varied by blending random noise into the image (note that the code below samples uniform noise, although the function name says Gaussian).

In [13]:
def add_gaussian_noise(X_imgs):
    """Blend a random noise plane into every image.

    NOTE: despite the name, the noise is drawn with np.random.random, i.e.
    uniformly from [0, 1); no Gaussian parameters are involved.

    Args:
        X_imgs: Iterable of float32 images with three channels (the noise
            plane is replicated three times and blended with
            cv2.addWeighted, which needs matching shapes and types).

    Returns:
        A float32 numpy array with one blended image per input image; an
        empty input yields an empty array.
    """
    blended = []

    for image in X_imgs:
        rows, cols = image.shape[:2]
        # One noise value per pixel, shared by all three channels.
        noise_plane = np.random.random((rows, cols, 1)).astype(np.float32)
        noise = np.concatenate((noise_plane, noise_plane, noise_plane), axis = 2)
        # Result is 0.75 * image + 0.25 * (0.25 * noise).
        blended.append(cv2.addWeighted(image, 0.75, 0.25 * noise, 0.25, 0))

    return np.array(blended, dtype = np.float32)

#### Perspective transform:
In a perspective transform, we try to project the image from a different point of view. For this, the position of the object should be known in advance. Merely calculating a perspective transform without knowing the position of the object can degrade the dataset. Hence, this type of augmentation has to be performed selectively. The greatest advantage of this augmentation is that it can emphasize the parts of the object in the image that the network needs to learn.

In [14]:
def get_mask_coord(imshape):
    """Return the four corner points of the fixed quadrilateral mask.

    Args:
        imshape: Image shape whose first entry is the height and second the
            width.

    Returns:
        An int32 numpy array of shape (1, 4, 2) with (x, y) points obtained
        by scaling fixed fractions with the image width and height.
    """
    height, width = imshape[0], imshape[1]
    # (x fraction, y fraction) for bottom-left, top-left, top-right, bottom-right.
    corner_fractions = [(0.09, 0.99), (0.43, 0.32), (0.56, 0.32), (0.85, 0.99)]
    corners = [(fx * width, fy * height) for fx, fy in corner_fractions]
    return np.array([corners], dtype = np.int32)

def get_perspective_matrices(X_img):
    """Compute the perspective matrix that maps the mask onto the image.

    Args:
        X_img: Image array; only its shape is used.

    Returns:
        The matrix returned by cv2.getPerspectiveTransform for the mask
        corners from get_mask_coord and a destination rectangle inset by 15
        pixels on the left and right.
    """
    margin = 15
    height, width = X_img.shape[0], X_img.shape[1]

    # Estimated corners of the object of interest inside the image.
    source_points = np.float32(get_mask_coord(X_img.shape))
    target_points = np.float32([
        [margin, height],
        [margin, 0],
        [width - margin, 0],
        [width - margin, height],
    ])

    return cv2.getPerspectiveTransform(source_points, target_points)

def perspective_transform(X_img):
    """Warp one image with the matrix from get_perspective_matrices.

    Args:
        X_img: Image array to warp.

    Returns:
        The warped image, same width and height as the input, produced by
        cv2.warpPerspective with linear interpolation.
    """
    # Only a single, fixed kind of perspective change is applied.
    output_size = (X_img.shape[1], X_img.shape[0])  # (width, height)
    matrix = get_perspective_matrices(X_img)
    return cv2.warpPerspective(X_img, matrix, output_size,
                               flags = cv2.INTER_LINEAR)

In [15]:
def invert_images(X_imgs):
    """Convert every image to grayscale and invert it.

    Args:
        X_imgs: Iterable of images accepted by tfm.vision.augment.grayscale.

    Returns:
        A float32 numpy array with one inverted grayscale image per input.
    """
    inverted = [
        tfm.vision.augment.invert(tfm.vision.augment.grayscale(image))
        for image in X_imgs
    ]
    return np.array(inverted, dtype = np.float32)

## Generate and save augmented images

In [16]:
def resize_image(i, X_img):
    """Save the first image of X_img under the "img_resized" suffix.

    Args:
        i: Path of the source image; new_img_name derives the output path
            from it.
        X_img: Batch of images; only element 0 is written.
    """
    destination = new_img_name(i, "img_resized")
    tf.keras.utils.save_img(destination, X_img[0])

In [17]:
def scaled_image(folder, img):
    """Save centrally scaled versions of the image, one file per scale.

    Args:
        folder: Path of the source image; new_img_name derives each output
            path from it.
        img: Batch of images passed to central_scale_images.
    """
    scale_factors = [0.99,0.98,0.97,0.96,0.95]
    crops = central_scale_images(img, scale_factors)

    name_template = "img_scale_{0}"
    for position, _ in enumerate(scale_factors):
        destination = new_img_name(folder, name_template.format(position))
        tf.keras.utils.save_img(destination, crops[position])

In [18]:
def tranlate_image(folder, img):
    """Save eight translated versions of the image.

    The files are numbered -4 through 3, and the same number is used to index
    the translated batch, so negative numbers pick entries counted from the
    end of that batch.

    Args:
        folder: Path of the source image; new_img_name derives each output
            path from it.
        img: Batch of images passed to translate_images.
    """
    shifted = translate_images(img)

    name_template = "img_translate_{0}"
    for number in range(-4, 4):
        destination = new_img_name(folder, name_template.format(number))
        tf.keras.utils.save_img(destination, shifted[number])

In [19]:
def shear(folder, img):
    """Save 14 sheared versions of the image (levels from -0.1 to 0.1).

    Args:
        folder: Path of the source image; new_img_name derives each output
            path from it.
        img: Batch of images passed to shear_images.
    """
    n_variants = 14
    variants = shear_images(img, -0.1, 0.1, n_variants)

    name_template = "img_sheared_{0}"
    for position in range(n_variants):
        destination = new_img_name(folder, name_template.format(position))
        tf.keras.utils.save_img(destination, variants[position])

In [20]:
def rotate_general_image(folder, img):
    """Save 14 rotated versions of the image (angles from -10 to 10).

    Args:
        folder: Path of the source image; new_img_name derives each output
            path from it.
        img: Batch of images passed to rotate_images.
    """
    n_variants = 14
    variants = rotate_images(img, -10, 10, n_variants)

    name_template = "img_rotated_{0}"
    for position in range(n_variants):
        destination = new_img_name(folder, name_template.format(position))
        tf.keras.utils.save_img(destination, variants[position])

In [21]:
def flipped_image(folder, img):
    """Save the first three flipped variants returned by flip_images.

    Args:
        folder: Path of the source image; new_img_name derives each output
            path from it.
        img: Batch of images passed to flip_images.
    """
    variants = flip_images(img)

    name_template = "img_flipped_{0}"
    for position in range(3):
        destination = new_img_name(folder, name_template.format(position))
        tf.keras.utils.save_img(destination, variants[position])

In [22]:
def salt_pepper(folder, img):
    """Save the first image with salt and pepper noise applied.

    Args:
        folder: Path of the source image; new_img_name derives the output
            path from it.
        img: Batch of images passed to add_salt_pepper_noise.
    """
    noisy_batch = add_salt_pepper_noise(img)
    destination = new_img_name(folder, "img_salt_pepper")
    tf.keras.utils.save_img(destination, noisy_batch[0])

In [23]:
def lighting(folder, img):
    """Save the first image after the add_gaussian_noise blend.

    Args:
        folder: Path of the source image; new_img_name derives the output
            path from it.
        img: Batch of images passed to add_gaussian_noise.
    """
    blended_batch = add_gaussian_noise(img)
    destination = new_img_name(folder, "img_gaussian")
    tf.keras.utils.save_img(destination, blended_batch[0])

In [24]:
def invert(folder, img):
    """Save the inverted grayscale version of the first image.

    Args:
        folder: Path of the source image; new_img_name derives the output
            path from it.
        img: Batch of images passed to invert_images.
    """
    inverted_batch = invert_images(img)
    destination = new_img_name(folder, "img_inverted")
    tf.keras.utils.save_img(destination, inverted_batch[0])

In [25]:
def gaussian_noise(folder, img):
    """Save 14 noised versions of the image (settings from 0.1 to 2.0).

    Args:
        folder: Path of the source image; new_img_name derives each output
            path from it.
        img: Batch of images passed to gaussian_noise_images.
    """
    n_variants = 14
    variants = gaussian_noise_images(img, 0.1, 2.0, n_variants)

    name_template = "img_gaussian_noise_{0}"
    for position in range(n_variants):
        destination = new_img_name(folder, name_template.format(position))
        tf.keras.utils.save_img(destination, variants[position])

In [26]:
def exec_images(i, img):
    """Run every enabled augmentation on one image and save the results.

    Args:
        i: Path of the source image; each helper passes it to new_img_name
            to derive its output file names.
        img: Batch containing the resized source image.
    """
    # Resized image (disabled)
    # resize_image(i, img)
    # Central scaling at the factors listed in scaled_image (0.99 - 0.95)
    scaled_image(i, img)
    # Translate the image
    tranlate_image(i, img)
    # Rotate the image at finer angles (rotate_general_image sweeps -10 to 10)
    rotate_general_image(i, img)
    # Shear the image
    shear(i, img)
    # Flip the image (disabled)
    # flipped_image(i, img)
    # Add salt and pepper noise
    salt_pepper(i, img)
    # Add gaussian noise in pixels
    gaussian_noise(i, img)
    # Lighting condition
    lighting(i, img)

In [27]:
def new_img_name(full_path, new_name):
    """Build the output path for an augmented copy of an image.

    The directory part of full_path (including its trailing "/") and the
    file stem followed by "_" and new_name are substituted into the
    module-level `path` template as {0} and {1}.

    Args:
        full_path: "/"-separated path of the source image.
        new_name: Suffix identifying the augmentation.

    Returns:
        The formatted output path.
    """
    # rpartition yields an empty directory and separator for a bare file
    # name, so the result stays relative instead of pointing at "/".
    directory, separator, file_name = full_path.rpartition("/")
    # Everything after the first dot of the file name is dropped.
    stem = file_name.split(".")[0] + "_"
    return path.format(directory + separator, stem + new_name)

## Apply image augmentation

In [28]:
# Collect every PNG one directory level below ./augmented.
dirList = glob.glob("./augmented/*/*.png")
X_img_paths = list(dirList)

for img in X_img_paths:
    # Template consumed by new_img_name(): directory, new stem, then the
    # source file's extension.
    path = "{0}{1}." + img.rpartition(".")[2]
    X_resize = tf_resize_images([img])
    exec_images(img, X_resize)
    

## Create inverted images

In [29]:
# Collect every PNG one directory level below ./augmented.
dirList = glob.glob("./augmented/*/*.png")
X_img_paths = list(dirList)

for img in X_img_paths:
    # new_img_name() already supplies the directory and the file stem, so
    # the template must hold only the two placeholders and the extension.
    # Prefixing part of the source path would duplicate it for any path
    # that does not begin with ".".
    path = "{0}{1}." + img.split(".")[-1]
    X_resize = tf_resize_images([img])
    # Invert image
    invert(img, X_resize)