## Data Preparation

In [None]:
import lib
import time
import numpy as np
import imgaug as ia
import tensorflow as tf
import matplotlib as mp

from matplotlib import pyplot as pp
from tensorflow import keras as kr

In [None]:
# Do not run this file by itself: these variables must be defined
# in the file importing this one, as they are implementation dependent.
try:
    # A tuple evaluates every element, so each name is looked up and a
    # NameError is raised for the first one that is undefined. (A chained
    # `A or B or ...` stops at the first truthy value and would never look
    # at the remaining names.)
    (
        REQUIRED_DIMENSIONS,
        TRAIN_BATCH_SIZE,
        VALIDATION_BATCH_SIZE,
        TEST_BATCH_SIZE,
        N_AUG_VS_TEST,
    )
except NameError as err:
    # Chain the NameError so the traceback names the missing constant.
    raise Exception('One of the required global constants is missing.\n -> Do not run this file by itself?') from err

# Implementation independent globals
DATASET_ROOT      = './dataset/'
TRAIN_FOLDER      = 'train'
VALIDATION_FOLDER = 'validation'
TEST_FOLDER       = 'test'

In [None]:
# Reference timestamp taken before the custom functions and data sets below
# are set up; the final log call subtracts it from the current time to
# report the elapsed time.
start_time = time.time()

#### Custom Functions

In [None]:
def sometimes(aug):
    """Wrap ``aug`` in ``Sometimes(0.5, ...)``.

    Sometimes(0.5, ...) applies the given augmenter in 50% of all cases,
    e.g. Sometimes(0.5, GaussianBlur(0.3)) would blur roughly every
    second image.
    """
    return ia.augmenters.Sometimes(0.5, aug)


# Augmentation pipeline for the training set; children are listed in the
# order they were declared.
seq = ia.augmenters.Sequential(
    children=[
        # Crop: 0 to 16px taken off each side (randomly chosen)
        ia.augmenters.Crop(px=(0, 16)),
        # Fliplr: horizontal flip for 50% of the images
        ia.augmenters.Fliplr(p=0.5),
        # Flipud: vertical flip for 20% of the images
        ia.augmenters.Flipud(p=0.2),
    ],
)

def augment(img):
    """Return ``img`` after it has been run through the module-level ``seq``.

    A deterministic copy of ``seq`` is requested on every call
    (``to_deterministic``) and the image is augmented with that copy.
    """
    return seq.to_deterministic().augment_image(img)

In [None]:
# Tool to display data set and its labels
def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None):
    """Draw every image in ``ims`` on its own subplot of a new figure.

    Parameters:
        ims:     sequence of images. When the first element is a numpy
                 array, the whole batch is cast to uint8 and, if its last
                 axis does not have length 3, its axes are reordered with
                 ``transpose((0, 2, 3, 1))`` (presumably channels-first to
                 channels-last -- confirm against the callers).
        figsize: size handed to ``pyplot.figure``.
        rows:    number of subplot rows; the column count is derived from it.
        interp:  when falsy, images are drawn with ``interpolation='none'``;
                 otherwise ``None`` is passed as the interpolation.
        titles:  optional sequence indexed like ``ims``; ``titles[i]`` is
                 used as the title of image ``i``.
    """
    n_images = len(ims)

    # Guard on the length first so an empty batch does not fail on ims[0].
    if n_images > 0 and isinstance(ims[0], np.ndarray):
        ims = np.array(ims).astype(np.uint8)
        if ims.shape[-1] != 3:
            ims = ims.transpose((0, 2, 3, 1))

    f = pp.figure(figsize=figsize)

    # Ceiling division: the smallest column count for which rows * cols can
    # hold every image. (Testing the parity of the image count instead of
    # divisibility by `rows` yields too few cells, e.g. 6 images on 4 rows.)
    cols = (n_images + rows - 1) // rows

    for i, im in enumerate(ims):
        sp = f.add_subplot(rows, cols, i + 1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=16)
        # Draw on the subplot that was just created rather than relying on
        # pyplot's notion of the current axes.
        sp.imshow(im, interpolation=None if interp else 'none')

In [None]:
# Progress marker: the augmentation pipeline and the plotting helper have
# been defined at this point.
lib.log("Loaded custom functions.")

#### Load Existing Dataset & Perform Data Augmentation

In [None]:
# Short alias for the Keras image generator class
IDG = kr.preprocessing.image.ImageDataGenerator

# Plain generator: constructed with default arguments only
generator = IDG()

# Generator configured with `augment` as its preprocessing function
# (the variable keeps its existing spelling so outside references still work)
agumented_generator = IDG(preprocessing_function=augment)

lib.log("Loading regular data sets...")

# Training split, un-augmented
train_set = generator.flow_from_directory(
    directory=DATASET_ROOT + TRAIN_FOLDER,
    shuffle=False,
    batch_size=TRAIN_BATCH_SIZE,
    target_size=REQUIRED_DIMENSIONS,
)

# Validation split
validation_set = generator.flow_from_directory(
    directory=DATASET_ROOT + VALIDATION_FOLDER,
    shuffle=False,
    batch_size=VALIDATION_BATCH_SIZE,
    target_size=REQUIRED_DIMENSIONS,
)

# Test split
test_set = generator.flow_from_directory(
    directory=DATASET_ROOT + TEST_FOLDER,
    shuffle=False,
    batch_size=TEST_BATCH_SIZE,
    target_size=REQUIRED_DIMENSIONS,
)

lib.log("Loading augmented data sets...")

# Training split again, this time through the augmenting generator; it uses
# the same folder, batch size and shuffle=False as `train_set`.
augmented_train_set = agumented_generator.flow_from_directory(
    directory=DATASET_ROOT + TRAIN_FOLDER,
    shuffle=False,
    batch_size=TRAIN_BATCH_SIZE,
    target_size=REQUIRED_DIMENSIONS,
)

In [None]:
# Progress marker: the three plain iterators and the augmented training
# iterator have been created at this point.
# NOTE(review): no batch has been drawn from the iterators yet; whether any
# image has actually been augmented by now depends on when the generator
# applies its preprocessing function -- confirm before relying on the wording.
lib.log("Loaded existing dataset & performed data augmentation.")

#### Visualising augmented training set VS normal training set

In [None]:
lib.log(
    "Displaying first "
    + str(N_AUG_VS_TEST)
    + " batches of the augmented_train_set VS train_set:"
)

# Plot N_AUG_VS_TEST batches from the augmented iterator first, then the same
# number of batches from the plain training iterator.
for batch_source in (augmented_train_set, train_set):
    for i in range(N_AUG_VS_TEST):
        imgs, labels = next(batch_source)
        plots(imgs, titles=labels)

In [None]:
# Final progress marker: reports the seconds elapsed since `start_time`.
# NOTE(review): this is the only lib.log call in the file that passes a
# second positional argument (the other calls build a single string) --
# confirm that lib.log accepts and prints extra arguments.
lib.log("Data Augmentation & Dataset loading completed!\n\tTime taken: ", time.time() - start_time)