# Image Preprocessing and Augmentation

This notebook handles 2 aspects of preparing the data:
1. Preprocessing of images by finding the face in each image and performing affine transformations to normalize them
2. Using affine transformations to generate more images based on the training set.

In [1]:
import dlib
import cv2
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras import backend as K
from sklearn.model_selection import train_test_split
# Use Theano-style ('th') dimension ordering, i.e. channels first; the image
# arrays fed to Keras below are shaped (samples, channels, height, width).
K.set_image_dim_ordering('th')

Using TensorFlow backend.


In [2]:
def show_image(image):
    """Display `image` with a greyscale colour map and the axes hidden."""
    plt.imshow(image, cmap='gray')
    plt.axis('off')
    plt.show()

In [3]:
# The code here is adapted from the following 2 sources.
# https://github.com/cmusatyalab/openface/blob/master/openface/align_dlib.py
# https://gist.github.com/ageitgey/82d0ea0fdb56dc93cb9b716e7ceb364b

# Reference (x, y) positions, one row per facial landmark (68 rows). The row
# order matches the landmark order returned by the predictor, since both are
# indexed with the same landmark indices when computing the alignment.
TEMPLATE = np.float32([
    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])

# Rescale the template so that each coordinate spans the range [0, 1]
TPL_MIN = TEMPLATE.min(axis=0)
TPL_MAX = TEMPLATE.max(axis=0)
MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)

# Indices of the three landmarks used to compute the alignment transform
OUTER_EYES_AND_NOSE = [36, 45, 33]
np_landmark_indices = np.array(OUTER_EYES_AND_NOSE)

# The required pre-trained facial landmark model can be downloaded here:
# http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
predictor_model = "shape_predictor_68_face_landmarks.dat"

# Initialise the face detector and the landmark predictor from dlib
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(predictor_model)


def get_aligned_faces(image, img_dim=50):
    """
    Detect the faces in `image` and return an aligned thumbnail of each one.

    For every detected face the landmarks are predicted, and the landmarks
    selected by `np_landmark_indices` are mapped by an affine transform onto
    their positions in `MINMAX_TEMPLATE` scaled by `img_dim`.

    Parameters
    ----------
    image : array
        Image to search (the callers in this notebook pass 2-D uint8 arrays).
    img_dim : int
        Width and height, in pixels, of each output thumbnail.

    Returns
    -------
    list
        One `img_dim` x `img_dim` warped image per detected face, in detection
        order; an empty list if no face was found.
    """
    face_rects = detector(image, 1)

    # The destination points and output size are the same for every face
    target_points = img_dim * MINMAX_TEMPLATE[np_landmark_indices]
    output_size = (img_dim, img_dim)

    thumbnails = []
    for face_rect in face_rects:
        shape = predictor(image, face_rect)
        points = np.float32([(part.x, part.y) for part in shape.parts()])

        transform = cv2.getAffineTransform(points[np_landmark_indices],
                                           target_points)
        thumbnails.append(cv2.warpAffine(image, transform, output_size))

    return thumbnails

## Preprocess training dataset

#### Align `X_train` images

In [4]:
# Load the flattened training images and their labels
images = np.load('X_train.npy')
labels = np.load('y_train.npy')

# Restore each image to 50 x 37 and cast to unsigned 8-bit integers,
# because the face alignment step requires integer pixel values
images_reshaped = images.reshape((len(images), 50, 37)).astype(np.uint8)

In [5]:
# Align every X_train image. Images in which no face is detected are dropped
# together with their label; if several faces are found the first one is used.
aligned_images = []
filtered_labels = []

for idx, image in enumerate(images_reshaped):
    faces = get_aligned_faces(image)
    if not faces:
        continue
    aligned_images.append(faces[0])
    filtered_labels.append(labels[idx])

In [6]:
# Save the aligned images
np_aligned_images = np.array(aligned_images).reshape(len(aligned_images), 50*50)
np.save("X_train_aligned.npy", np_aligned_images)
np.save("y_train_aligned.npy", np.array(filtered_labels))

#### Align `X_test` images

In [7]:
# Load the flattened test images and restore them to 50 x 37 uint8 images
test_images = np.load('X_test.npy')
test_images_reshaped = test_images.reshape((len(test_images), 50, 37)).astype(np.uint8)

In [8]:
aligned_test_images = []
# Indices of test images where no face was found; these are handled
# differently, as stated in the README.
missing_idx = []

for idx, image in enumerate(test_images_reshaped):
    faces = get_aligned_faces(image)
    if not faces:
        missing_idx.append(idx)
        continue
    aligned_test_images.append(faces[0])

In [9]:
# Flatten each 50 x 50 aligned test face into one row and save the result
np_aligned_test_images = np.array(aligned_test_images).reshape((len(aligned_test_images), 50 * 50))
np.save("X_test_aligned.npy", np_aligned_test_images)

In [10]:
# Show the indices of the test images in which no face was found
missing_idx

[25, 85, 104, 106, 125, 131, 180, 202, 211, 233, 238, 244, 279, 283, 304, 309]

## Augment training dataset

This section generates more images by applying randomised affine transformations to the images in the given dataset.

In [11]:
# Height and width, in pixels, of the unaligned images
h = 50
w = 37

def shapeData(data, h=h, w=w):
    """
    Reshape `data` to (number of samples, 1, h, w) and convert it to float32.

    The number of samples is taken from the first dimension of `data`; the
    defaults for `h` and `w` are the values set just above this function.
    """
    num_samples = data.shape[0]
    reshaped = data.reshape(num_samples, 1, h, w)
    return reshaped.astype('float32')

#### Split `X_train` set into `X_train_train` (training set) and `X_train_test` (validation set for model selection)

In [12]:
# Fixed seed so that the split below is the same on every run
seed = 3244

# Hold out 20% of X_train as the validation set used for model selection
images_train, images_test, labels_train, labels_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=seed,
)

#### Augment `X_train_train` images (to be included in unaligned training set)

In [13]:
images_reshaped_float = shapeData(images_train)

# Create a generator that performs shearing, zooming and/or rotation on the given images
datagen = ImageDataGenerator(shear_range=0.2,
                             zoom_range=0.2,
                             rotation_range=10)
datagen.fit(images_reshaped_float)

new_data = []  # store new images
new_label = []  # store the label of new images
number_of_batches = 800  # number of augmented images to collect
batches = 0  # number of augmented images collected so far

# Only the first image of each batch is ever used, so request one image per
# batch instead of generating a large batch and discarding all but one of it.
# The flow is seeded with the intent of making the generated set repeatable.
for X_batch, Y_batch in datagen.flow(images_reshaped_float, labels_train,
                                     batch_size=1, seed=seed):
    image = X_batch[0][0]  # first sample, first channel -> 2-D image
    label = Y_batch[0]

    # If no face was found, ignore the generated image
    faces = get_aligned_faces(image.astype('uint8'))
    if not faces:
        continue

    new_data.append(image.flatten())
    new_label.append(label)
    batches += 1
    if batches >= number_of_batches:
        break

np.save("X_train_generated_train.npy", np.array(new_data))
np.save("y_train_generated_train.npy", np.array(new_label))

#### Augment `X_train_test` images (not used for validation but added to full training set after model selection to train full unaligned model)

In [14]:
images_reshaped_float = shapeData(images_test)

# Create a generator that performs shearing, zooming and/or rotation on the given images
datagen = ImageDataGenerator(shear_range=0.2,
                             zoom_range=0.2,
                             rotation_range=10)
datagen.fit(images_reshaped_float)

new_data = []  # store new images
new_label = []  # store the label of new images
number_of_batches = 200  # number of augmented images to collect
batches = 0  # number of augmented images collected so far

# Only the first image of each batch is ever used, so request one image per
# batch instead of generating a large batch and discarding all but one of it.
# The flow is seeded with the intent of making the generated set repeatable.
for X_batch, Y_batch in datagen.flow(images_reshaped_float, labels_test,
                                     batch_size=1, seed=seed):
    image = X_batch[0][0]  # first sample, first channel -> 2-D image
    label = Y_batch[0]

    # If no face was found, ignore the generated image
    faces = get_aligned_faces(image.astype('uint8'))
    if not faces:
        continue

    new_data.append(image.flatten())
    new_label.append(label)
    batches += 1
    if batches >= number_of_batches:
        break

np.save("X_train_generated_test.npy", np.array(new_data))
np.save("y_train_generated_test.npy", np.array(new_label))

#### Align generated `X_train_train` images (to be included in aligned training set)

In [15]:
# Use names specific to the generated data. Reusing `images`, `labels`,
# `aligned_images`, ... here would overwrite the original training set and its
# aligned version, so re-running the split or save cells above would silently
# operate on generated images instead.
generated_train_images = np.load('X_train_generated_train.npy')
generated_train_labels = np.load('y_train_generated_train.npy')
generated_train_reshaped = generated_train_images.reshape(
    generated_train_images.shape[0], 50, 37).astype('uint8')

generated_train_aligned = []
generated_train_aligned_labels = []
for generated_image, generated_label in zip(generated_train_reshaped, generated_train_labels):
    generated_faces = get_aligned_faces(generated_image)
    if not generated_faces:  # no face detected: drop the image and its label
        continue
    generated_train_aligned.append(generated_faces[0])
    generated_train_aligned_labels.append(generated_label)

# Flatten each 50 x 50 aligned face into one row before saving
np_generated_train_aligned = np.array(generated_train_aligned).reshape(
    len(generated_train_aligned), 50*50)
np.save("X_train_generated_train_aligned.npy", np_generated_train_aligned)
np.save("y_train_generated_train_aligned.npy", np.array(generated_train_aligned_labels))

#### Align generated `X_train_test` images (not used for validation but added to full training set after model selection to train full aligned model)

In [16]:
# Use names specific to the generated data. Reusing `images`, `labels`,
# `aligned_images`, ... here would overwrite the original training set and its
# aligned version, so re-running the split or save cells above would silently
# operate on generated images instead.
generated_test_images = np.load('X_train_generated_test.npy')
generated_test_labels = np.load('y_train_generated_test.npy')
generated_test_reshaped = generated_test_images.reshape(
    generated_test_images.shape[0], 50, 37).astype('uint8')

generated_test_aligned = []
generated_test_aligned_labels = []
for generated_image, generated_label in zip(generated_test_reshaped, generated_test_labels):
    generated_faces = get_aligned_faces(generated_image)
    if not generated_faces:  # no face detected: drop the image and its label
        continue
    generated_test_aligned.append(generated_faces[0])
    generated_test_aligned_labels.append(generated_label)

# Flatten each 50 x 50 aligned face into one row before saving
np_generated_test_aligned = np.array(generated_test_aligned).reshape(
    len(generated_test_aligned), 50*50)
np.save("X_train_generated_test_aligned.npy", np_generated_test_aligned)
np.save("y_train_generated_test_aligned.npy", np.array(generated_test_aligned_labels))