# Utility Functions

## 1.1 Import Libraries

In [1]:
import os, math, cv2, dlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.preprocessing import image

Using TensorFlow backend.


## 1.2 Global Parameters

In [2]:
# Module-level dlib objects, created once and reused by run_dlib_shape for
# every image instead of being rebuilt per call.
detector = dlib.get_frontal_face_detector()
# The landmark model is loaded from a path relative to the current working
# directory; the file name indicates a 68-point landmark model.
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

## 2.0 Data Pre-processing Functions

In [3]:
def shape_to_np(shape, dtype="int"):
    """
    Convert a landmark container into an array of (x, y) coordinates.

    :param shape: object exposing ``num_parts`` and ``part(i)``, where each part has
                  ``x`` and ``y`` attributes
    :param dtype: NumPy dtype of the returned array
    :return: array of shape (shape.num_parts, 2), one (x, y) row per landmark
    """
    n_points = shape.num_parts
    landmarks = (shape.part(idx) for idx in range(n_points))
    xy_pairs = [(pt.x, pt.y) for pt in landmarks]

    # the explicit reshape keeps the (0, 2) shape when there are no landmarks
    return np.array(xy_pairs, dtype=dtype).reshape(n_points, 2)

def rect_to_bb(rect):
    """
    Convert a rectangle with left/top/right/bottom accessors into an
    OpenCV-style bounding box.

    :param rect: object exposing ``left()``, ``top()``, ``right()`` and ``bottom()``
    :return: tuple (x, y, w, h) where (x, y) is the top-left corner and
             w = right - left, h = bottom - top
    """
    left, top = rect.left(), rect.top()
    width = rect.right() - left
    height = rect.bottom() - top
    return (left, top, width, height)

def run_dlib_shape(image):
    """
    Detect faces in an image and return the landmarks of the largest one.

    The image is neither loaded nor resized here: the array passed in is cast to
    uint8, converted to grayscale and handed to the module-level dlib ``detector``
    and ``predictor``.

    :param image: image array with three colour channels (anything accepted by
                  ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)`` after the uint8 cast)
    :return: tuple (landmarks, image_uint8)
        landmarks:    int64 array of shape (num_landmarks, 2) holding the (x, y)
                      landmark coordinates of the face with the largest bounding box,
                      or None when no face is detected
        image_uint8:  the input image cast to uint8
    """
    resized_image = image.astype('uint8')

    # NOTE(review): the conversion assumes BGR channel order; the callers in this
    # file load images through keras, which presumably yields RGB -- confirm.
    # cvtColor on a uint8 image already yields uint8, so no further cast is needed.
    gray = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale image (second argument: dlib upsampling count)
    rects = detector(gray, 1)
    if len(rects) == 0:
        return None, resized_image

    def _bb_area(rect):
        # area of the OpenCV-style bounding box of a detection
        (_, _, w, h) = rect_to_bb(rect)
        return w * h

    # Pick the largest face first so the landmark predictor runs once instead of
    # once per detected face; max() keeps the first detection on ties, matching
    # the behaviour of an argmax over the areas.
    largest = max(rects, key=_bb_area)
    landmarks = shape_to_np(predictor(gray, largest))

    return landmarks.astype(np.int64), resized_image

def extract_features_labels_from_celeba(basedir='./Datasets/celeba'):
    """
    This function extracts the landmark features for all '.jpg' images in the folder
    '<basedir>/img'. It also extracts the gender and smiling labels for each image
    from the tab-separated file '<basedir>/labels.csv'.

    The labels file is expected to have a header row, followed by rows whose first
    field is the image file name without extension, third field the gender label and
    fourth field the smiling label.

    :param basedir: root folder of the dataset (defaults to './Datasets/celeba')
    :return:
        landmark_features:  an array containing the landmark points for each image in which
                            a face was detected
        gender_labels:      a float array with the gender label of each such image, computed
                            as (label + 1) / 2 so that -1 becomes 0 and 1 stays 1
                            (the original author documents this as male=0 and female=1)
        smiling_labels:     a float array with the smiling label of each such image, mapped
                            the same way (documented as not smiling=0 and smiling=1)
    :raises KeyError: if an image has no entry in the labels file
    """
    images_dir = os.path.join(basedir, 'img')
    labels_path = os.path.join(basedir, 'labels.csv')
    target_size = None

    # Obtaining the labels; the context manager guarantees the file is closed
    gender_by_name = {}
    smiling_by_name = {}
    with open(labels_path, 'r') as labels_file:
        next(labels_file, None)  # skip the header row
        for line in labels_file:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at the end of the file)
            fields = line.strip('"\n').split('\t')
            gender_by_name[fields[0]] = int(fields[2])
            smiling_by_name[fields[0]] = int(fields[3])

    # Extract landmark features and labels
    all_features = []
    all_gender_labels = []
    all_smiling_labels = []
    # sorted() makes the sample order reproducible; os.listdir order is arbitrary
    for entry in sorted(os.listdir(images_dir)):
        if not entry.endswith('.jpg'):
            continue
        # file name without extension, independent of path separators or dots in the path
        file_name = os.path.splitext(entry)[0]

        # load image
        img = image.img_to_array(
            image.load_img(os.path.join(images_dir, entry),
                           target_size=target_size,
                           interpolation='bicubic'))
        features, _ = run_dlib_shape(img)
        if features is None:
            continue  # no face detected: the image contributes neither features nor labels
        all_features.append(features)
        all_gender_labels.append(gender_by_name[file_name])
        all_smiling_labels.append(smiling_by_name[file_name])

    landmark_features = np.array(all_features)
    gender_labels = (np.array(all_gender_labels) + 1)/2  # maps -1 to 0 and 1 to 1
    smiling_labels = (np.array(all_smiling_labels) + 1)/2
    return landmark_features, gender_labels, smiling_labels

def extract_features_labels_from_cartoon_set(basedir='./Datasets/cartoon_set'):
    """
    This function extracts the landmark features for all '.png' images in the folder
    '<basedir>/img'. It also extracts the eye color and face shape labels for each image
    from the tab-separated file '<basedir>/labels.csv'.

    The labels file is expected to have a header row, followed by rows whose first
    field is the image file name without extension, second field the eye color label
    and third field the face shape label.

    :param basedir: root folder of the dataset (defaults to './Datasets/cartoon_set')
    :return:
        landmark_features:  an array containing the landmark points for each image in which
                            a face was detected
        eye_color_labels:   an array containing the eye color labels for each image in
                            which a face was detected
        face_shape_labels:  an array containing the face shape labels for each image in
                            which a face was detected
    :raises KeyError: if an image has no entry in the labels file
    """
    images_dir = os.path.join(basedir, 'img')
    labels_path = os.path.join(basedir, 'labels.csv')
    target_size = None

    # Obtaining the labels; the context manager guarantees the file is closed
    eye_color_by_name = {}
    face_shape_by_name = {}
    with open(labels_path, 'r') as labels_file:
        next(labels_file, None)  # skip the header row
        for line in labels_file:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at the end of the file)
            fields = line.strip('"\n').split('\t')
            eye_color_by_name[fields[0]] = int(fields[1])
            face_shape_by_name[fields[0]] = int(fields[2])

    # Extract landmark features and labels
    all_features = []
    all_eye_color_labels = []
    all_face_shape_labels = []
    # sorted() makes the sample order reproducible; os.listdir order is arbitrary
    for entry in sorted(os.listdir(images_dir)):
        if not entry.endswith('.png'):
            continue
        # file name without extension, independent of path separators or dots in the path
        file_name = os.path.splitext(entry)[0]

        # load image
        img = image.img_to_array(
            image.load_img(os.path.join(images_dir, entry),
                           target_size=target_size,
                           interpolation='bicubic'))
        features, _ = run_dlib_shape(img)
        if features is None:
            continue  # no face detected: the image contributes neither features nor labels
        all_features.append(features)
        all_eye_color_labels.append(eye_color_by_name[file_name])
        all_face_shape_labels.append(face_shape_by_name[file_name])

    landmark_features = np.array(all_features)
    eye_color_labels = np.array(all_eye_color_labels)
    face_shape_labels = np.array(all_face_shape_labels)
    return landmark_features, eye_color_labels, face_shape_labels

# Archive

In [None]:
def split_df(df):
    """
    :Function: Split the single tab-joined column of the raw labels table into
               separate columns, one per tab-separated field
    :df type: Pandas dataframe - unprocessed data holding one column whose name is
              the tab-joined header (empty field, img_name, gender, smiling)
    :rtype: Pandas dataframe - processed data with the string columns
            'img_name', 'gender' and 'smiling' (the leading index field is dropped)
    """
    def _fields_of(cell):
        # one Series entry per tab-separated field of the raw cell
        return pd.Series(str(cell).split('\t'))

    raw_column = df['\timg_name\tgender\tsmiling']
    wide = raw_column.apply(_fields_of)
    named = wide.rename(columns={0: 'index', 1: 'img_name', 2: 'gender', 3: 'smiling'})
    return named.drop(columns=['index'])

# # PATH TO ALL IMAGES
# global basedir, image_paths, target_size
# basedir = './Datasets/celeba'
# images_dir = os.path.join(basedir,'img')
# labels_filename = 'labels.csv'

