In [None]:
# --- Data locations ---------------------------------------------------------
# One training folder plus one test folder per demographic subgroup
# (WM / WW / BM / BW, evaluated separately in the "Testing" section).
dataset_folder_name = 'TrainFiles/Splits/FivePercentSplit'
testset_folder_name_WM = 'TestFiles/WM'
testset_folder_name_WW = 'TestFiles/WW'
testset_folder_name_BM = 'TestFiles/BM'
testset_folder_name_BW = 'TestFiles/BW'

# --- Tunables ---------------------------------------------------------------
# Fraction passed to generate_split_indexes: that share of the rows becomes
# train+validation, and the same fraction is applied again to carve out train.
TRAIN_TEST_SPLIT = 0.7
# Images are resized to a square of this many pixels before entering the net.
IM_WIDTH, IM_HEIGHT = 198, 198

# --- Label vocabularies -----------------------------------------------------
# Integer id -> readable alias, for the ids encoded in the image file names.
dataset_dict = {
    'race_id': dict(enumerate(['white', 'black', 'asian', 'indian', 'others'])),
    'gender_id': dict(enumerate(['male', 'female'])),
}

# Reverse lookup (alias -> integer id) for the gender labels.
dataset_dict['gender_alias'] = {
    alias: gender_id for gender_id, alias in dataset_dict['gender_id'].items()
}

import numpy as np 
import pandas as pd
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

# Parsing

In [None]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Build a DataFrame describing every parseable image in ``dataset_path``.

    File names are expected to have four underscore-separated fields,
    ``<age>_<gender>_<race>_<suffix>.<ext>``. Only the gender field is kept;
    it is translated to its alias through ``dataset_dict['gender_id']``.

    Args:
        dataset_path: folder that is searched (non-recursively) for images.
        ext: file extension to match, without the leading dot.

    Returns:
        pandas.DataFrame with the columns ``'gender'`` (alias string) and
        ``'file'`` (path as returned by glob), one row per parseable file,
        in shuffled order. Files whose name cannot be parsed are skipped.
        If nothing matches, an empty frame with the same two columns is
        returned.
    """
    def parse_info_from_file(path):
        """
        Return the gender alias encoded in the file name, or None when the
        name does not follow the expected pattern.
        """
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            _age, gender, _race, _ = stem.split('_')
            return dataset_dict['gender_id'][int(gender)]
        except (ValueError, KeyError):
            # ValueError: wrong number of fields or a non-integer gender.
            # KeyError: gender id that is not in dataset_dict['gender_id'].
            return None

    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))
    random.shuffle(files)

    # Keep label and path together so a skipped file can never shift the
    # alignment between the two columns.
    records = []
    for file in files:
        gender = parse_info_from_file(file)
        if gender is not None:
            records.append((gender, file))

    # Explicit column names keep the schema stable even with zero records.
    return pd.DataFrame(records, columns=['gender', 'file'])

In [None]:
# Index the training folder and the four subgroup test folders. Each call
# globs the folder, shuffles the file list and returns a DataFrame with a
# 'gender' and a 'file' column.
df = parse_dataset(dataset_folder_name)
testset_WM = parse_dataset(testset_folder_name_WM)
testset_WW = parse_dataset(testset_folder_name_WW)
testset_BM = parse_dataset(testset_folder_name_BM)
testset_BW = parse_dataset(testset_folder_name_BW)
# Preview the first rows of one test set as a sanity check.
testset_WM.head()

# Data Generator for the Training Set

In [None]:
from keras.utils import to_categorical
from PIL import Image


class UtkFaceDataGenerator():
    """
    Data generator for the UTKFace dataset.

    Wraps a DataFrame that has a 'gender' column (alias string) and a 'file'
    column (image path) and yields ``(images, [genders])`` batches suitable
    for feeding a Keras model.
    """
    def __init__(self, df):
        self.df = df

    def generate_split_indexes(self, SPLIT):
        """
        Draw a random train/validation/test split over the rows of self.df.

        The first ``SPLIT`` fraction of a random permutation is reserved for
        train+validation and the rest becomes the test set; ``SPLIT`` is then
        applied a second time to divide the reserved part into train and
        validation. With ``SPLIT=0`` every row ends up in the test indexes.

        Side effect: adds a 'gender_id' column to self.df, which
        generate_images reads, so this must be called before generating
        batches.

        Returns:
            (train_idx, valid_idx, test_idx) arrays of positional indexes.
        """
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]

        train_up_to = int(train_up_to * SPLIT)
        train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]

        # converts alias to id
        self.df['gender_id'] = self.df['gender'].map(lambda gender: dataset_dict['gender_alias'][gender])

        return train_idx, valid_idx, test_idx

    def preprocess_image(self, img_path):
        """
        Load one image and prepare it for the network.

        The image is converted to 3-channel RGB (so grayscale, palette or
        RGBA files still produce an array matching the model input), resized
        to (IM_WIDTH, IM_HEIGHT) and scaled to the [0, 1] range.

        Returns:
            numpy array of shape (IM_HEIGHT, IM_WIDTH, 3).
        """
        # Imported locally under an alias: another notebook cell rebinds the
        # global name `Image` (IPython.display.Image), which must not be able
        # to break image loading here.
        from PIL import Image as PILImage

        # The context manager closes the underlying file once the pixels
        # have been read into the converted in-memory copy.
        with PILImage.open(img_path) as raw:
            rgb = raw.convert('RGB').resize((IM_WIDTH, IM_HEIGHT))

        return np.array(rgb) / 255.0

    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Used to generate a batch with images when training/testing/validating our Keras model.

        Args:
            image_idx: positional row indexes into self.df to draw from.
            is_training: when True the indexes are cycled forever (samples
                left over at the end of a pass roll into the next batch);
                when False a single pass is made and a trailing partial
                batch is NOT yielded.
            batch_size: number of samples per yielded batch.

        Yields:
            (images, [genders]) where images is a stacked array of
            preprocessed images and genders holds the one-hot gender labels.
        """
        # Nothing to draw from: stop instead of spinning forever in the
        # `while True` loop below when is_training is set.
        if len(image_idx) == 0:
            return

        num_genders = len(dataset_dict['gender_id'])

        # arrays to store our batched data
        images, genders = [], []
        while True:
            for idx in image_idx:
                person = self.df.iloc[idx]

                gender = person['gender_id']
                file = person['file']

                im = self.preprocess_image(file)
                genders.append(to_categorical(gender, num_genders))
                images.append(im)

                # yielding condition
                if len(images) >= batch_size:
                    yield np.array(images), [np.array(genders)]
                    images, genders = [], []

            if not is_training:
                break
                
# Wrap the training DataFrame and draw the random train/validation/test index
# split. The call also adds the 'gender_id' column to df, which the batch
# generator reads.
data_generator = UtkFaceDataGenerator(df)
train_idx, valid_idx, test_idx = data_generator.generate_split_indexes(TRAIN_TEST_SPLIT)

# Data Generator TestSet

In [None]:
# A split fraction of 0 leaves the train and validation indexes empty, so
# every row of each test DataFrame lands in its test_idx_* array (in random
# order). The call is still required because it adds the 'gender_id' column
# that generate_images reads.
TEST_DATA_SPLIT = 0
#WM
test_data_generator_WM = UtkFaceDataGenerator(testset_WM)
train_idx_test_WM, valid_idx_test_WM, test_idx_test_WM = test_data_generator_WM.generate_split_indexes(TEST_DATA_SPLIT)
#WW
test_data_generator_WW = UtkFaceDataGenerator(testset_WW)
train_idx_test_WW, valid_idx_test_WW, test_idx_test_WW = test_data_generator_WW.generate_split_indexes(TEST_DATA_SPLIT)
#BM
test_data_generator_BM = UtkFaceDataGenerator(testset_BM)
train_idx_test_BM, valid_idx_test_BM, test_idx_test_BM = test_data_generator_BM.generate_split_indexes(TEST_DATA_SPLIT)
#BW
test_data_generator_BW = UtkFaceDataGenerator(testset_BW)
train_idx_test_BW, valid_idx_test_BW, test_idx_test_BW = test_data_generator_BW.generate_split_indexes(TEST_DATA_SPLIT)






# Making Model

In [None]:
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
import tensorflow as tf

class UtkMultiOutputModel():
    """
    Builds the CNN used in this notebook.

    The class offers branch builders for race, gender and age, each stacked on
    the convolutional layers from make_default_hidden_layers. Note that
    assemble_full_model currently wires up only the gender branch, so the
    assembled model has a single output named "gender_output".
    """
    def make_default_hidden_layers(self, inputs):
        """
        Used to generate a default set of hidden layers. The network stacks
        three blocks of the form:

        Conv2D -> ReLU -> BatchNormalization -> MaxPooling -> Dropout(0.25)

        using 16, 32 and 32 filters and pool sizes of 3, 2 and 2.
        """
        x = Conv2D(16, (3, 3), padding="same")(inputs)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)

        return x

    def build_race_branch(self, inputs, num_races):
        """
        Used to build the race branch of our face recognition network.
        This branch is composed of the default hidden layers followed by
        Flatten -> Dense(128) -> ReLU -> BN -> Dropout(0.5) and a softmax
        output with num_races units named "race_output".
        """
        x = self.make_default_hidden_layers(inputs)

        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_races)(x)
        x = Activation("softmax", name="race_output")(x)

        return x

    def build_gender_branch(self, inputs, num_genders=2):
        """
        Used to build the gender branch of our face recognition network.
        This branch is composed of the default hidden layers followed by
        Flatten -> Dense(128) -> ReLU -> BN -> Dropout(0.5) and a sigmoid
        output with num_genders units named "gender_output".

        The branch consumes the RGB input directly.
        NOTE(review): an rgb_to_grayscale Lambda used to be created here, but
        its output was overwritten before use and never reached the model, so
        it has been dropped. Feeding grayscale instead would change the
        architecture and invalidate existing checkpoints; confirm intent
        before doing so.
        """
        x = self.make_default_hidden_layers(inputs)

        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_genders)(x)
        x = Activation("sigmoid", name="gender_output")(x)

        return x

    def build_age_branch(self, inputs):
        """
        Used to build the age branch of our face recognition network.
        This branch is composed of the default hidden layers followed by
        Flatten -> Dense(128) -> ReLU -> BN -> Dropout(0.5) and a single
        linear output unit named "age_output".
        """
        x = self.make_default_hidden_layers(inputs)

        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1)(x)
        x = Activation("linear", name="age_output")(x)

        return x

    def assemble_full_model(self, width, height):
        """
        Used to assemble the CNN. Creates an input of shape
        (height, width, 3), attaches the gender branch and returns a Keras
        Model named "face_net" whose only output is "gender_output".
        """
        input_shape = (height, width, 3)

        inputs = Input(shape=input_shape)

        gender_branch = self.build_gender_branch(inputs)

        model = Model(inputs=inputs,
                     outputs = [gender_branch],
                     name="face_net")

        return model
    
# Build the gender-only network for IM_WIDTH x IM_HEIGHT RGB inputs.
model = UtkMultiOutputModel().assemble_full_model(IM_WIDTH, IM_HEIGHT)

In [None]:
# NOTE: the model-architecture plot below is disabled by wrapping it in a
# string literal; nothing inside it is executed.
'''%matplotlib inline

from keras.utils import plot_model
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

plot_model(model, to_file='model.png')
img = mpimg.imread('model.png')

plt.figure(figsize=(40, 30))
plt.imshow(img)'''

# Training

In [None]:
from keras.optimizers import Adam

# Optimizer settings: the decay is set to the initial learning rate divided
# by the number of epochs.
init_lr = 1e-4
epochs = 100

opt = Adam(lr=init_lr, decay=init_lr / epochs)

# The dict keys refer to the output layer name set in build_gender_branch.
model.compile(optimizer=opt, 
              loss={
                  'gender_output': 'binary_crossentropy'},
              loss_weights={ 
                  'gender_output': 0.1},
              metrics={
                  'gender_output': 'accuracy'})

# Show the validation-set size and how many full validation batches it gives.
valid_batch_size = 32
print(len(valid_idx))
print(len(valid_idx)//valid_batch_size)

Now let's train the model with a batch size of 32 for both the training and validation sets. A ModelCheckpoint callback is used to save the model to disk at the end of each epoch.

In [None]:
from keras.callbacks import ModelCheckpoint

batch_size = 32
valid_batch_size = 32
# Both generators are created with is_training=True so they cycle over their
# indexes indefinitely; the step counts below bound each epoch.
train_gen = data_generator.generate_images(train_idx, is_training=True, batch_size=batch_size)
valid_gen = data_generator.generate_images(valid_idx, is_training=True, batch_size=valid_batch_size)

# Checkpoint the model to ./model_checkpoint during training.
callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]

# Integer division: only full batches are counted per epoch.
history = model.fit_generator(train_gen,
                    steps_per_epoch=len(train_idx)//batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valid_idx)//valid_batch_size)

In [None]:
import plotly.graph_objects as go
from IPython.display import display, Image

### Gender accuracy

In [None]:
# NOTE: the accuracy plot below is disabled by wrapping it in a string
# literal; nothing inside it is executed.
# NOTE(review): confirm the history keys ('gender_output_acc',
# 'val_gender_output_acc') against history.history before re-enabling.
'''
plt.clf()

fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['gender_output_acc'],
                    name='Train'))

fig.add_trace(go.Scatter(
                    y=history.history['val_gender_output_acc'],
                    name='Valid'))


fig.update_layout(height=450, 
                  width=600,
                  title='Accuracy for gender feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')

fig.write_html('acc_gender.html', include_plotlyjs='cdn')

fig.show()
'''


### Overall loss

In [None]:

# Plot the per-epoch training and validation loss, save an HTML copy and
# render the figure inline.
fig = go.Figure()
for history_key, trace_name in (('loss', 'Train'), ('val_loss', 'Valid')):
    fig.add_trace(go.Scattergl(y=history.history[history_key], name=trace_name))

fig.update_layout(
    title='Overall loss',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    width=600,
    height=450,
)

fig.write_html('overall_loss.html', include_plotlyjs='cdn')
fig.show()


# Testing

In [None]:
from PIL import Image

# Batch size shared by all four subgroup evaluations below.
test_batch_size = 128


#### White Men

In [None]:
# Predict on the White Men test set. Only full batches are used: the step
# count uses integer division and the generator does not yield a trailing
# partial batch, so predictions and labels below stay aligned.
test_generator = test_data_generator_WM.generate_images(test_idx_test_WM, is_training=False, batch_size=test_batch_size)
gender_pred = model.predict_generator(test_generator, steps=len(test_idx_test_WM)//test_batch_size)

# Second pass over the same indexes, in the same order, to collect the
# ground-truth labels (and the images themselves).
test_generator = test_data_generator_WM.generate_images(test_idx_test_WM, is_training=False, batch_size=test_batch_size)
images, gender_true = [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    gender_true.extend(labels[0])

gender_true = np.array(gender_true)

# One-hot / per-class scores -> class ids.
gender_true = gender_true.argmax(axis=-1)
gender_pred = gender_pred.argmax(axis=-1)

from sklearn.metrics import classification_report

# Pass the label ids explicitly: a subgroup test set presumably holds a single
# gender, and with perfect predictions only one class would be present, which
# makes classification_report reject the two target_names.
cr_gender = classification_report(gender_true, gender_pred,
                                  labels=list(dataset_dict['gender_id'].keys()),
                                  target_names=list(dataset_dict['gender_id'].values()))
print(cr_gender)

#### White Women

In [None]:
# Predict on the White Women test set. Only full batches are used: the step
# count uses integer division and the generator does not yield a trailing
# partial batch, so predictions and labels below stay aligned.
test_generator = test_data_generator_WW.generate_images(test_idx_test_WW, is_training=False, batch_size=test_batch_size)
gender_pred = model.predict_generator(test_generator, steps=len(test_idx_test_WW)//test_batch_size)

# Second pass over the same indexes, in the same order, to collect the
# ground-truth labels (and the images themselves).
test_generator = test_data_generator_WW.generate_images(test_idx_test_WW, is_training=False, batch_size=test_batch_size)
images, gender_true = [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    gender_true.extend(labels[0])

gender_true = np.array(gender_true)

# One-hot / per-class scores -> class ids.
gender_true = gender_true.argmax(axis=-1)
gender_pred = gender_pred.argmax(axis=-1)

from sklearn.metrics import classification_report

# Pass the label ids explicitly: a subgroup test set presumably holds a single
# gender, and with perfect predictions only one class would be present, which
# makes classification_report reject the two target_names.
cr_gender = classification_report(gender_true, gender_pred,
                                  labels=list(dataset_dict['gender_id'].keys()),
                                  target_names=list(dataset_dict['gender_id'].values()))
print(cr_gender)

#### Black Men

In [None]:
# Predict on the Black Men test set. Only full batches are used: the step
# count uses integer division and the generator does not yield a trailing
# partial batch, so predictions and labels below stay aligned.
test_generator = test_data_generator_BM.generate_images(test_idx_test_BM, is_training=False, batch_size=test_batch_size)
gender_pred = model.predict_generator(test_generator, steps=len(test_idx_test_BM)//test_batch_size)

# Second pass over the same indexes, in the same order, to collect the
# ground-truth labels (and the images themselves).
test_generator = test_data_generator_BM.generate_images(test_idx_test_BM, is_training=False, batch_size=test_batch_size)
images, gender_true = [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    gender_true.extend(labels[0])

gender_true = np.array(gender_true)

# One-hot / per-class scores -> class ids.
gender_true = gender_true.argmax(axis=-1)
gender_pred = gender_pred.argmax(axis=-1)

from sklearn.metrics import classification_report

# Pass the label ids explicitly: a subgroup test set presumably holds a single
# gender, and with perfect predictions only one class would be present, which
# makes classification_report reject the two target_names.
cr_gender = classification_report(gender_true, gender_pred,
                                  labels=list(dataset_dict['gender_id'].keys()),
                                  target_names=list(dataset_dict['gender_id'].values()))
print(cr_gender)

#### Black Women

In [None]:
# Predict on the Black Women test set. Only full batches are used: the step
# count uses integer division and the generator does not yield a trailing
# partial batch, so predictions and labels below stay aligned.
test_generator = test_data_generator_BW.generate_images(test_idx_test_BW, is_training=False, batch_size=test_batch_size)
gender_pred = model.predict_generator(test_generator, steps=len(test_idx_test_BW)//test_batch_size)

# Second pass over the same indexes, in the same order, to collect the
# ground-truth labels (and the images themselves).
test_generator = test_data_generator_BW.generate_images(test_idx_test_BW, is_training=False, batch_size=test_batch_size)
images, gender_true = [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    gender_true.extend(labels[0])

gender_true = np.array(gender_true)

# One-hot / per-class scores -> class ids.
gender_true = gender_true.argmax(axis=-1)
gender_pred = gender_pred.argmax(axis=-1)

from sklearn.metrics import classification_report

# Pass the label ids explicitly: a subgroup test set presumably holds a single
# gender, and with perfect predictions only one class would be present, which
# makes classification_report reject the two target_names.
cr_gender = classification_report(gender_true, gender_pred,
                                  labels=list(dataset_dict['gender_id'].keys()),
                                  target_names=list(dataset_dict['gender_id'].values()))
print(cr_gender)

### Example of predictions

In [None]:
# NOTE: the prediction gallery below is disabled by wrapping it in a string
# literal; nothing inside it is executed.
# NOTE(review): it references age_pred/age_true and race_pred/race_true,
# which the visible notebook never defines, so it cannot simply be re-enabled
# for the gender-only model.
'''
import math
n = 16
random_indices = np.random.permutation(n)
n_cols = 4
n_rows = math.ceil(n / n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 17))
for i, img_idx in enumerate(random_indices):
    ax = axes.flat[i]
    ax.imshow(images[img_idx])
    
    cur_age_pred = age_pred[img_idx]
    cur_age_true = age_true[img_idx]
    
    cur_gender_pred = gender_pred[img_idx]
    cur_gender_true = gender_true[img_idx]
    
    cur_race_pred = race_pred[img_idx]
    cur_race_true = race_true[img_idx]
    
    age_threshold = 10
    if cur_gender_pred == cur_gender_true and cur_race_pred == cur_race_true and abs(cur_age_pred - cur_age_true) <= age_threshold:
        ax.xaxis.label.set_color('green')
    elif cur_gender_pred != cur_gender_true and cur_race_pred != cur_race_true and abs(cur_age_pred - cur_age_true) > age_threshold:
        ax.xaxis.label.set_color('red')
    
    ax.set_xlabel('a: {}, g: {}, r: {}'.format(int(age_pred[img_idx]),
                            dataset_dict['gender_id'][gender_pred[img_idx]],
                               dataset_dict['race_id'][race_pred[img_idx]]))
    
    ax.set_title('a: {}, g: {}, r: {}'.format(int(age_true[img_idx]),
                            dataset_dict['gender_id'][gender_true[img_idx]],
                               dataset_dict['race_id'][race_true[img_idx]]))
    ax.set_xticks([])
    ax.set_yticks([])
    
plt.tight_layout()
plt.savefig('preds.png')
'''