In [None]:
# Image folders: one for training, one per evaluation subgroup.
dataset_folder_name = 'TrainFiles/SplitTraining/B/Zero'
testset_folder_name_W = 'TestFiles/White'
testset_folder_name_B = 'TestFiles/Black'

# Fraction handed to UtkFaceDataGenerator.generate_split_indexes for the training frame.
TRAIN_TEST_SPLIT = 0.7

# Every image is resized to a square with this side length (pixels).
IM_HEIGHT = 198
IM_WIDTH = IM_HEIGHT

# Lookup tables from the numeric codes to human-readable labels.
dataset_dict = dict(
    race_id={
        0: 'white',
        1: 'black',
        2: 'asian',
        3: 'indian',
        4: 'others',
    },
    gender_id={
        0: 'male',
        1: 'female',
    },
)

# Reverse lookup: gender label -> numeric code.
dataset_dict['gender_alias'] = {
    label: code for code, label in dataset_dict['gender_id'].items()
}

import numpy as np 
import pandas as pd
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

# Parsing

In [None]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Build a DataFrame describing the image files found directly inside ``dataset_path``.

    File names are expected to consist of four underscore-separated fields,
    ``<age>_<gender>_<race>_<suffix>.<ext>``. Only the age is extracted; files whose
    name does not match that layout (or whose age is not an integer) are skipped.

    Parameters
    ----------
    dataset_path : str
        Directory searched (non-recursively) for ``*.<ext>`` files.
    ext : str
        File extension to match, without the leading dot.

    Returns
    -------
    pandas.DataFrame
        Columns ``age`` (int) and ``file`` (path as returned by ``glob``), one row per
        parsable file, in shuffled order. Empty (but with both columns) when nothing
        matches.
    """
    def parse_info_from_file(path):
        """Return the age encoded in the file name of ``path``, or None if unparsable."""
        filename = os.path.splitext(os.path.basename(path))[0]
        try:
            # Unpacking enforces exactly four fields; int() enforces a numeric age.
            age, _gender, _race, _ = filename.split('_')
            return int(age)
        except ValueError:
            # Always a single scalar, so good and bad files yield records of the same shape.
            return None

    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))
    random.shuffle(files)

    ages = [parse_info_from_file(file) for file in files]

    # Naming the columns up front keeps the frame well-formed even when `files` is empty.
    df = pd.DataFrame({'age': ages, 'file': files}, columns=['age', 'file'])

    # Drop unparsable files, then restore the integer dtype lost to the NaN placeholders.
    df = df.dropna().astype({'age': int})

    return df

In [None]:
# Index the training folder and the two evaluation folders.
# Each parse_dataset call shuffles its own file list via the `random` module.
df = parse_dataset(dataset_folder_name)
testset_W = parse_dataset(testset_folder_name_W)
testset_B = parse_dataset(testset_folder_name_B)
# Last expression of the cell: preview the first rows of the training frame.
df.head()

# Data Generator for the Training Set

In [None]:
from keras.utils import to_categorical
from PIL import Image


class UtkFaceDataGenerator():
    """
    Splits a DataFrame of face images into train/validation/test indexes and
    yields (images, [normalised ages]) batches for Keras.

    The wrapped DataFrame must provide an ``age`` column and a ``file`` column.
    """
    def __init__(self, df):
        self.df = df
        # Normalisation constant for the regression target. Computed at construction so
        # generate_images() works even when generate_split_indexes() was never called.
        self.max_age = self.df['age'].max()

    def generate_split_indexes(self, SPLIT):
        """
        Draw a random permutation of the row positions and cut it into three parts.

        ``SPLIT`` is applied twice: first to separate (train + validation) from test,
        then again to separate train from validation. With ``SPLIT=0.7`` that is
        roughly 49% train, 21% validation and 30% test; with ``SPLIT=0`` every row
        lands in the test part.

        Returns
        -------
        tuple of numpy.ndarray
            ``(train_idx, valid_idx, test_idx)`` positional indexes into ``self.df``.
        """
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]

        train_up_to = int(train_up_to * SPLIT)
        train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]

        # Refresh the normalisation constant in case self.df changed since construction.
        self.max_age = self.df['age'].max()

        return train_idx, valid_idx, test_idx

    def preprocess_image(self, img_path):
        """
        Load ``img_path``, resize it to (IM_WIDTH, IM_HEIGHT) and scale pixels to [0, 1].

        Returns a float array of shape (IM_HEIGHT, IM_WIDTH, 3).
        """
        # Imported locally under a private alias: another cell of this notebook rebinds
        # the global name `Image` to IPython.display.Image, which has no `open`.
        from PIL import Image as PILImage

        # The context manager releases the file handle; convert() guarantees three
        # channels so grayscale/RGBA files still stack into a uniform batch.
        with PILImage.open(img_path) as raw:
            im = raw.convert('RGB').resize((IM_WIDTH, IM_HEIGHT))

        return np.array(im) / 255.0

    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Yield ``(images, [ages])`` batches for the rows at positions ``image_idx``.

        Ages are divided by ``self.max_age``. When ``is_training`` is true the generator
        loops over ``image_idx`` forever (a partial batch carries over into the next
        pass); otherwise it makes a single pass and any trailing partial batch is
        dropped, so only full batches are ever yielded.
        """
        # arrays to store our batched data
        images, ages = [], []
        while True:
            for idx in image_idx:
                person = self.df.iloc[idx]

                age = person['age']
                file = person['file']

                im = self.preprocess_image(file)
                ages.append(age / self.max_age)
                images.append(im)

                # yielding condition
                if len(images) >= batch_size:
                    yield np.array(images), [np.array(ages)]
                    images, ages = [], []

            if not is_training:
                break
                
# Wrap the training frame and draw its random train/validation/test index split.
data_generator = UtkFaceDataGenerator(df)
train_idx, valid_idx, test_idx = data_generator.generate_split_indexes(TRAIN_TEST_SPLIT)

# Data Generators for the Test Sets

In [None]:
# A split fraction of 0 leaves the train and validation index arrays empty, so every
# row of each test frame ends up in the corresponding test_idx_test_* array.
TEST_DATA_SPLIT = 0
# White test set
test_data_generator_W = UtkFaceDataGenerator(testset_W)
train_idx_test_W, valid_idx_test_W, test_idx_test_W = test_data_generator_W.generate_split_indexes(TEST_DATA_SPLIT)
# Black test set
test_data_generator_B = UtkFaceDataGenerator(testset_B)
train_idx_test_B, valid_idx_test_B, test_idx_test_B = test_data_generator_B.generate_split_indexes(TEST_DATA_SPLIT)







# Making Model

In [None]:
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
import tensorflow as tf

class UtkMultiOutputModel():
    """
    Builder for a convolutional network over face images.

    Branch builders exist for race, gender and age, each with its own copy of the
    convolutional stack. ``assemble_full_model`` currently wires only the age
    (regression) branch into the returned model.
    """
    def make_default_hidden_layers(self, inputs):
        """
        Apply three Conv2D -> ReLU -> BatchNormalization -> MaxPooling2D -> Dropout
        stages (16, 32 and 32 filters) to ``inputs`` and return the resulting tensor.
        """
        x = Conv2D(16, (3, 3), padding="same")(inputs)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)

        return x

    def build_race_branch(self, inputs, num_races):
        """
        Classification head with ``num_races`` softmax units; the final layer is
        named ``race_output``.
        """
        x = self.make_default_hidden_layers(inputs)

        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_races)(x)
        x = Activation("softmax", name="race_output")(x)

        return x

    def build_gender_branch(self, inputs, num_genders=2):
        """
        Classification head for gender operating on a grayscale copy of the input;
        the final layer is named ``gender_output``.
        """
        x = Lambda(lambda c: tf.image.rgb_to_grayscale(c))(inputs)

        # Feed the grayscale tensor (not the original RGB input) into the conv stack;
        # otherwise the conversion above would have no effect on the branch.
        x = self.make_default_hidden_layers(x)

        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_genders)(x)
        # NOTE(review): sigmoid over num_genders units, while the race branch uses
        # softmax -- confirm this is the intended output activation.
        x = Activation("sigmoid", name="gender_output")(x)

        return x

    def build_age_branch(self, inputs):
        """
        Regression head with a single linear unit; the final layer is named
        ``age_output``.
        """
        x = self.make_default_hidden_layers(inputs)

        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1)(x)
        x = Activation("linear", name="age_output")(x)

        return x

    def assemble_full_model(self, width, height):
        """
        Create the Keras model named ``face_net`` for 3-channel images of shape
        ``(height, width, 3)`` whose only output is the age branch.
        """
        input_shape = (height, width, 3)

        inputs = Input(shape=input_shape)

        age_branch = self.build_age_branch(inputs)

        model = Model(inputs=inputs,
                     outputs = [age_branch],
                     name="face_net")

        return model
    
# Build the network; only the age-regression branch is wired into the returned model.
model = UtkMultiOutputModel().assemble_full_model(IM_WIDTH, IM_HEIGHT)

# Training

In [None]:
from keras.optimizers import Adam

# Optimiser hyper-parameters: starting learning rate and number of training epochs.
init_lr = 1e-4
epochs = 100

# The decay argument is set to the initial learning rate divided by the epoch count.
opt = Adam(lr=init_lr, decay=init_lr / epochs)

# Single output ('age_output'): mean squared error loss weighted by 4,
# with mean absolute error reported as the metric.
model.compile(optimizer=opt, 
              loss={
                  'age_output': 'mse'},
              loss_weights={ 
                  'age_output': 4.},
              metrics={
                  'age_output': 'mae'})

# Sanity check: number of validation samples and how many full batches they form.
valid_batch_size = 32
print(len(valid_idx))
print(len(valid_idx)//valid_batch_size)

In [None]:
from keras.callbacks import ModelCheckpoint

batch_size = 32
valid_batch_size = 32
# Both generators are created with is_training=True, so they loop over their indexes
# indefinitely instead of stopping after one pass.
train_gen = data_generator.generate_images(train_idx, is_training=True, batch_size=batch_size)
valid_gen = data_generator.generate_images(valid_idx, is_training=True, batch_size=valid_batch_size)

# NOTE(review): save_best_only is not passed, so `monitor` may have no effect on which
# epoch ends up in ./model_checkpoint -- confirm against the Keras ModelCheckpoint docs.
callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]

# Integer division: each epoch consumes only as many steps as there are full batches.
history = model.fit_generator(train_gen,
                    steps_per_epoch=len(train_idx)//batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valid_idx)//valid_batch_size)

In [None]:
import plotly.graph_objects as go
from IPython.display import display, Image

### Overall loss

# Testing

In [None]:
from PIL import Image
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
# Batch size used by the evaluation cells when running the test sets through the model.
test_batch_size = 128


# WHITE

In [None]:
# First pass: predict on every full batch of the White test set.
test_generator = test_data_generator_W.generate_images(test_idx_test_W, is_training=False, batch_size=test_batch_size)
age_pred = model.predict_generator(test_generator, steps=len(test_idx_test_W)//test_batch_size)

# Second pass over a fresh generator (the first was consumed by predict_generator)
# to collect the matching ground-truth labels in the same order.
test_generator = test_data_generator_W.generate_images(test_idx_test_W, is_training=False, batch_size=test_batch_size)
samples = 0
# NOTE(review): `images` keeps every decoded test image in memory but is not read in
# this cell -- kept only in case another cell relies on it.
images, age_true = [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    age_true.extend(labels[0])

age_true = np.array(age_true)

# Undo the normalisation with the constant each side was actually scaled by:
# the labels were divided by the *test* generator's max_age inside generate_images,
# while the model learned targets divided by the *training* generator's max_age.
age_true = age_true * test_data_generator_W.max_age
age_pred = age_pred * data_generator.max_age


from sklearn.metrics import classification_report

print('R2 score for age: ', r2_score(age_true, age_pred))
print('Deviance Score for age:', explained_variance_score(age_true, age_pred))

# BLACK

In [None]:
# First pass: predict on every full batch of the Black test set.
test_generator = test_data_generator_B.generate_images(test_idx_test_B, is_training=False, batch_size=test_batch_size)
age_pred = model.predict_generator(test_generator, steps=len(test_idx_test_B)//test_batch_size)

# Second pass over a fresh generator (the first was consumed by predict_generator)
# to collect the matching ground-truth labels in the same order.
test_generator = test_data_generator_B.generate_images(test_idx_test_B, is_training=False, batch_size=test_batch_size)
samples = 0
# NOTE(review): `images` keeps every decoded test image in memory but is not read in
# this cell -- kept only in case another cell relies on it.
images, age_true = [], []
for test_batch in test_generator:
    image = test_batch[0]
    labels = test_batch[1]

    images.extend(image)
    age_true.extend(labels[0])

age_true = np.array(age_true)

# Undo the normalisation with the constant each side was actually scaled by:
# the labels were divided by the *test* generator's max_age inside generate_images,
# while the model learned targets divided by the *training* generator's max_age.
age_true = age_true * test_data_generator_B.max_age
age_pred = age_pred * data_generator.max_age


from sklearn.metrics import classification_report

print('R2 score for age: ', r2_score(age_true, age_pred))




In [None]:
# Dump the rescaled labels and predictions left in the kernel by whichever evaluation
# cell ran last (the Black test set when the notebook is executed top to bottom).
print(age_true)
print(age_pred)

In [None]:
# Explained-variance score for the age_true/age_pred currently in the kernel
# (the Black test set when the notebook is executed top to bottom).
print('Deviance Score for age:', explained_variance_score(age_true, age_pred))