# Age Estimation and Gender Classification

Task: Train CNN models to estimate a person's age and gender from a given face image.

There are two CNN models:
- the first uses a custom architecture defined from scratch
- the second is a finetuned pretrained model (transfer learning)

**Dataset**

The models are trained and validated on the folder `train_val/`, which contains 5,000 labeled face images (size: 128 x 128) originating from the UTKFace dataset.

**Performance metric**

The metrics for measuring the performance on the test set are:
- age estimation: MAE (Mean Absolute Error)
- gender classification: accuracy

## Setting Up: Mount the google drive
Google Drive must be mounted in the notebook so that the dataset can be read from Google Colab.

Also need to **enable GPU** before training.


In [None]:
# Mounting the Google drive at /content/drive so the dataset folder used below is reachable
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Sanity check: count the regular files that sit directly inside train_val/
import os
folder = '/content/drive/MyDrive/Machine Learning 2/train_val'
num_files = sum(
    1
    for entry in os.listdir(folder)
    if os.path.isfile(os.path.join(folder, entry))
)
print("num of files in train_val/: %d" % (num_files))

In [None]:
import os
def load_images_from_folder(foldername):
    """Return the paths of the files located directly inside *foldername*.

    Despite the name, no image is decoded here: the result is a list of
    path strings (``foldername`` joined with each entry name).

    Sub-directories are skipped, and the entries are returned in sorted
    order so that the result does not depend on the platform-specific
    ordering of ``os.listdir``.
    """
    paths = []
    for filename in sorted(os.listdir(foldername)):
        path = os.path.join(foldername, filename)
        # os.path.join never returns None, so test what actually matters:
        # whether the entry is a regular file.
        if os.path.isfile(path):
            paths.append(path)
    return paths

# NOTE: img_list holds the file paths returned by the helper, not decoded images
img_list=load_images_from_folder(folder)

## Visualize a few photos
The section below displays some of the images from the dataset

In [None]:
import random as r
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf

import os

n_pics = 25

maxpics = 5000
testpics = 50

# size of the image plot
plt.rcParams['figure.figsize'] = [10, 10]

# n_pics distinct random indexes into the first `testpics` images,
# i.e. within the range [0, testpics - 1]
picrange = r.sample(range(0,testpics), n_pics)

# only the first `testpics` images are decoded for the preview
dataset_raw = np.array([np.asarray(Image.open(img_list[i])) for i in range(testpics)])


# show the images corresponding to these indexes on a 5 x 5 grid
for i, index in enumerate(picrange):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(dataset_raw[index])

    # Title is "<age>_<gender>", read from the first two '_'-separated
    # fields of the file name. Fixed character offsets into the full path
    # depend on the length of the folder path and break when it changes.
    label_fields = os.path.basename(img_list[index]).split('_')[:2]
    plt.title('_'.join(label_fields))


print(dataset_raw[0].shape)

## Rearrange the dataset
You may arrange the dataset in any way that suits your later processing, such as splitting it into a training set and a validation set, storing the gender and age labels somehow, and so on.


In [None]:
from pathlib import Path

# read the labels encoded in the file name
# returns (AGE, GENDER)
def getLabels(file_path):
    """Return ``(age, gender)`` parsed as integers from the file name.

    The file name without its last suffix is split on '_'; the first
    field is the age and the second the gender (0 = M, 1 = F).
    """
    # Only the first two fields are needed, so stop splitting after them.
    fields = Path(file_path).stem.split('_', 2)
    age = int(fields[0])
    gender = int(fields[1])
    return age, gender

# for a single given sample, make array to pass into dataframe
def getSingle(file_path):
    fileName = Path(file_path).stem  # separate file name
    split = fileName.split('_')
    #      [Age,        gender[0=M, 1=F], file path]
    return [int(split[0]), str(split[1]), file_path]



In [None]:
#turning the list of directories into a pandas dataframe
import numpy as np
import pandas as pd

# one row per image: [age, gender, path]
fullData = list(map(getSingle, img_list))
df = pd.DataFrame(fullData, columns=['age', 'gender', 'directory'])

# Both labels are stored as floats and the path as a pandas string column.
# (One-hot encoding of the labels is intentionally not applied.)
df = df.astype({'age': 'float', 'gender': 'float', 'directory': 'string'})
print(df.dtypes)

# In a notebook only the last expression of a cell is displayed.
df.head(5)
df.count()


## STEP1: Data pre-processing
Pre-process the data before feeding it into a CNN.


In [None]:
# data visualization

import seaborn as sns
import matplotlib.pyplot as plt

# len(df) is the number of rows, i.e. images; df.count() would print one
# non-null count per column instead
print("Number of images = ", len(df))

# number of samples per age
age_freq = df['age'].value_counts()
print(age_freq)

sns.set_style("whitegrid")  # set a Seaborn style
plt.figure(figsize=(28,10))  # adjust figure size
# Plot the ages as integers so the category tick labels read "1", "2", ...
# Relabelling the ticks with their positions (0..N-1) would show the wrong
# age under every bar.
ax = sns.countplot(x=df['age'].astype(int))

plt.show()  # show the plot



In [None]:
# Random under-sampling of the over-represented ages, for a more balanced
# data set when training.
# NOTE(review): no random_state is passed, so the sample differs on every run.

# keep 30% of the samples with ages 0-1
is_newborn = df['age'] <= 1
newborns = df[is_newborn].sample(frac=0.3)

# keep 20% of the samples with ages 24-26
is_young_adult = (df['age'] >= 24) & (df['age'] <= 26)
youngAdults = df[is_young_adult].sample(frac=0.2)

# Remove every row of both groups using the same masks that produced the
# samples (so no row is both kept and re-added), then append the samples.
df = df[~(is_newborn | is_young_adult)]
df = pd.concat([df, newborns, youngAdults], ignore_index = True)

In [None]:
# number of images left after sampling
num_files = df['age'].count()
print("Number of images after sampling = ", num_files)
sns.set_style("whitegrid")  # set a Seaborn style
plt.figure(figsize=(28,10))  # adjust figure size
# Integer ages give correct category tick labels; relabelling the ticks with
# their positions would mislabel the bars.
ax = sns.countplot(x=df['age'].astype(int))
# age distribution with a KDE overlay, drawn in addition to the count plot
sns.displot(df['age'],kde=True, bins=20)

plt.show()  # show the plots


In [None]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split


dir = ""  # NOTE(review): not used in this cell and shadows the builtin dir()
batch_size = 32
seed = 42
train_val_split = 0.25
n_train_files = int(num_files*(1-train_val_split))
n_val_files = int(num_files*train_val_split)


# Ages that occur only once cannot be stratified, so they are set aside and
# added to the training set after the split. The counts are taken from the
# current df: age_freq was computed before the sampling step and no longer
# matches the data.
current_age_freq = df['age'].value_counts()
extremely_underrepresented = current_age_freq[current_age_freq == 1].index.tolist()

#Remove for stratify
is_extreme = df['age'].isin(extremely_underrepresented)
extreme_df = df[is_extreme]
df = df[~is_extreme]

#Training and validation split driven by train_val_split (75% / 25%)
train_df, validation_df = train_test_split(df, test_size=train_val_split, random_state=seed, stratify = df['age'])
train_df = pd.concat([train_df, extreme_df])


#defining data augmentation on TRAINING dataset
train_aug_datagen=ImageDataGenerator(
    rescale=1./255.,      #rescaling pixel values to [0,1]

    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
# un-augmented training images: rescaling only
train_og_datagen=ImageDataGenerator(
    rescale=1./255.,      #rescaling pixel values to [0,1]
)

# validation images: rescaling only
val_datagen=ImageDataGenerator(
    rescale=1./255.,      #rescaling pixel values to [0,1]
)

# options shared by all three generators
flow_options = dict(
    x_col='directory',  # column containing image filenames
    y_col=['age', 'gender'],  # column(s) containing label(s)
    target_size=(128, 128),
    batch_size=batch_size,
    color_mode='rgb',
    class_mode='multi_output',
    seed=seed,
)

# create train generators (augmented and original)
train_aug_generator = train_aug_datagen.flow_from_dataframe(dataframe=train_df, **flow_options)
train_og_generator = train_og_datagen.flow_from_dataframe(dataframe=train_df, **flow_options)

# create validation generator
val_generator = val_datagen.flow_from_dataframe(dataframe=validation_df, **flow_options)

#combine augmented and original images
def combine_generator(gen1, gen2):
    """Yield items alternately from *gen1* and *gen2*.

    Keras data iterators loop over their data indefinitely and never raise
    StopIteration, so waiting for *gen1* to finish before switching to
    *gen2* would never reach *gen2*. Alternating between the two lets both
    sources contribute.

    If one source is exhausted the other keeps being served on its own;
    iteration ends once both are exhausted.
    """
    sources = [gen1, gen2]
    while sources:
        still_active = []
        for source in sources:
            try:
                item = next(source)
            except StopIteration:
                # this source is finished; drop it from the rotation
                continue
            still_active.append(source)
            yield item
        sources = still_active
# single training stream built from the augmented and the original generator
train_generator = combine_generator(train_aug_generator, train_og_generator)



In [None]:
# display the rows that were set aside before the stratified split
extreme_df

In [None]:
#Testing to see if the augmented data is appropriate
# builtin next() is used here, as in combine_generator
x, y = next(train_aug_generator)
fig, axs = plt.subplots(nrows=4, ncols=8, figsize=(12, 6))
for ax in axs.flat:
    ax.axis('off')
# zip stops at the shorter input, so a batch with fewer images than the
# 4 x 8 grid leaves the remaining cells empty instead of raising IndexError
for ax, image in zip(axs.flat, x):
    ax.imshow(image)
plt.tight_layout()
plt.show()

## STEP2A: Build your own CNN network
Define a custom CNN for classifying the gender and predicting the age.
One CNN model is used for both tasks and thus has two outputs

There are a few restrictions about the network as follows.
1.	The input size must be 128 x 128 x 3
2.	The size of feature maps being fed to the first fully connected layer must be less than 10 x 10, while there is no number limitation about the depth.


In [None]:
from re import X
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, concatenate, Dropout, BatchNormalization, Add, ReLU, Activation
from keras.regularizers import l1, l2
from keras.models import Model


# input layer
input_layer = Input(shape=(128, 128, 3), name="Input")

# Six conv blocks (Conv -> ReLU -> BatchNorm); blocks 1, 3 and 5 are followed
# by 2x2 max pooling. No padding argument is passed to any Conv2D.

# first convolutional layer
x = Conv2D(32, (3,3), name="32_3x3_Conv_1")(input_layer)
x = Activation('relu', name="ReLU_1")(x)
x = BatchNormalization(name="BatchNorm_1")(x)
x = MaxPooling2D(pool_size=(2, 2), name="2x2_MaxPool_1")(x)
# x = Dropout(0.2)(x)

# second convolutional layer
x = Conv2D(64, (3,3), name="64_3x3_Conv_2")(x)
x = Activation('relu', name="ReLU_2")(x)
x = BatchNormalization(name="BatchNorm_2")(x)

# third convolutional layer
x = Conv2D(128, (3,3), name="128_3x3_Conv_3")(x)
x = Activation('relu', name="ReLU_3")(x)
x = BatchNormalization(name="BatchNorm_3")(x)
x = MaxPooling2D(pool_size=(2, 2), name="2x2_MaxPool_3")(x)

# fourth convolutional layer
x = Conv2D(256, (3,3), name="256_3x3_Conv_4")(x)
x = Activation('relu', name="ReLU_4")(x)
x = BatchNormalization(name="BatchNorm_4")(x)


# fifth convolutional layer
x = Conv2D(512, (3,3), name="512_3x3_Conv_5")(x)
x = Activation('relu', name="ReLU_5")(x)
x = BatchNormalization(name="BatchNorm_5")(x)
x = MaxPooling2D(pool_size=(2, 2), name="2x2_MaxPool_5")(x)

# sixth convolutional layer
x = Conv2D(1024, (3,3), name="1024_3x3_Conv_6")(x)
x = Activation('relu', name="ReLU_6")(x)
x = BatchNormalization(name="BatchNorm_6")(x)




# flatten the output of the sixth (last) convolutional block
# NOTE(review): assuming Keras' default 'valid' padding, the map reaching
# Flatten works out to 10 x 10 x 1024, which is not strictly below the
# 10 x 10 limit stated in the task description — confirm, or add a pooling
# layer after the sixth block.
flat = Flatten(name="Flatten")(x)

# two output branches, 'age' and 'gender'; both start from the same flattened features
# age branch: regression head with a single linear output
a = Dense(256, activation='relu', name="256_Dense_1a_L1Reg", kernel_regularizer=l1(0.01))(flat)
a = Dropout(0.5, name="0.5_Dropout_1a")(a)
a = Dense(128, activation='relu', name="128_Dense_2a")(a)
a = Dense(1, activation='linear', name='age')(a)

# gender branch: binary classification head with a single sigmoid output
g = Dense(256, activation='relu', name="256_Dense_1g_L1Reg", kernel_regularizer=l1(0.01))(flat)
g = Dropout(0.5, name="0.5_Dropout_1g")(g)
g = Dense(128, activation='relu', name="128_Dense_2g")(g)
g = Dense(1, activation='sigmoid', name='gender')(g)


# create the model; the output layer names 'age' and 'gender' are the keys
# used for the per-output losses and metrics at compile time
modelA = Model(inputs=input_layer, outputs=[a, g], name="Model1")


In [None]:
model_folder = '/content/drive/MyDrive/Machine Learning 2/models'
import os
# makedirs(exist_ok=True) also creates missing parent folders and does not
# fail when the folder already exists
os.makedirs(model_folder, exist_ok=True)
# NOTE: this cell sits before the compile/fit cells, so the file written
# here holds the model as built, not a trained one
modelA.save(os.path.join(model_folder,"age_gender_A.h5"))

In [None]:
# modelA = tf.keras.models.load_model(os.path.join(model_folder,"age_gender_A.h5"))

from tensorflow.keras.utils import plot_model
# draw the layer graph of modelA top-to-bottom ("TB"), with tensor shapes and
# layer activations shown
plot_model(modelA,
           show_shapes=True,
           show_layer_activations=True,
           rankdir="TB",
           )

## STEP3A: Compile and train your model
Model is compiled and trained here.


In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.callbacks import LearningRateScheduler

# Compiling the model: one loss, loss weight and metric per named output
output_losses = {
    'age'   : 'mean_absolute_error',
    'gender': 'binary_crossentropy',
}
# the gender loss is weighted 20x relative to the age loss
output_loss_weights = {
    'age'   : 1,
    'gender': 20,
}
output_metrics = {
    'age'   : 'mae',
    'gender': 'accuracy',
}

modelA.compile(
    optimizer=Adam(learning_rate = 1e-4),
    loss=output_losses,
    loss_weights=output_loss_weights,
    metrics=output_metrics,
)

modelA.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
# stop training early based on the total validation loss
early_stop = EarlyStopping(
    monitor="val_loss",
    min_delta=0.5,   # smaller changes in val_loss do not count as an improvement
    patience=5,      # number of epochs without improvement that are tolerated
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False,  # NOTE(review): the best-epoch weights are not restored when stopping — confirm this is intended
    start_from_epoch=0,
)


def lr_decay(epoch):
    """Return the learning rate to use for *epoch* (step-wise exponential decay).

    The rate starts at 5e-4 and is multiplied by 0.95 once for every
    4 completed epochs.
    """
    base_rate = 5e-4
    factor = 0.95
    step_length = 4
    completed_steps = epoch // step_length
    return base_rate * factor ** completed_steps

# Create the LearningRateScheduler callback that applies lr_decay during training
# NOTE(review): the schedule starts at 5e-4, so it presumably supersedes the
# learning_rate=1e-4 given to Adam at compile time — confirm which is intended
lr_scheduler = LearningRateScheduler(lr_decay)


In [None]:
# Training the model
from datetime import datetime


epochs = 128

# number of batches drawn per epoch for training and for validation
STEP_SIZE_TRAIN = n_train_files//batch_size
STEP_SIZE_VALID = n_val_files//batch_size

print(n_train_files)
print("Epochs: {}\nBatch size: {}\nValidation split: {}".format(epochs, batch_size, train_val_split))


# time the whole training run
start_time = datetime.now()
history = modelA.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_steps=STEP_SIZE_VALID,
    callbacks = [lr_scheduler, early_stop]
)
time_elapsed = datetime.now() - start_time

print('Time elapsed (hh:mm:ss.ms) {}'.format(time_elapsed))


# save the trained model
model_folder = '/content/drive/MyDrive/Machine Learning 2/models'
import os
# makedirs(exist_ok=True) also creates missing parent folders and does not
# fail when the folder already exists
os.makedirs(model_folder, exist_ok=True)
modelA.save(os.path.join(model_folder,"age_gender_A.h5"))

## STEP4A: Draw the learning curves
1.	The loss of the gender classification over the training and validation set
2.	The accuracy of the gender classification over the training and validation set
3.	The loss of the age estimation over the training and validation set
4.	The MAE of the age estimation over the training and validation set


In [None]:
# plot the learning curves
import matplotlib.pyplot as plt

print("Epochs: {}\nBatch size: {}\nValidation split: {}".format(epochs, batch_size, train_val_split))

# one entry per panel of the 2 x 2 grid:
# (training key, training label, validation key, validation label, upper y limit)
curve_panels = [
    ('gender_loss', 'train gender loss', 'val_gender_loss', 'val gender loss', 1.0),
    ('gender_accuracy', 'gender accuracy', 'val_gender_accuracy', 'validation gender accuracy', 1.0),
    ('age_loss', 'age loss', 'val_age_loss', 'validation age loss', 20.0),
    ('age_mae', 'age mae', 'val_age_mae', 'validation age mae', 20.0),
]

fig = plt.figure()
for position, (train_key, train_label, val_key, val_label, y_max) in enumerate(curve_panels, start=1):
    fig.add_subplot(2,2,position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.grid(True)
    plt.ylim([0, y_max])
    plt.xlabel('epoch')

#epochs40 batchsize32 tvs25 decay08

## STEP2B: Build a CNN network based on a pre-trained model
Fine-tuning of a model that was pre-trained on the ImageNet data set.

Otherwise same restrictions as before.

In [None]:
from keras.applications.densenet import DenseNet121
# DenseNet121 with ImageNet weights and without its classification top
# NOTE(review): the data generators only rescale pixels to [0, 1]; check whether
# DenseNet's own preprocess_input normalisation is expected instead
pre_trained_model = DenseNet121(weights='imagenet', include_top = False)

In [None]:
# print the layer table of the pre-trained backbone, then draw its graph
pre_trained_model.summary()
from tensorflow.keras.utils import plot_model
plot_model(pre_trained_model,
           show_shapes=True,
           show_layer_activations=True,
           rankdir="TB",
          #  dpi=512
           )

In [None]:

from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, concatenate, Dropout, BatchNormalization, Add, ReLU, Activation, GlobalAveragePooling2D
from keras.regularizers import l1, l2
from keras.models import Model

# modelB starts out as the pre-trained backbone itself (same object, not a copy)
modelB = pre_trained_model
#Freezing pre-trained layers
for layer in modelB.layers:
  layer.trainable = False


# wrap the backbone behind an explicit 128 x 128 x 3 input
adapted_input = Input(shape=(128, 128, 3), name="Input")
modelB = Model(adapted_input, modelB(adapted_input))

# extra trainable conv block on top of the backbone features;
# padding='same' keeps the spatial size of the feature map unchanged
transfer_conv2D = modelB.output
transfer_conv2D = Conv2D(1024, (3,3), name="1024_3x3_Conv_2", padding = 'same')(transfer_conv2D)
transfer_conv2D = Activation('relu', name="ReLU_2")(transfer_conv2D)
transfer_conv2D = BatchNormalization(name="BatchNorm_2")(transfer_conv2D)

modelB = Model(modelB.input, transfer_conv2D)

# flatten the feature map before the dense heads
transfer_flatten = Flatten(name="Flatten")
modelB = Model(modelB.input, transfer_flatten(modelB.output))



# two output branches, 'age' and 'gender'; both start from the flattened features
# age branch: L1-regularised dense layers ending in a single linear output
transfer_a = modelB.output
transfer_a = Dense(256, activation='relu', name="256_Dense_1a", kernel_regularizer=l1(0.05))(transfer_a)
transfer_a = Dropout(0.5, name="0.5_Dropout_1a")(transfer_a)

transfer_a = Dense(256, activation='relu', name="256_Dense_2a", kernel_regularizer=l1(0.05))(transfer_a)
transfer_a = Dense(128, activation='relu', name="128_Dense_3a", kernel_regularizer=l1(0.05))(transfer_a)
transfer_a = Dense(1, activation='linear', name='age')(transfer_a)

# gender branch: L1-regularised dense layers ending in a single sigmoid output
transfer_g = modelB.output
transfer_g = Dense(256, activation='relu', name="256_Dense_1g", kernel_regularizer=l1(0.1))(transfer_g)
transfer_g = Dropout(0.5, name="0.5_Dropout_1g")(transfer_g)
transfer_g = Dense(256, activation='relu', name="256_Dense_2g", kernel_regularizer=l1(0.1))(transfer_g)
transfer_g = Dense(128, activation='relu', name="128_Dense_3g", kernel_regularizer=l1(0.1))(transfer_g)
transfer_g = Dense(1, activation='sigmoid', name='gender')(transfer_g)

# final two-output model: [age, gender]
modelB = Model(modelB.input, [transfer_a,transfer_g])

modelB.summary()
from tensorflow.keras.utils import plot_model
plot_model(modelB, show_shapes=True)

## STEP3B: Compile and train your model
Compiling and training model


In [None]:

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.callbacks import LearningRateScheduler

# Compiling the model: one loss, loss weight and metric per named output
output_losses = {
    'age'   : 'mean_absolute_error',
    'gender': 'binary_crossentropy',
}
# the gender loss is weighted 20x relative to the age loss
output_loss_weights = {
    'age'   : 1,
    'gender': 20,
}
output_metrics = {
    'age'   : 'mae',
    'gender': 'accuracy',
}

modelB.compile(
    optimizer=Adam(learning_rate = 1e-4),
    loss=output_losses,
    loss_weights=output_loss_weights,
    metrics=output_metrics,
)

# modelB.summary()
# model_folder = '/content/drive/MyDrive/Machine Learning 2/models'
# import os
# if not os.path.exists(model_folder):
#     os.mkdir(model_folder)
# modelB.save(os.path.join(model_folder,"age_gender_B.h5"))

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, Callback
# stop training early based on the total validation loss
early_stop = EarlyStopping(
    monitor="val_loss",
    min_delta=0.5,   # smaller changes in val_loss do not count as an improvement
    patience=5,      # number of epochs without improvement that are tolerated
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False,  # NOTE(review): the best-epoch weights are not restored when stopping — confirm this is intended
    start_from_epoch=0,
)


def lr_decay(epoch):
    """Return the learning rate to use for *epoch* (exponential decay).

    The rate starts at 1e-3 and is multiplied by 0.96 after every epoch.
    """
    base_rate = 1e-3
    factor = 0.96
    step_length = 1
    completed_steps = epoch // step_length
    return base_rate * factor ** completed_steps

# callback that applies lr_decay during training
# NOTE(review): the schedule starts at 1e-3, so it presumably supersedes the
# learning_rate=1e-4 given to Adam at compile time — confirm which is intended
lr_scheduler = LearningRateScheduler(lr_decay)

#From keras.io
class Fine_tune(Callback):
    """Callback that marks the model trainable and sets the learning rate when training begins.

    NOTE(review): the model is compiled while the pre-trained layers are
    frozen. Keras documents that a change to ``trainable`` needs a new
    ``compile()`` to be taken into account — verify that this callback
    really unfreezes the backbone.
    NOTE(review): lr_scheduler is passed in the same callbacks list and
    presumably overrides the learning rate set here — confirm.
    """
    def on_train_begin(self, logs=None):
        # flag the whole model (including the pre-trained layers) as trainable
        self.model.trainable = True
        keras.backend.set_value(self.model.optimizer.lr, 0.0001) # set a new learning rate


In [None]:
# Training the model
from datetime import datetime


epochs = 128

# number of batches drawn per epoch for training and for validation
STEP_SIZE_TRAIN = n_train_files//batch_size
STEP_SIZE_VALID = n_val_files//batch_size

print(n_train_files)
print("Epochs: {}\nBatch size: {}\nValidation split: {}".format(epochs, batch_size, train_val_split))



# time the whole training run; `history` is overwritten with modelB's curves
start_time = datetime.now()
history = modelB.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_steps=STEP_SIZE_VALID,
    callbacks = [lr_scheduler, early_stop, Fine_tune()]
)
time_elapsed = datetime.now() - start_time

print('Time elapsed (hh:mm:ss.ms) {}'.format(time_elapsed))


# model_folder = '/content/drive/MyDrive/Machine Learning 2/models'
# import os
# if not os.path.exists(model_folder):
#     os.mkdir(model_folder)
# modelB.save(os.path.join(model_folder,"age_gender_B.h5"))

## STEP4B: Draw the learning curve
1.	The loss of the gender classification over the training and validation set
2.	The accuracy of the gender classification over the training and validation set
3.	The loss of the age estimation over the training and validation set
4.	The MAE of the age estimation over the training and validation set

In [None]:

# plot the learning curves
import matplotlib.pyplot as plt

print("Epochs: {}\nBatch size: {}\nValidation split: {}".format(epochs, batch_size, train_val_split))

# one entry per panel of the 2 x 2 grid:
# (training key, training label, validation key, validation label, upper y limit)
curve_panels = [
    ('gender_loss', 'train gender loss', 'val_gender_loss', 'val gender loss', 1.0),
    ('gender_accuracy', 'gender accuracy', 'val_gender_accuracy', 'validation gender accuracy', 1.0),
    ('age_loss', 'age loss', 'val_age_loss', 'validation age loss', 20.0),
    ('age_mae', 'age mae', 'val_age_mae', 'validation age mae', 20.0),
]

fig = plt.figure()
for position, (train_key, train_label, val_key, val_label, y_max) in enumerate(curve_panels, start=1):
    fig.add_subplot(2,2,position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.grid(True)
    plt.ylim([0, y_max])
    plt.xlabel('epoch')

#epochs40 batchsize32 tvs25 decay08