In [1]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import load_model
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input

In [2]:
# Folder that contains the face images. Its entries are listed later and the
# labels are parsed out of each file name.
datarute = 'UTKFace/' 

In [3]:
# Build parallel lists of image paths and the labels encoded in each file name.
# File names are expected to start with "<age>_<gender>_", e.g.
# "100_0_0_20170112213500903.jpg.chip.jpg" -> age 100, gender 0.
image_paths = []
age_labels = []
gender_labels = []
skipped_files = []  # directory entries that could not be used as samples

# os.listdir() returns entries in arbitrary order; sorting keeps the row
# indices reproducible between runs and machines.
for filename in sorted(os.listdir(datarute)):
    image_path = os.path.join(datarute, filename)
    temp = filename.split('_')
    try:
        if not os.path.isfile(image_path):
            raise ValueError("not a regular file")
        age = int(temp[0])
        gender = int(temp[1])
    except (IndexError, ValueError):
        # Not a labelled image (hidden system file, sub-folder, ...): remember
        # it and move on instead of aborting the whole scan.
        skipped_files.append(filename)
        continue
    image_paths.append(image_path)
    age_labels.append(age)
    gender_labels.append(gender)

# Report skipped entries explicitly so they are never dropped silently.
if skipped_files:
    print("Skipped {} entries without age/gender labels, e.g. {}".format(
        len(skipped_files), skipped_files[:5]))

In [4]:
# Assemble the three parallel lists into one table, one row per image.
df = pd.DataFrame({
    'image': image_paths,
    'age': age_labels,
    'gender': gender_labels,
})
df.head()

Unnamed: 0,image,age,gender
0,UTKFace/100_0_0_20170112213500903.jpg.chip.jpg,100,0
1,UTKFace/100_0_0_20170112215240346.jpg.chip.jpg,100,0
2,UTKFace/100_1_0_20170110183726390.jpg.chip.jpg,100,1
3,UTKFace/100_1_0_20170112213001988.jpg.chip.jpg,100,1
4,UTKFace/100_1_0_20170112213303693.jpg.chip.jpg,100,1


In [5]:
def extract_features(images, size=(128, 128)):
    """Load images as grayscale arrays stacked into one 4-D batch.

    Parameters
    ----------
    images : iterable of str
        Paths of the image files to load.
    size : tuple of int, optional
        Target ``(width, height)`` in pixels that every image is resized to.
        Defaults to ``(128, 128)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(n_images, height, width, 1)`` holding the
        unscaled pixel values.
    """
    width, height = size
    features = []
    for image in images:
        # `color_mode` replaces the `grayscale=True` flag, which newer Keras
        # releases no longer accept.
        img = load_img(image, color_mode='grayscale')
        # `Image.ANTIALIAS` was removed in Pillow 10; `Image.LANCZOS` names
        # the same resampling filter.
        img = img.resize((width, height), Image.LANCZOS)
        features.append(np.array(img))

    # The explicit dtype keeps the result uint8 even when `images` is empty.
    features = np.array(features, dtype=np.uint8)
    # Add the trailing single-channel axis; ignore this step if using RGB
    features = features.reshape(len(features), height, width, 1)
    return features


In [6]:
# Load every image listed in the table into one array (one entry per row of df).
X = extract_features(df['image'])

In [7]:
# Sanity check: expect (number of images, 128, 128, 1).
X.shape

(23708, 128, 128, 1)

In [8]:
# Scale the pixel values from 0-255 integers to floats in [0, 1].
# - float32 is used instead of NumPy's default float64 result, which would
#   double the memory taken by the image array.
# - The integer-dtype guard makes the cell safe to re-run: once X has been
#   converted to float it is left untouched instead of being divided again.
if np.issubdtype(X.dtype, np.integer):
    X = X.astype(np.float32) / 255.0

In [9]:
# Pull the two label columns out of the table as plain NumPy vectors.
y_gender, y_age = (np.array(df[column]) for column in ('gender', 'age'))

In [10]:
# Inclusive (min_age, max_age) bounds of each age bucket, youngest first.
# The position of a tuple in this list is the class index used as the label.
age_ranges = [(0, 2), (3, 5), (6, 12), (13, 17), (18, 24), (25, 34), (35, 44), (45, 54), (55, 64), (65, 74), (75, 100)]

def age_to_range(age):
    """Return the index in ``age_ranges`` of the bucket that ``age`` falls into.

    The result is the first bucket whose upper bound is not below ``age``.
    For integer ages inside the table this is the bucket that contains the
    age. Values outside the table are clamped to the nearest bucket rather
    than all being sent to the oldest one:

    * ages above the last upper bound map to the last bucket,
    * ages below the first lower bound map to the first bucket,
    * a fractional age between two buckets maps to the older of the two.

    Parameters
    ----------
    age : int or float
        Age in years.

    Returns
    -------
    int
        Bucket index in ``range(len(age_ranges))``.
    """
    for i, (_, upper) in enumerate(age_ranges):
        if age <= upper:
            return i
    # Older than every bucket: clamp to the oldest one.
    return len(age_ranges) - 1

# Translate every raw age into the index of its age bucket.
y_age_range = np.array(list(map(age_to_range, y_age)))


In [11]:
# Shape of one model input; matches the per-image shape produced by
# extract_features (128 x 128 pixels, one channel).
input_shape = (128, 128, 1)

In [12]:


# Define the input tensor for the model
inputs = Input(shape=input_shape)

# Convolutional layers (shared by both prediction tasks)
# First convolutional layer with 32 filters, 3x3 kernel size, and ReLU activation
conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
# First max pooling layer with 2x2 pool size
maxp_1 = MaxPooling2D(pool_size=(2, 2))(conv_1)

# Second convolutional layer with 64 filters, 3x3 kernel size, and ReLU activation
conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(maxp_1)
# Second max pooling layer with 2x2 pool size
maxp_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)

# Third convolutional layer with 128 filters, 3x3 kernel size, and ReLU activation
conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu')(maxp_2)
# Third max pooling layer with 2x2 pool size
maxp_3 = MaxPooling2D(pool_size=(2, 2))(conv_3)

# Fourth convolutional layer with 256 filters, 3x3 kernel size, and ReLU activation
conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu')(maxp_3)
# Fourth max pooling layer with 2x2 pool size
maxp_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)

# Flatten the convolutional features to use them in dense layers
flatten = Flatten()(maxp_4)

# Fully connected (Dense) layers: two parallel branches, one per output,
# each fed directly by the flattened features (they are NOT stacked).
# Gender branch: 256 neurons and ReLU activation
dense_1 = Dense(256, activation='relu')(flatten)
# Age branch: 256 neurons and ReLU activation
dense_2 = Dense(256, activation='relu')(flatten)

# Apply dropout (rate 0.3) to each branch to reduce overfitting
dropout_1 = Dropout(0.3)(dense_1)
dropout_2 = Dropout(0.3)(dense_2)

# Output layers
# Gender output: 1 neuron with sigmoid activation
output_1 = Dense(1, activation='sigmoid', name='gender_out')(dropout_1)
# Age output: one neuron per age bucket with softmax activation
num_age_ranges = len(age_ranges)
output_2 = Dense(num_age_ranges, activation='softmax', name='age_out')(dropout_2)


# Create the Keras model using the defined inputs and outputs
model = Model(inputs=[inputs], outputs=[output_1, output_2])

# Compile the model with the appropriate loss function and optimizer
# - 'binary_crossentropy' for the gender output (binary classification)
# - 'categorical_crossentropy' for the age output (multi-class classification
#   over the age buckets, trained against one-hot targets)
# - 'adam' optimizer
# Losses and metrics are keyed by output layer name so every output gets an
# explicit entry; a single flat metrics list is rejected for multi-output
# models by newer Keras releases.
model.compile(
    loss={'gender_out': 'binary_crossentropy', 'age_out': 'categorical_crossentropy'},
    optimizer='adam',
    metrics={'gender_out': ['accuracy'], 'age_out': ['accuracy']},
)

# One-hot encode the age bucket indices to match the softmax output above
y_age_range_one_hot = to_categorical(y_age_range, num_classes=num_age_ranges)

In [13]:
# plot the model
# The pydot / graphviz version prints act as an install check for the two
# packages that plot_model asks for.
# NOTE(review): the recorded output shows both packages importing fine while
# plot_model still reports a missing install -- presumably the Graphviz `dot`
# executable itself is not on PATH; confirm on the target machine.
import pydot
print(pydot.__version__)

import graphviz
print(graphviz.__version__)

from tensorflow.keras.utils import plot_model
plot_model(model)

1.4.2
0.20.1
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [14]:
# `validation_split` takes the LAST 20% of the arrays exactly as they are
# passed in, before any shuffling. The recorded df.head() shows the rows in
# file-name order (grouped by age), so an unshuffled split would validate on
# a slice whose age distribution differs from the training data.
# Shuffle inputs and labels with one shared, seeded permutation so both
# splits are drawn from the same distribution and the run is reproducible.
shuffle_idx = np.random.default_rng(42).permutation(len(X))

# Indexing with the permutation creates shuffled copies; X, y_gender and
# y_age_range_one_hot themselves keep their original row order.
history = model.fit(
    x=X[shuffle_idx],
    y=[y_gender[shuffle_idx], y_age_range_one_hot[shuffle_idx]],
    batch_size=32,
    epochs=30,
    validation_split=0.2,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [15]:
# Write the trained model to a file in the current working directory.
model.save('TrainedModelV3.h5')

In [16]:
# Spot-check one sample: compare its stored labels with the model's prediction.
image_index = 344

# predict() works on batches, so the single image is reshaped to a batch of one.
sample = X[image_index]
pred = model.predict(sample.reshape(1, 128, 128, 1))

# `pred` holds one array per model output, in the order the outputs were
# declared: gender first, then age.
gender_scores, age_scores = pred
pred_gender = round(gender_scores[0][0])  # sigmoid score rounded to a 0/1 label
age_probs = age_scores[0]                 # softmax probabilities, one per age bucket

true_age_range = age_ranges[age_to_range(y_age[image_index])]
print("Original Gender:", y_gender[image_index], "Original Age Range:", true_age_range)
print("Predicted Gender:", pred_gender)
print("Probabilities for each age range:")

for age_range, prob in zip(age_ranges, age_probs):
    print("Age Range {}: {:0.2%}".format(age_range, prob))


Original Gender: 1 Original Age Range: (6, 12)
Predicted Gender: 1
Probabilities for each age range:
Age Range (0, 2): 0.00%
Age Range (3, 5): 0.13%
Age Range (6, 12): 99.81%
Age Range (13, 17): 0.01%
Age Range (18, 24): 0.05%
Age Range (25, 34): 0.00%
Age Range (35, 44): 0.00%
Age Range (45, 54): 0.00%
Age Range (55, 64): 0.00%
Age Range (65, 74): 0.00%
Age Range (75, 100): 0.00%
