## [3] Prediction of the Age and Gender Based on Human Face Images Based on Deep Learning Algorithm
<img src="images/[3].png"> <br>

First, import the libraries used in this notebook:

In [None]:
import tensorflow as tf, pandas as pd, matplotlib.pyplot as plt, numpy as np, os, sys
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,  BatchNormalization, DepthwiseConv2D, ReLU, AvgPool2D, Input
from keras.utils import load_img, plot_model
from keras.applications.mobilenet import MobileNet
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
# from helper.prepare_data import prepare_data


Next, we pick the dataset that we would like to use from the following options:
<br>1 - Age, gender, ethnicity CSV
<br>2 - UTKFace
<br>3 - Fairface

In [None]:
import numpy as np, pandas as pd, os
from keras.utils import load_img
from PIL import Image, ImageOps

def prepare_data(choice):
    match choice:
        case 1:
            df = pd.read_csv(os.getcwd() + "//datasets//age_gender.csv")

            # image size originally is 48
            img_size = 48

            # First split each pixel value and convert to float, only then we can normalize values of pixels from 0 - 255 to 0 - 1:
            df['pixels'] = df['pixels'].apply(lambda x: np.array(x.split(), dtype = "float32"))
            df['pixels'] = df['pixels'].apply(lambda x: x / 255)

            x = np.array(df['pixels'].tolist())
            # x.shape[0] = 23705, those are number of entries in db file, last argument is 1 if greyscale, 3 if rgb images
            x = x.reshape(x.shape[0], img_size, img_size, 1)

            x = np.array(x)
            y_age = np.array(df['age'])
            y_ethnicity = np.array(df['ethnicity'])
            y_age = np.array(df['gender'])
            return x, y_age, y_age, y_ethnicity, img_size
        case 2:
            directory = os.getcwd() + '//datasets//UTKFace//'

            # image size originally is 200
            img_size = 200
            
            # lists for storing labels
            image_paths = []
            age_labels = []
            gender_labels = []

            for filename in os.listdir(directory):
                image_path = os.path.join(directory, filename)
                temp = filename.split('_')
                age = int(temp[0])
                gender = int(temp[1])
                image_paths.append(image_path)
                age_labels.append(age)
                gender_labels.append(gender)

            df = pd.DataFrame()
            df['image'], df['age'], df['gender'] = image_paths, age_labels, gender_labels

            x = []
            for image in df['image']:
                    img = load_img(image, color_mode = "grayscale")
                    # img = load_img(image, grayscale=False)
                    img = img.resize((img_size, img_size), 3)
                    img = np.array(img)
                    x.append(img)

            x = np.array(x)
            x = x.reshape(len(x), img_size, img_size, 1)
            # x = x.reshape(len(x), img_size, img_size, 3)
            x = x/255.0

            y_age = np.array(df['age'])
            y_gender = np.array(df['gender'])
            if 'ethnicity' in df.columns:
                y_ethnicity = np.array(df['ethnicity'])
            else:
                y_ethnicity = np.empty(0)
            return x, y_age, y_gender, y_ethnicity, img_size
        case 3:
            directory = os.getcwd() + '//datasets//Fairface//val'

            # image size originally is 224
            img_size = 224
            
            x = []

            for filename in os.listdir(directory):
                image_path = os.path.join(directory, filename)
                img = load_img(image_path, color_mode = "grayscale")
                img = img.resize((img_size, img_size), 3)
                img = np.array(img)
                x.append(img)

            x = np.array(x)
            x = x.reshape(len(x), img_size, img_size, 1)
            x = x/255.0

            df = pd.read_csv(os.getcwd() + "//datasets//Fairface//fairface_label_val.csv")
            y_age = []
            y_gender = []
            y_ethnicity = [] 

            for entry in np.array(df['age']):
                if entry == '0-2':
                    y_age.append(0)
                elif entry == '3-9':
                    y_age.append(1)
                elif entry == '10-19':
                    y_age.append(2)
                elif entry == '20-29':
                    y_age.append(3)
                elif entry == '30-39':
                    y_age.append(4)
                elif entry == '40-49':
                    y_age.append(5)
                elif entry == '50-59':
                    y_age.append(6)
                elif entry == '60-69':
                    y_age.append(7)
                elif entry == 'more than 70':
                    y_age.append(8)

            for entry in np.array(df['gender']):
                if entry == "Male":
                    y_gender.append(0)
                elif entry == "Female":
                    y_gender.append(1)

            for entry in np.array(df['race']):
                if entry == "White":
                    y_ethnicity.append(0)
                elif entry == "Black":
                    y_ethnicity.append(1)
                elif entry == "Latino_Hispanic":
                    y_ethnicity.append(2)
                elif entry == "East":
                    y_ethnicity.append(3)
                elif entry == "Southeast Asian":
                    y_ethnicity.append(4)
                elif entry == "Indian":
                    y_ethnicity.append(5)
                elif entry == "Middle Eastern":
                    y_ethnicity.append(6)

            y_age = np.array(y_age)
            y_gender = np.array(y_gender)
            y_ethnicity = np.array(y_ethnicity)

            return x, y_age, y_gender, y_ethnicity, img_size
        case _:
            return 0,0,0,0,0

In [None]:
# Dataset selector passed to prepare_data: 1 = age/gender/ethnicity CSV, 2 = UTKFace, 3 = Fairface
choice = 2

Prepare the selected dataset so that we end up with normalized pixels and labels:

In [None]:
# Load the selected dataset: normalized images, the age / gender / ethnicity labels and the image side length
x, y_age, y_gender, y_ethnicity, img_size = prepare_data(choice = choice)

Display some example images:

In [None]:
# size of the whole grid of images
plt.figure(figsize=(10, 10))

# display up to the first 25 images in a 5x5 grid (fewer if the dataset is smaller,
# instead of failing with an IndexError)
preview_count = min(25, len(x))
for i in range(preview_count):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    # disable x and y axis description
    plt.xticks([])
    plt.yticks([])
    # drop the trailing channel axis so imshow receives a 2-D greyscale image
    plt.imshow(x[i].reshape(img_size, img_size), cmap='gray')
    # A = age, G = gender, E = ethnicity (only when ethnicity labels are available)
    label = f"A: {y_age[i]} G: {y_gender[i]}"
    if y_ethnicity.size > 0:
        label += f" E: {y_ethnicity[i]}"
    plt.xlabel(label)
plt.show()

Build the model:

In [None]:
# input_shape = (64, 64, 3)

# inputs = Input((input_shape))
# # convolutional layers
# conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu') (inputs)
# maxp_1 = MaxPooling2D(pool_size=(2, 2)) (conv_1)
# conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu') (maxp_1)
# maxp_2 = MaxPooling2D(pool_size=(2, 2)) (conv_2)
# conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu') (maxp_2)
# maxp_3 = MaxPooling2D(pool_size=(2, 2)) (conv_3)
# conv_4 = Conv2D(128, kernel_size=(3, 3), activation='relu') (maxp_3)
# maxp_4 = MaxPooling2D(pool_size=(2, 2)) (conv_4)

# flatten_age = Flatten() (maxp_4)
# flatten_gender = Flatten() (maxp_3)

# # fully connected layers
# dense_1 = Dense(128, activation='relu') (flatten_gender)
# dense_2 = Dense(128, activation='relu') (flatten_age)

# dropout_1 = Dropout(0.3) (dense_1)
# dropout_2 = Dropout(0.3) (dense_2)

# output_1 = Dense(1, activation='sigmoid', name='gender_out') (dropout_1)
# output_2 = Dense(1, activation='relu', name='age_out') (dropout_2)

# model = Model(inputs=[inputs], outputs=[output_1, output_2])

# model.compile(loss=['binary_crossentropy', 'mae'], optimizer='adam', metrics=['accuracy'])

In [None]:
# VGG-style CNN that classifies gender from a single-channel (greyscale) face image.
# The convolutions use Keras' default 'valid' padding, so each one shrinks the feature map
# by 2 pixels per side; the stack fits img_size = 200 or 224 but is too deep for 48x48 inputs.
inputs = Input((img_size, img_size, 1))

# (number of filters, whether a 2x2 max-pooling follows) for each 3x3 convolution
conv_blocks = [
    (64, True),
    (128, True),
    (256, False),
    (256, True),
    (512, False),
    (512, True),
    (512, False),
    (512, True),
]

layer = inputs
for index, (filters, pool_after) in enumerate(conv_blocks, start = 1):
    layer = Conv2D(filters, kernel_size = (3, 3), activation = 'relu', name = f'conv2d_{index}') (layer)
    if pool_after:
        layer = MaxPooling2D(pool_size = (2, 2)) (layer)

layer = Flatten() (layer)

# two fully connected layers on top of the flattened feature maps
Dense_1 = Dense(4096, activation='relu') (layer)
Dense_2 = Dense(4096, activation='relu') (Dense_1)

# A single output unit needs a sigmoid: softmax normalizes over the units of the layer,
# so with one unit it would always output 1.0 and the model could not learn.
Dense_output_1 = Dense(1, activation='sigmoid') (Dense_2)

model = Model(inputs = [inputs], outputs = Dense_output_1)
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy', 'mae'])
model.summary()

In [None]:
# Train on the gender labels for 10 epochs, holding out 20% of the samples for validation.
# NOTE(review): Keras documents that validation_split takes the last samples of the arrays,
# before any shuffling - confirm the sample order produced by prepare_data is not grouped by label.
history = model.fit(x, y_gender, epochs = 10, validation_split = 0.2)

In [None]:
# Per-epoch accuracy on the training data and on the validation split, as recorded by model.fit
acc = history.history['accuracy']
# validation metrics are stored under the metric name prefixed with a single "val_"
val_acc = history.history['val_accuracy']
epochs = range(len(acc))

plt.plot(epochs, acc, 'b', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Accuracy Graph')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
# open a new figure so that any plot drawn afterwards does not land on the accuracy graph
plt.figure()