In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
# Collect the name of every entry in the UTKFace image folder, then preview a few.
utkface_dir = "../input/utkface-new/UTKFace/"
files = os.listdir(utkface_dir)
files[:5]

['26_0_2_20170104023102422.jpg.chip.jpg',
 '22_1_1_20170112233644761.jpg.chip.jpg',
 '21_1_3_20170105003215901.jpg.chip.jpg',
 '28_0_0_20170117180555824.jpg.chip.jpg',
 '17_1_4_20170103222931966.jpg.chip.jpg']

In [3]:
image_array = []
age_array = []
gender_array = []
file_path = []
file_name = []  # kept for compatibility; nothing is appended to it in this cell
labels = []
unreadable_files = []  # names of files that cv2 could not decode
path = "../input/utkface-new/UTKFace/"
MAX_AGE = 100  # samples older than this are dropped

## loop through each file; names start with "<age>_<gender>_"
for name in tqdm(files):
    fields = name.split("_")
    age = int(fields[0])
    gender = int(fields[1])  # 0 - male, 1 - female
    if age > MAX_AGE:
        # remove dataset for age group above 100
        continue

    full_path = os.path.join(path, name)
    image = cv2.imread(full_path)
    if image is None:
        # cv2.imread returns None for a missing/corrupt file instead of raising;
        # skip it before touching any list so all lists stay aligned.
        unreadable_files.append(name)
        continue

    # resize image (96, 96), ori is (200, 200)
    image = cv2.resize(image, (96, 96))
    # OpenCV loads images as BGR; convert to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    age_array.append(age)
    gender_array.append(gender)
    file_path.append(full_path)
    labels.append([[age], [gender]])
    image_array.append(image)
        
        

100%|██████████| 23708/23708 [02:28<00:00, 159.27it/s]


In [4]:
# a: sorted distinct ages, b: number of images for each age (aligned with a).
# return_counts expects a boolean; the string "True" only worked because it is truthy.
a, b = np.unique(age_array, return_counts=True)

In [5]:
a # distinct ages kept after the age <= 100 filter (1 - 100, with some gaps)

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  95,  96,  99, 100])

In [6]:
# number of distinct ages that remain in the dataset
len(a)

97

In [7]:
b # number of images for each age in `a` (same order)

array([1123,  482,  289,  273,  196,  131,  139,  263,  166,  156,   65,
        130,   81,  157,  177,  247,  158,  262,   98,  284,  346,  395,
        426,  859,  734, 2197,  615,  918,  570,  724,  350,  664,  143,
        409,  880,  483,  293,  325,  266,  526,  132,  266,  157,  100,
        440,  153,  170,  153,  148,  381,  138,  232,  241,  353,  268,
        236,   97,  271,   82,  293,  161,  125,  103,   50,  259,   77,
         94,  100,   56,  147,   33,   98,   63,   32,  148,   58,   28,
         69,   23,  133,   22,   40,   18,   24,  155,   35,   10,   34,
         33,   82,    2,   13,    5,    9,   17,    9,   11])

In [8]:
## convert image list to array
# Scale pixels from 0-255 to 0-1. Casting to float32 first keeps the result in
# float32; dividing the integer pixel array directly would promote it to float64
# and double the memory footprint.
# NOTE: this rebinds `image_array`, so re-running the cell rescales the data again.
image_array = np.asarray(image_array, dtype=np.float32) / 255.0

In [9]:
## convert label list to array
# each entry is [[age], [gender]], so the array has shape (n_samples, 2, 1):
# labels[:, 0] holds the ages and labels[:, 1] the genders
labels = np.array(labels)

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. A fixed random_state makes the
# split (and therefore the reported validation metrics) repeatable between runs.
# NOTE: `image_array` is rebound to the TRAINING images here, so re-running this
# cell would split the training portion again.
image_array, X_test, Y_train, Y_test = train_test_split(
    image_array, labels, test_size=0.2, random_state=42
)


In [11]:
def _gender_then_age(label_block):
    """Split a (n, 2, 1) label array into [gender, age] target arrays.

    Column 1 of the labels holds gender and column 0 holds age; the returned
    list puts gender first.
    """
    return [label_block[:, 1], label_block[:, 0]]


# split age and gender in labels
Y_train_split = _gender_then_age(Y_train)
Y_test_split = _gender_then_age(Y_test)

In [12]:
# preview the training targets: element 0 is gender, element 1 is age
Y_train_split

[array([[1],
        [0],
        [0],
        ...,
        [0],
        [0],
        [0]]),
 array([[ 7],
        [62],
        [85],
        ...,
        [28],
        [63],
        [45]])]

In [13]:
## import all tensorflow keras library
from tensorflow.keras.layers import (
    Input,
    Conv2D, 
    MaxPool2D, 
    Dense, 
    BatchNormalization, 
    ReLU, 
    Dropout, 
    Flatten,
    Dropout,
    Concatenate,
    GlobalAvgPool2D
)

from tensorflow.keras.regularizers import L2
import tensorflow as tf

In [14]:
# building block of the CNN
def inception_module(inputs, f1, f2):
    """Apply two parallel Conv-BatchNorm-ReLU branches and concatenate them.

    Args:
        inputs: tensor fed to both branches.
        f1: number of filters of the 3x3 convolution branch.
        f2: number of filters of the 5x5 convolution branch.

    Returns:
        The concatenation of the 3x3 branch followed by the 5x5 branch.
    """
    branches = []
    # (filters, kernel size) per branch; "same" padding keeps the spatial size
    # identical so the branch outputs can be concatenated.
    for filters, kernel_size in ((f1, 3), (f2, 5)):
        branch = Conv2D(filters, kernel_size, padding="same")(inputs)
        branch = BatchNormalization()(branch)
        branch = ReLU()(branch)
        branches.append(branch)

    # combine both branches
    return Concatenate()(branches)

In [15]:
# define model
def build_model():
    """Build the two-output CNN: gender (sigmoid) and age (linear).

    The network takes 96x96 RGB images, runs five inception-module + max-pool
    stages, flattens the result and feeds it to two separate heads.

    Returns:
        A tf.keras.Model whose outputs are [gender, age], in that order.
    """
    inputs = Input((96, 96, 3))

    # shared feature extractor: (3x3 filters, 5x5 filters) for each stage
    x = inputs
    for f1, f2 in ((64, 32), (64, 32), (128, 32), (128, 32), (256, 64)):
        x = inception_module(x, f1, f2)
        x = MaxPool2D()(x)
    features = Flatten()(x)

    # gender head
    gender_hidden = Dense(64, activation="relu")(features)
    gender_hidden = Dropout(0.3)(gender_hidden)
    output_gender = Dense(1, activation="sigmoid", name="gender")(gender_hidden)

    # age head: (units, L2 factor) for each Dense-BatchNorm-ReLU stage
    age = features
    for units, l2_factor in ((1024, 0.05), (512, 0.02), (128, 0.01)):
        age = Dense(units, kernel_regularizer=L2(l2=l2_factor))(age)
        age = BatchNormalization()(age)
        age = ReLU()(age)

    # NOTE(review): this layer is unnamed, and the callbacks monitor
    # "val_dense_4_mae", which presumably is its auto-generated name; keep the
    # order in which Dense layers are created unchanged.
    output_age = Dense(1)(age)

    # define model input and output
    return tf.keras.Model(inputs=[inputs], outputs=[output_gender, output_age])

In [16]:
# instantiate the two-output network and print its layer-by-layer summary
model=build_model()
model.summary()

2022-01-06 16:52:40.871009: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-06 16:52:40.969853: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-06 16:52:40.970565: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-06 16:52:40.971726: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 96, 96, 64)   1792        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 96, 96, 32)   2432        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 96, 96, 64)   256         conv2d[0][0]                     
______________________________________________________________________________________________

In [17]:
# Losses are matched to the model outputs by position:
#   output 0 (gender) -> binary_crossentropy
#   output 1 (age)    -> mean_squared_error
output_losses = ['binary_crossentropy', 'mean_squared_error']
adam = tf.keras.optimizers.Adam()

model.compile(
    optimizer=adam,
    loss=output_losses,
    metrics=['mae'],
)

In [18]:
# Checkpoint callback: writes the model weights to `ckp_path` during training.
ckp_path="trained_model/model"
model_checkpoint=tf.keras.callbacks.ModelCheckpoint(
    filepath = ckp_path,
    # NOTE(review): "dense_4" is presumably the auto-generated name of the age
    # output layer; confirm against model.summary() if the architecture changes.
    monitor = "val_dense_4_mae",
    save_best_only = True,      # only overwrite when the monitored value improves
    save_weights_only = True,   # store weights only, not the full model
    mode = "auto"
)

In [19]:
# Multiply the learning rate by 0.9 when the monitored validation MAE has not
# improved for 5 epochs, never going below 1e-5.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_dense_4_mae",
    factor=0.9,
    patience=5,
    cooldown=0,
    min_lr=1e-5,
    mode="auto",
    # The original keyword was misspelled `varbose`, which is not a
    # ReduceLROnPlateau parameter; `verbose=1` logs each learning-rate reduction.
    verbose=1,
)

In [20]:
EPOCH = 300
BATCH_SIZE = 256

# callbacks: keep the best weights on disk and shrink the learning rate on plateaus
training_callbacks = [model_checkpoint, reduce_lr]

# train both heads jointly; the held-out split is used for validation
history = model.fit(
    image_array,
    Y_train_split,
    validation_data=(X_test, Y_test_split),
    batch_size=BATCH_SIZE,
    epochs=EPOCH,
    callbacks=training_callbacks,
)

2022-01-06 16:52:47.136493: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 2095607808 exceeds 10% of free system memory.
2022-01-06 16:52:49.375340: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 2095607808 exceeds 10% of free system memory.
2022-01-06 16:52:50.918350: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/300


2022-01-06 16:52:53.705924: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005




2022-01-06 16:53:12.393186: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 523984896 exceeds 10% of free system memory.
2022-01-06 16:53:12.953422: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 523984896 exceeds 10% of free system memory.


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

In [21]:
# Restore the weights written by the checkpoint callback. Reusing `ckp_path`
# (instead of repeating the literal) keeps the save and load locations in sync.
model.load_weights(ckp_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fe22810ec10>

In [22]:
# Predict on the held-out images. The model has two outputs, so the result is
# indexed as prediction_val[0] (gender) and prediction_val[1] (age).
prediction_val = model.predict(X_test, batch_size=BATCH_SIZE)

2022-01-06 17:58:42.215373: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 523984896 exceeds 10% of free system memory.


In [23]:
# predicted gender (sigmoid output) for the first 20 held-out samples
prediction_val[0][:20]

array([[0.2577138 ],
       [0.0073769 ],
       [0.02688259],
       [0.4806348 ],
       [0.03805774],
       [0.96795726],
       [0.6816135 ],
       [0.4806348 ],
       [0.02565384],
       [0.55374295],
       [0.8817764 ],
       [0.03843454],
       [0.99239165],
       [0.8666123 ],
       [0.0065493 ],
       [0.07714579],
       [0.00954254],
       [0.9604273 ],
       [0.8234631 ],
       [0.9263038 ]], dtype=float32)

In [24]:
# ground-truth gender (0 - male, 1 - female) for the same 20 samples
Y_test_split[0][:20]

array([[0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1]])

In [25]:
# predicted age for the first 20 held-out samples
prediction_val[1][:20]

array([[ 1.3440565],
       [40.28585  ],
       [65.2873   ],
       [ 0.7861817],
       [26.000626 ],
       [17.530182 ],
       [ 6.0731   ],
       [ 0.7017657],
       [68.58525  ],
       [74.868576 ],
       [91.44099  ],
       [31.15152  ],
       [33.82464  ],
       [ 7.3973036],
       [28.289726 ],
       [25.473608 ],
       [30.813986 ],
       [16.053417 ],
       [57.618042 ],
       [29.779861 ]], dtype=float32)

In [26]:
# ground-truth age for the same 20 samples
Y_test_split[1][:20]

array([[ 1],
       [40],
       [58],
       [ 1],
       [24],
       [23],
       [ 4],
       [ 1],
       [52],
       [90],
       [95],
       [32],
       [59],
       [ 5],
       [35],
       [31],
       [31],
       [17],
       [80],
       [28]])

In [27]:
# Convert the trained Keras model to TensorFlow Lite and write the serialized
# bytes to "model.tflite" in the working directory.
converter = tf.lite.TFLiteConverter.from_keras_model(model) 
tflite_model = converter.convert()

# binary mode: the file is written as raw bytes
with open("model.tflite", "wb") as f:
    f.write(tflite_model)

2022-01-06 17:59:54.759691: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2022-01-06 18:00:00.149853: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-06 18:00:00.150237: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2022-01-06 18:00:00.150356: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2022-01-06 18:00:00.150907: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-01-06 18:00:00.151310: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had ne