In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import cv2

Hyperparameters

In [None]:
# Number of passes over the training data handed to model.fit.
epochs = 20

# Side length, in pixels, of the square images: used when resizing faces
# with OpenCV and as the spatial input size of the network.
width, height = 224, 224

Prepare dataset

In [None]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
# Download the jangedoo/utkface-new dataset archive with the Kaggle CLI.
!kaggle datasets download -d jangedoo/utkface-new

In [None]:
!unzip -qq utkface-new.zip

In [None]:
images = []  # x: RGB face crops resized to (width, height)
ages = []  # y: age parsed from the leading field of each file name

# sorted() gives a deterministic sample order; os.listdir() order is arbitrary.
for image_name in sorted(os.listdir('crop_part1')):
    # The age is the first underscore-separated field of the file name.
    age_field = image_name.split('_')[0]
    if not age_field.isdigit():
        # Skip stray files (hidden files, notes, ...) that do not follow
        # the "<age>_..." naming scheme instead of crashing in int().
        continue

    image = cv2.imread(os.path.join('crop_part1', image_name))
    if image is None:
        # cv2.imread returns None (it does not raise) when a file cannot
        # be decoded; skip it rather than fail later inside cv2.resize.
        continue

    image = cv2.resize(image, (width, height))
    # OpenCV loads images as BGR; convert to RGB so they plot correctly.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Append image and label together so images[i] always matches ages[i].
    images.append(image)
    ages.append(int(age_field))

In [None]:
# Wrap the collected images and labels in named Series so they become
# the 'Images' and 'Ages' columns of a single frame.
images = pd.Series(data=images, name='Images')
ages = pd.Series(data=ages, name='Ages')

# Both Series share the same positional index, so joining on it places
# each image next to its age.
df = images.to_frame().join(ages)
df.head()

In [None]:
# Sanity check: print the label of the row labelled 0 and draw its image.
print(df.loc[0, 'Ages'])
plt.imshow(df.loc[0, 'Images'])

In [None]:
# Same check for the row labelled 1.
print(df.loc[1, 'Ages'])
plt.imshow(df.loc[1, 'Images'])

In [None]:
# Histogram of the raw age labels; the number of bins equals the maximum age.
fig, ax = plt.subplots(figsize=(18, 6))
ax.hist(df['Ages'], bins=df['Ages'].max())
plt.show()

There are too many faces of people aged 0 to 4. The model would fit these ages too well and the other ages not well enough. To resolve this, I'm only going to keep a random 30% of the images in this age range.

In [None]:
# Keep only a random 30% of the samples aged 4 or younger and all older
# samples. A fixed random_state makes the subsample, and therefore every
# later result, reproducible across runs.
under_4 = df[df['Ages'] <= 4]
under_4_small = under_4.sample(frac=0.3, random_state=42)

up_4 = df[df['Ages'] > 4]

df = pd.concat([under_4_small, up_4])

In [None]:
# Age histogram after reducing the 0-4 age group.
fig, ax = plt.subplots(figsize=(18, 6))
ax.hist(df['Ages'], bins=df['Ages'].max())
plt.show()

This looks much better! The dataframe is more representative of the population now. However, there aren't many images of people aged 80 and over, so the model would not train well enough on those ages. It's best to just remove everyone aged 80 and over and only have a model that can predict the ages of people under 80.

In [None]:
# Keep only the rows whose age is strictly below 80.
is_under_80 = df['Ages'] < 80
df = df.loc[is_under_80]

In [None]:
# Age histogram of the final dataset (ages below 80 only).
fig, ax = plt.subplots(figsize=(18, 6))
ax.hist(df['Ages'], bins=df['Ages'].max())
plt.show()

In [None]:
# Turn the two dataframe columns into NumPy arrays: the per-row image
# arrays are stacked into X, the ages become the label vector Y.
image_list = df['Images'].tolist()
age_list = df['Ages'].tolist()

X = np.array(image_list)
Y = np.array(age_list)

In [None]:
# Show the dimensions of the stacked image array as the cell output.
X.shape

In [None]:
# Hold out 20% of the samples for validation, stratified by age so both
# splits follow the same age distribution. A fixed random_state keeps
# the split identical across runs so results are comparable.
x_train, x_val, y_train, y_val = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42
)

print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)

In [None]:
# Scale 8-bit pixel values by 1/255 (the original 1/225 was a typo and
# did not match the /255.0 scaling used at inference time).
# Random horizontal flips are applied to the training data only.
data_generator = ImageDataGenerator(rescale=1./255,
                                    horizontal_flip=True)

# Validation data is only rescaled: augmenting or shuffling it would make
# the validation loss noisy and not comparable between epochs.
val_generator = ImageDataGenerator(rescale=1./255)

train_data = data_generator.flow(x_train, y_train, batch_size=32)
val_data = val_generator.flow(x_val, y_val, batch_size=32, shuffle=False)

Train

In [None]:
# MobileNetV2 with ImageNet weights as the feature extractor:
# include_top=False leaves out the classification head, and
# pooling='avg' applies global average pooling to the last feature map.
image_shape = (width, height, 3)
base_model = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=image_shape,
)

In [None]:
# Freeze every layer of the pretrained network so that training only
# updates the layers stacked on top of it.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

In [None]:
# Regression model: pretrained features -> dropout -> one output unit
# that predicts the age.
model = tf.keras.Sequential()
model.add(base_model)
model.add(Dropout(0.5))
model.add(Dense(1, activation='relu'))

In [None]:
# Age prediction is a regression task: minimise the mean squared error
# with the Adam optimiser.
adam_optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=adam_optimizer, loss='mean_squared_error')

In [None]:
# Train on the training generator and evaluate on the validation
# generator after each epoch.
model.fit(
    x=train_data,
    epochs=epochs,
    validation_data=val_data,
    shuffle=True,
)

Inference

In [None]:
!wget https://github.com/tzutalin/dlib-android/blob/master/data/shape_predictor_68_face_landmarks.dat

In [None]:
from imutils.face_utils import FaceAligner
import imutils
import dlib

# dlib's frontal face detector: finds face rectangles in an image.
detector = dlib.get_frontal_face_detector()
# Landmark predictor loaded from the model file downloaded earlier.
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
# Aligner built on the landmark predictor; process_and_predict uses it to
# produce an aligned crop for each detected face.
fa = FaceAligner(predictor, desiredFaceWidth=256)


def process_and_predict(image_path):
    """Detect the faces in an image, show each aligned crop and predict its age.

    Each detected face is aligned, resized to the network's input size,
    converted to RGB and scaled by 1/255 before being passed to the model.

    Parameters
    ----------
    image_path : str
        Path of an image file that OpenCV can read.

    Returns
    -------
    list of int
        One predicted age (truncated to an integer) per detected face, in
        detection order. Empty when no face is detected.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read from ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    image = imutils.resize(image, width=800)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Second argument: number of times dlib upsamples the image before
    # detecting, which helps it find smaller faces.
    rects = detector(gray, 2)

    if len(rects) == 0:
        print('No face detected in', image_path)
        return []

    predicted_ages = []
    for rect in rects:
        faceAligned = fa.align(image, gray, rect)

        faceAligned = cv2.resize(faceAligned, (width, height))
        faceAligned = cv2.cvtColor(faceAligned, cv2.COLOR_BGR2RGB)
        plt.imshow(faceAligned)
        # Flush the figure now so every face gets its own plot instead of
        # each imshow overwriting the previous one.
        plt.show()

        # Add the batch axis the model expects: (1, height, width, 3).
        batch = np.expand_dims(faceAligned / 255.0, axis=0)
        prediction = model.predict(batch)
        # The prediction has shape (1, 1); index the scalar explicitly,
        # since int() on a multi-dimensional array is deprecated in NumPy.
        age = int(prediction[0][0])
        print('Age:', age)
        predicted_ages.append(age)

    return predicted_ages

In [None]:
# Run the full pipeline on a sample image.
# NOTE(review): the absolute /content/ path looks environment-specific — confirm the file exists before running.
process_and_predict('/content/trump.jpg')