# Age Prediction

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import r2_score
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, MaxPool2D
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image

In [3]:
images_directory = Path("age-data/20-50/20-50")

In [4]:
filepaths = pd.Series(list(images_directory.glob(r'**/*.jpg')), name='Filepath').astype(str)
ages = pd.Series(filepaths.apply(lambda x: os.path.split(os.path.split(x)[0])[1]), name='Age').astype(np.int)

images = pd.concat([filepaths, ages], axis=1).sample(frac=1.0, random_state=113).reset_index(drop=True)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  ages = pd.Series(filepaths.apply(lambda x: os.path.split(os.path.split(x)[0])[1]), name='Age').astype(np.int)


In [5]:
images.head()

Unnamed: 0,Filepath,Age
0,age-data\20-50\20-50\train\48\129512.jpg,48
1,age-data\20-50\20-50\train\41\173949.jpg,41
2,age-data\20-50\20-50\test\31\44619.jpg,31
3,age-data\20-50\20-50\test\24\40079.jpg,24
4,age-data\20-50\20-50\train\46\147446.jpg,46


In [6]:
# We will use 8,000 images for training and 2,000 images for testing

image_df = images.sample(10000, random_state=1).reset_index(drop=True)

train_df, test_df = train_test_split(image_df, train_size=0.8, shuffle=True, random_state=123)

In [7]:
# We will use the ImageDataGenerator class modify the images to make the model more robust

train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
	validation_split=0.2,
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2
)

In [8]:
# We will use the flow_from_dataframe method to load the images from the dataframe

train_data_generator = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Age',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_data_generator = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Age',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_data_generator = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Age',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='raw',
    batch_size=32,
    shuffle=False
)

Found 6400 validated image filenames.
Found 1600 validated image filenames.
Found 2000 validated image filenames.


In [9]:
# Creating the model

model = tf.keras.Sequential()

model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=(120, 120, 3)))
model.add(MaxPool2D())
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D())
model.add(GlobalAveragePooling2D())
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='linear'))

model.compile(
    optimizer='adam',
    loss='mse'
)

history = model.fit(
    train_data_generator,
    validation_data=val_data_generator,
    epochs=100,
    callbacks=[
        EarlyStopping(
            monitor='val_loss',
            patience=20,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100


In [10]:
predicted_ages = np.squeeze(model.predict(test_data_generator))
true_ages = test_data_generator.labels

rmse = np.sqrt(model.evaluate(test_data_generator, verbose=0))
print("Test RMSE: {:.5f}".format(rmse))

r2 = r2_score(true_ages, predicted_ages)
print("Test R^2 Score: {:.5f}".format(r2))

Test RMSE: 8.99029
Test R^2 Score: -0.00132


In [11]:
model.save('age-model.h5')