In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from PIL import Image

In [None]:
# Directory holding the UTKFace images, relative to the notebook's working directory.
# The trailing slash matters: a later cell builds file paths by plain string concatenation.
data_path = "../data/raw/UTKFace/"

In [None]:
# Parse age and gender out of every file name in the dataset directory.
# The first two "_"-separated fields of a name are read as <age> and <gender>.
images = []
ages = []
genders = []
skipped_files = []  # names that do not follow the "<age>_<gender>_..." pattern

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and everything derived from it) identical across runs and machines.
for file in sorted(os.listdir(data_path)):
    split = file.split("_")
    try:
        age, gender = int(split[0]), int(split[1])
    except (ValueError, IndexError):
        # Stray files (e.g. OS metadata) would otherwise abort the whole scan;
        # keep their names so they can be inspected instead of silently dropped.
        skipped_files.append(file)
        continue
    ages.append(age)
    genders.append(gender)
    images.append(file)

In [None]:
# Turn the three parallel lists into named Series, then line them up as the
# columns "image", "age" and "gender" of a single dataframe.
images, ages, genders = (
    pd.Series(list(values), name=column)
    for values, column in ((images, "image"), (ages, "age"), (genders, "gender"))
)

df = pd.concat([images, ages, genders], axis=1)
# Print column dtypes and non-null counts as a quick sanity check.
df.info()

In [None]:
# Open the image referenced by the first dataframe row as a sample.
img = Image.open(os.path.join(data_path, df["image"].iloc[0]))

In [None]:
# Inspect the sample image's dimensions (PIL reports size as (width, height)).
img.size

In [None]:
# Distribution of raw ages across the dataset, in 20 bins.
df["age"].plot.hist(bins=20)

In [None]:
# Relative share of each gender code, shown as a pie chart with percentages.
(
    df["gender"]
    .value_counts(normalize=True)
    .plot.pie(autopct="%1.1f%%", legend=True)
)

There are slightly more males than females in the dataset.

In [None]:
# Discretize age into decade bins labelled 0..10:
#   0 -> 0-9, 1 -> 10-19, ..., 9 -> 90-99, 10 -> 100 and above.
# The last edge is open-ended (inf) so that ages of 110 or more still fall into
# the final bin; with a closed upper edge of 110 they would become NaN labels.
# right=False makes every bin left-inclusive, e.g. age 10 belongs to bin 1.
df["age_discrete"] = pd.cut(
    df.age,
    bins=[*range(0, 110, 10), np.inf],
    labels=range(0, 11),
    right=False,
)

In [None]:
# Relative share of each age bucket, shown as a pie chart with percentages.
(
    df["age_discrete"]
    .value_counts(normalize=True)
    .plot.pie(autopct="%1.1f%%", legend=True, figsize=(9, 9))
)

In [None]:
# Image generator shared by the training and validation streams:
# pixel values are multiplied by 1/255, and 20% of the rows are reserved
# for the "validation" subset.
train_gen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)

In [None]:
# Human-readable names for the age-bucket labels, indexed by label id.
# The age binning produces 11 labels (0..10), so the list needs 11 entries:
# ten decades "0-9" ... "90-99" plus a final bucket for ages of 100 and above.
classes = [f"{low}-{low + 9}" for low in range(0, 100, 10)] + ["100+"]

In [None]:
# Size the output layer from the full label set of the categorical column, not
# from the labels that happen to occur: nunique() shrinks when a bin is empty,
# which would leave the higher label ids outside the softmax range.
num_classes = len(df.age_discrete.cat.categories)

In [None]:
# Peek at the first few bucket labels to check the discretization result.
df.age_discrete.head()

In [None]:
# validation_split carves the subsets out of the dataframe by row position, so
# the split would otherwise depend on the file listing order. Shuffle the rows
# once with a fixed seed so both subsets are drawn from the whole dataset and
# the split is reproducible.
df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Arguments common to both streams: 200x200 images, batches of 32, and the
# integer bucket label passed through unchanged (class_mode="raw").
flow_kwargs = dict(
    dataframe=df_shuffled,
    directory=data_path,
    x_col="image",
    y_col="age_discrete",
    target_size=(200, 200),
    batch_size=32,
    class_mode="raw",
)

# Training batches are reshuffled every epoch so that each batch mixes ages.
train_data = train_gen.flow_from_dataframe(
    shuffle=True, seed=42, subset="training", **flow_kwargs
)
# Validation keeps a fixed order so predictions line up with the rows.
val_data = train_gen.flow_from_dataframe(
    shuffle=False, subset="validation", **flow_kwargs
)

In [None]:
# Small CNN for age-bucket classification: three conv/max-pool stages with
# 32, 64 and 128 filters, then a 64-unit dense layer with dropout and a
# softmax over the num_classes age buckets.
agemodel = Sequential(
    [
        Conv2D(32, (3, 3), activation="relu", input_shape=(200, 200, 3)),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation="relu"),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation="relu"),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(num_classes, activation="softmax"),
    ]
)

# Labels are integer class ids, hence the sparse variant of the loss.
agemodel.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss="sparse_categorical_crossentropy",
)

In [None]:
# Train for at most 10 epochs. The History object is kept so the loss curves
# can be inspected afterwards.
history = agemodel.fit(
    train_data,
    epochs=10,
    validation_data=val_data,
    # Stop once the monitored validation loss has not improved for 3 epochs.
    # restore_best_weights rolls the model back to the best epoch when stopping
    # triggers; without it the model keeps the weights of the last epoch run.
    callbacks=[EarlyStopping(patience=3, restore_best_weights=True)],
)

In [None]:
# Predict on the validation set and keep, for each sample, the index of the
# most probable age bucket.
preds = agemodel.predict(val_data).argmax(axis=1)

In [None]:
# Display the predicted bucket index for each validation sample.
preds