# <center>Age and Gender Prediction</center>

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
from PIL import Image, ImageOps
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras import optimizers
from keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [None]:
# Read up to 10,000 face crops from `path`. Each file name is expected to start
# with "<age>_<gender>_", so both labels are parsed straight from the name.
path = "crop_part1"
images, ages, genders = [], [], []

for file_name in os.listdir(path)[:10000]:
    name_parts = file_name.split('_')
    ages.append(int(name_parts[0]))
    genders.append(int(name_parts[1]))

    # copy() detaches the pixel data so the file can be closed right away.
    with Image.open(os.path.join(path, file_name)) as opened:
        images.append(opened.copy())

In [None]:
# Wrap each parallel list in a named Series, then combine them column-wise
# into a single frame with one row per image.
images = pd.Series(list(images), name="Images")
ages = pd.Series(list(ages), name="Ages")
genders = pd.Series(list(genders), name="Genders")

df = pd.DataFrame({column.name: column for column in (images, ages, genders)})
df

In [None]:
# Inspect the row labelled 3: render the face, then print its age and gender labels.
display(df.loc[3, "Images"])
print(df.loc[3, "Ages"], df.loc[3, "Genders"])

In [None]:
# Inspect the row labelled 2 the same way, to compare against the other gender label.
display(df.loc[2, "Images"])
print(df.loc[2, "Ages"], df.loc[2, "Genders"])

So 0 corresponds to male, 1 corresponds to female.

In [None]:
sns.set_theme()
# `sns.distplot` is deprecated and scheduled for removal; `histplot` is its
# replacement. stat="density" keeps the normalised y-axis distplot drew with a KDE.
sns.histplot(df["Ages"], kde=True, bins=30, stat="density")

There are too many samples of faces from people aged 0 to 4. This could cause the model to overfit to these ages while underrepresenting others. To address this, I will keep only a random 30% sample (roughly one-third) of the images from this age group.

In [None]:
# Down-sample the over-represented 0-4 age band: keep a random 30% of those
# rows and every row aged above 4.
# A boolean mask replaces the row-by-row Python loop, and random_state makes
# the sample (and everything trained on it) reproducible across runs.
under4s = df[df["Ages"] <= 4].sample(frac=0.3, random_state=42)

df = df[df["Ages"] > 4]

# ignore_index gives the recombined frame a fresh 0..n-1 index.
df = pd.concat([df, under4s], ignore_index=True)

In [None]:
# `sns.distplot` is deprecated and scheduled for removal; `histplot` with
# stat="density" draws the same normalised histogram + KDE.
sns.histplot(df['Ages'], kde=True, bins=30, stat="density")

The dataset is now more representative of the population. However, there are very few images of individuals aged 80 and over, which could prevent the model from learning effectively for this age group. To address this, I will exclude samples aged 80 and over and train the model only to predict ages below 80.

In [None]:
# Keep ages strictly below 80. The explicit copy makes `df` an independent
# frame, so adding columns to it later does not raise SettingWithCopyWarning.
df = df[df['Ages'] < 80].copy()

In [None]:
# `sns.distplot` is deprecated and scheduled for removal; `histplot` with
# stat="density" draws the same normalised histogram + KDE.
sns.histplot(df['Ages'], kde=True, bins=20, stat="density")

In [None]:
# Bin edges are half-open on the right (right=False): [0, 12), [12, 18), ...
age_bins = [0, 12, 18, 30, 50, 80]
age_labels = [
    "Child (0-11)",
    "Teen (12-17)",
    "Young Adult (18-29)",
    "Adult (30-49)",
    "Senior (50-79)",
]

# Human-readable categorical label per row, plus its integer code for training.
df["AgeGroup"] = pd.cut(x=df["Ages"], bins=age_bins, right=False, labels=age_labels)
df["AgeGroupClass"] = df["AgeGroup"].cat.codes

sns.countplot(data=df, x="AgeGroupClass")

In [None]:
# Number of samples per gender label.
sns.countplot(data=df, x="Genders")

There is a very small number of samples labeled with a third gender. Since this group is underrepresented, I will remove it from the dataset.

In [None]:
# The gender model ends in a single sigmoid unit trained with binary
# cross-entropy, so labels must be exactly 0 or 1. Keeping only those two
# values drops the stray label 3 as well as any other unexpected value.
# The copy makes `df` an independent frame for later modification.
df = df[df['Genders'].isin([0, 1])].copy()
sns.countplot(x="Genders", data=df)

In [None]:
# Resize every face to the 96x96 network input size. convert("RGB") guarantees
# three channels, so a grayscale or RGBA image cannot break the (N, 96, 96, 3) stack.
resized_images = [
    img.convert("RGB").resize((96, 96), Image.Resampling.LANCZOS)
    for img in df["Images"]
]

# Replace the column in one step. The previous chained assignment
# (df["Images"].iloc[i] = ...) raises SettingWithCopyWarning and does not
# update the frame when pandas Copy-on-Write is enabled.
df = df.assign(Images=pd.Series(resized_images, index=df.index))

# x: pixel arrays, one entry per row of df, in row order.
x = np.array([np.asarray(img) for img in resized_images])
# y: [age-group class, gender] per row, in the same order as x.
y = df[["AgeGroupClass", "Genders"]].to_numpy(dtype=int)

In [None]:
y_age = df["AgeGroupClass"]
y_gender = df["Genders"]

# Two independent stratified 80/20 splits, one per task. A fixed random_state
# makes the splits -- and therefore the reported metrics -- reproducible.
x_train_age, x_test_age, y_train_age, y_test_age = train_test_split(
    x, y_age, test_size=0.2, stratify=y_age, random_state=42
)
x_train_gender, x_test_gender, y_train_gender, y_test_gender = train_test_split(
    x, y_gender, test_size=0.2, stratify=y_gender, random_state=42
)

In [None]:
# Age-group classifier: three conv/pool stages, a small dense head with
# dropout, and a 5-way softmax (one unit per age-group class).
agemodel = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(96, 96, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(5, activation="softmax"),
])

# Integer class labels -> sparse categorical cross-entropy.
agemodel.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizers.Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)


# Gender classifier: same backbone, but a single sigmoid output unit.
genmodel = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(96, 96, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid"),
])

genmodel.compile(
    loss="binary_crossentropy",
    optimizer=optimizers.Adam(learning_rate=0.0001),
    metrics=["accuracy"],
)

In [None]:
# Training generator: rescale pixels to [0, 1] and apply light augmentation
# (small shifts, horizontal flips). The test generator only rescales.
datagen = ImageDataGenerator(
      rescale=1./255., width_shift_range = 0.1, height_shift_range = 0.1, horizontal_flip = True)

test_datagen = ImageDataGenerator(rescale=1./255)

train1 = datagen.flow(x_train_age, y_train_age, batch_size=32)

test1 = test_datagen.flow(
        x_test_age, y_test_age,
        batch_size=32)

agemodel.fit(train1, epochs=50, shuffle=True, validation_data=test1)

# agemodel is compiled with metrics=["accuracy"], so the second value returned
# by evaluate() is accuracy. It was previously named and printed as "MAE".
test_loss, test_accuracy = agemodel.evaluate(test1, verbose=1)

print("Final Test Loss:", test_loss)
print("Final Test Accuracy:", test_accuracy)

agemodel.save("age_model.h5")

In [None]:
# Augmenting generator for training; the evaluation generator only rescales.
datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train2 = datagen.flow(x_train_gender, y_train_gender, batch_size=64)
test2 = test_datagen.flow(x_test_gender, y_test_gender, batch_size=64)

# Train for 50 epochs, validating on the held-out split after each epoch.
genmodel.fit(train2, validation_data=test2, epochs=50, shuffle=True)

test_loss, test_accuracy = genmodel.evaluate(test2, verbose=1)
print("Final Test Loss:", test_loss)
print("Final Test Accuracy:", test_accuracy)

genmodel.save("gender_model.h5")


In [None]:
def process_and_predict(file, age_classes):
    """Predict age group and gender for one image file using the Keras models.

    The image is cropped to a square, resized to 96x96, scaled to [0, 1] and
    passed to the module-level `agemodel` and `genmodel`.

    Args:
        file: Path (or file object) accepted by `PIL.Image.open`.
        age_classes: Sequence of age-group names indexed by the age model's
            predicted class.

    Returns:
        A tuple `(im, prediction)`: the 96x96 RGB image that was fed to the
        models, and a dict with keys "age_group" and "gender".
    """
    # The context manager closes the file handle. convert("RGB") loads the
    # pixels and guarantees three channels, so grayscale or RGBA inputs
    # (e.g. PNGs with alpha) still reshape to (1, 96, 96, 3).
    with Image.open(file) as source:
        im = source.convert("RGB")
    width, height = im.size

    if width > height:
        # Wide image: keep the horizontally centred square. Integer arithmetic
        # keeps the crop exactly `height` pixels wide; the earlier float box
        # could round to a width one pixel off.
        left = (width - height) // 2
        im = im.crop((left, 0, left + height, height))
    elif height > width:
        # Tall image: keep the top square, as before (not vertically centred).
        im = im.crop((0, 0, width, width))

    im = im.resize((96, 96), Image.Resampling.LANCZOS)

    # Same 1/255 scaling the training generators apply; add a batch dimension.
    ar = np.asarray(im).astype("float32") / 255.0
    ar = ar.reshape(-1, 96, 96, 3)

    age_probs = agemodel.predict(ar)[0]
    age_idx = np.argmax(age_probs)
    age_group = age_classes[age_idx]

    # Sigmoid output rounded to 0 or 1; 0 is reported as male, otherwise female.
    gender_val = np.round(genmodel.predict(ar)[0][0])
    gender = "male" if gender_val == 0 else "female"

    prediction = {"age_group": age_group, "gender": gender}
    print(f"{file} -> Age Group: {age_group}, Gender: {gender}")

    return im, prediction



In [None]:
test_folder = "test"
results = {}

age_classes = ["Child", "Teen", "Young Adult", "Adult", "Senior"]

# Run the Keras models over every JPEG/PNG in the test folder, keeping the
# preprocessed image and its prediction per file name.
for filename in os.listdir(test_folder):
    if not filename.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    img_path = os.path.join(test_folder, filename)
    im_resized, prediction = process_and_predict(img_path, age_classes)
    results[filename] = dict(image=im_resized, prediction=prediction)

    print(f"{filename}: {prediction}")
    display(im_resized)


In [None]:
# Reload the saved Keras age model and export it as a TFLite flatbuffer.
model = load_model("age_model.h5")
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("age_model.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Reload the saved Keras gender model and export it as a TFLite flatbuffer.
model = load_model("gender_model.h5")
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open("gender_model.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# One TFLite interpreter per task. NOTE(review): "emotion_model.tflite" is not
# produced by any cell visible in this notebook -- it must already exist on disk.
age_interpreter = tf.lite.Interpreter(model_path="age_model.tflite")
gender_interpreter = tf.lite.Interpreter(model_path="gender_model.tflite")
emotion_interpreter = tf.lite.Interpreter(model_path="emotion_model.tflite")

# Tensors must be allocated before any input can be set.
for _interpreter in (age_interpreter, gender_interpreter, emotion_interpreter):
    _interpreter.allocate_tensors()

# Cache the input/output tensor descriptors used by the prediction function.
age_input = age_interpreter.get_input_details()
age_output = age_interpreter.get_output_details()
gender_input = gender_interpreter.get_input_details()
gender_output = gender_interpreter.get_output_details()
emotion_input = emotion_interpreter.get_input_details()
emotion_output = emotion_interpreter.get_output_details()

In [None]:
def process_and_predict_all(file, age_classes, emotion_classes):
    """Predict age group, gender and emotion for one image via the TFLite models.

    The image is cropped to a square, resized to 96x96, scaled to [0, 1] and
    fed to the module-level age, gender and emotion interpreters.

    Args:
        file: Path (or file object) accepted by `PIL.Image.open`.
        age_classes: Sequence of age-group names indexed by predicted class.
        emotion_classes: Sequence of emotion names indexed by predicted class.

    Returns:
        A tuple `(im, prediction)`: the 96x96 RGB image that was fed to the
        models, and a dict with keys "age_group", "gender" and "emotion".
    """

    def _first_output_row(interpreter, input_details, output_details, batch):
        # Run one interpreter on `batch` and return the first row of its output.
        interpreter.set_tensor(input_details[0]["index"], batch)
        interpreter.invoke()
        return interpreter.get_tensor(output_details[0]["index"])[0]

    # The context manager closes the file handle. convert("RGB") loads the
    # pixels and guarantees three channels, so grayscale or RGBA inputs
    # (e.g. PNGs with alpha) still reshape to (1, 96, 96, 3).
    with Image.open(file) as source:
        im = source.convert("RGB")
    width, height = im.size

    if width > height:
        # Wide image: keep the horizontally centred square. Integer arithmetic
        # keeps the crop exactly `height` pixels wide; the earlier float box
        # could round to a width one pixel off.
        left = (width - height) // 2
        im = im.crop((left, 0, left + height, height))
    elif height > width:
        # Tall image: keep the top square, as before (not vertically centred).
        im = im.crop((0, 0, width, width))

    im = im.resize((96, 96), Image.Resampling.LANCZOS)

    # float32 batch of one image, scaled to [0, 1].
    ar = np.asarray(im).astype("float32") / 255.0
    ar = ar.reshape(-1, 96, 96, 3)

    age_probs = _first_output_row(age_interpreter, age_input, age_output, ar)
    age_idx = np.argmax(age_probs)
    age_group = age_classes[age_idx]

    # Single sigmoid output: below 0.5 is reported as male, otherwise female.
    gender_pred = _first_output_row(gender_interpreter, gender_input, gender_output, ar)[0]
    gender = "male" if gender_pred < 0.5 else "female"

    # NOTE(review): assumes the emotion model accepts the same (1, 96, 96, 3)
    # float32 input as the other two -- confirm against how it was trained.
    emotion_probs = _first_output_row(emotion_interpreter, emotion_input, emotion_output, ar)
    emotion_idx = np.argmax(emotion_probs)
    emotion = emotion_classes[emotion_idx]

    prediction = {"age_group": age_group, "gender": gender, "emotion": emotion}
    print(f"{file} -> Age Group: {age_group}, Gender: {gender}, Emotion: {emotion}")

    return im, prediction



In [None]:
test_folder = "test"
results = {}

age_classes = ["Child", "Teen", "Young Adult", "Adult", "Senior"]
emotion_classes = ["Happy", "Neutral", "Sad"]

# Run the TFLite models over every JPEG/PNG in the test folder and plot each
# preprocessed image with its predictions in the title.
for filename in os.listdir(test_folder):
    if filename.lower().endswith((".jpg", ".png", ".jpeg")):
        img_path = os.path.join(test_folder, filename)

        im_resized, prediction = process_and_predict_all(img_path, age_classes, emotion_classes)

        results[filename] = {"image": im_resized, "prediction": prediction}

        plt.imshow(im_resized)
        plt.axis("off")
        # The separator was a mis-encoded arrow (mojibake); use the same plain
        # "->" that the print statements in this notebook use.
        plt.title(f"{filename} -> Age Group: {prediction['age_group']}, Gender: {prediction['gender']}, Emotion: {prediction['emotion']}")
        plt.show()

In [None]:
def predict_tflite(model_path, x_data, task="binary"):
    """Run a TFLite model over `x_data` one sample at a time and return class labels.

    Args:
        model_path: Path to the `.tflite` model file.
        x_data: Sequence of samples. Each sample is reshaped to the model's
            declared input shape and cast to its input dtype. No pixel
            rescaling is applied here; callers must pass data preprocessed
            the way the model expects.
        task: "multi_class" takes the argmax over each output row; "binary"
            rounds each output to 0 or 1.

    Returns:
        A 1-D integer array with one predicted label per sample.

    Raises:
        ValueError: If `task` is neither "binary" nor "multi_class".
    """
    # Validate up front so a bad `task` fails before any inference is run.
    if task not in ("binary", "multi_class"):
        raise ValueError("task must be 'binary' or 'multi_class'")

    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Take the batch shape and dtype from the model rather than hard-coding
    # (1, 96, 96, 3), so models with other input sizes work too.
    input_shape = tuple(input_details[0]["shape"])
    input_dtype = input_details[0]["dtype"]

    preds = []

    for sample in x_data:
        input_data = np.asarray(sample).reshape(input_shape).astype(input_dtype)
        interpreter.set_tensor(input_details[0]["index"], input_data)
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_details[0]["index"])
        preds.append(output_data[0])

    preds = np.array(preds)

    if task == "multi_class":
        preds = np.argmax(preds, axis=1)
    else:
        preds = np.round(preds).astype(int).reshape(-1)

    return preds


def evaluate_model(model_path, x_test, y_test, model_name="", task="binary"):
    """Print accuracy and weighted precision/recall/F1 for a TFLite model.

    Predictions come from `predict_tflite(model_path, x_test, task=task)` and
    are compared against `y_test`. Nothing is returned; the report is printed.
    """
    predicted = predict_tflite(model_path, x_test, task=task)

    # Class-weighted averages; zero_division=0 scores classes that are never
    # predicted as 0 instead of emitting a warning.
    weighted = {"average": "weighted", "zero_division": 0}
    accuracy = accuracy_score(y_test, predicted)
    precision = precision_score(y_test, predicted, **weighted)
    recall = recall_score(y_test, predicted, **weighted)
    f1 = f1_score(y_test, predicted, **weighted)

    report_lines = [
        f"  Results for {model_name} ({model_path}):",
        f"  Accuracy : {accuracy:.4f}",
        f"  Precision: {precision:.4f}",
        f"  Recall   : {recall:.4f}",
        f"  F1-Score : {f1:.4f}",
        "-" * 40,
    ]
    print("\n".join(report_lines))


# Both models were trained on pixels rescaled by 1/255 (see the
# ImageDataGenerator settings), but x_test_* hold the raw pixel arrays built
# from the images. Apply the same scaling here; without it the TFLite models
# are evaluated on out-of-range inputs and the metrics are meaningless.
evaluate_model("age_model.tflite", x_test_age.astype("float32") / 255.0, y_test_age, model_name="Age Model", task="multi_class")

evaluate_model("gender_model.tflite", x_test_gender.astype("float32") / 255.0, y_test_gender, model_name="Gender Model", task="binary")