In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [2]:
import os, shutil

# Merge the two aligned UTKFace sub-folders into one flat directory.
aligned_root = "utkface_aligned_cropped"
target_dir = "aligned_dataset"
os.makedirs(target_dir, exist_ok=True)

for subfolder in ["crop_part1", "UTKFace"]:
    folder = os.path.join(aligned_root, subfolder)
    # Sorted so the copy (and overwrite) order does not depend on the filesystem.
    for img in sorted(os.listdir(folder)):
        src = os.path.join(folder, img)
        # os.listdir also returns sub-directories, which shutil.copy cannot copy.
        if not os.path.isfile(src):
            continue
        dst = os.path.join(target_dir, img)
        # A file name present in both sub-folders is written twice; the copy
        # from the later sub-folder ("UTKFace") replaces the earlier one.
        shutil.copy(src, dst)

print("✅ Combined aligned dataset into 'aligned_dataset/'")


✅ Combined aligned dataset into 'aligned_dataset/'


In [3]:
import os
import cv2
import numpy as np

dataset_dir = "aligned_dataset"
# os.listdir returns entries in arbitrary order. Sorting keeps image_files
# identical from run to run, so the seeded train/test split that is later built
# from this list is reproducible.
image_files = sorted(os.listdir(dataset_dir))

# Peek at the first few file names
print("Sample filenames:", image_files[:5])

# The age label is the integer before the first "_" in the file name.
# Parse it and try to load each of the first five images as a sanity check.
for img_name in image_files[:5]:
    try:
        age = int(img_name.split("_")[0])
        img_path = os.path.join(dataset_dir, img_name)
        img = cv2.imread(img_path)
        # cv2.imread signals an unreadable file by returning None
        if img is None:
            print(f"⚠️ Failed to load: {img_name}")
        else:
            print(f"✅ {img_name} | Age: {age} | Shape: {img.shape}")
    except Exception as e:
        print(f"❌ Error processing {img_name}: {e}")


Sample filenames: ['100_0_0_20170112213500903.jpg.chip.jpg', '100_0_0_20170112215240346.jpg.chip.jpg', '100_1_0_20170110183726390.jpg.chip.jpg', '100_1_0_20170112213001988.jpg.chip.jpg', '100_1_0_20170112213303693.jpg.chip.jpg']
✅ 100_0_0_20170112213500903.jpg.chip.jpg | Age: 100 | Shape: (200, 200, 3)
✅ 100_0_0_20170112215240346.jpg.chip.jpg | Age: 100 | Shape: (200, 200, 3)
✅ 100_1_0_20170110183726390.jpg.chip.jpg | Age: 100 | Shape: (200, 200, 3)
✅ 100_1_0_20170112213001988.jpg.chip.jpg | Age: 100 | Shape: (200, 200, 3)
✅ 100_1_0_20170112213303693.jpg.chip.jpg | Age: 100 | Shape: (200, 200, 3)


In [4]:
# Collect (path, age) for every file whose name starts with an integer age and
# for which cv2.imread returns an image.
image_paths, age_labels = [], []

for img_name in image_files:
    try:
        age = int(img_name.split("_")[0])
        img_path = os.path.join(dataset_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # Unreadable file: dropped without a message
            continue
        image_paths.append(img_path)
        age_labels.append(age)
    except Exception as e:
        print(f"⚠️ Skipped {img_name}: {e}")

print(f"✅ Total valid images: {len(image_paths)}")


✅ Total valid images: 23709


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state pins the shuffle.
(train_paths, test_paths,
 train_ages, test_ages) = train_test_split(
    image_paths,
    age_labels,
    random_state=42,
    test_size=0.2,
)

print(f"✅ Training samples: {len(train_paths)}")
print(f"✅ Testing samples: {len(test_paths)}")


✅ Training samples: 18967
✅ Testing samples: 4742


In [6]:
# Side length, in pixels, of the square images produced by parse_image
# (every image is resized to IMG_SIZE x IMG_SIZE).
IMG_SIZE = 64


In [7]:
def parse_image(filename, label):
    """Load one JPEG file as a scaled image tensor and pair it with its label.

    Args:
        filename: path of the JPEG file to read.
        label: value returned unchanged alongside the image.

    Returns:
        (image, label), where image has 3 channels, is resized to
        IMG_SIZE x IMG_SIZE and has been divided by 255.0.
    """
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (IMG_SIZE, IMG_SIZE))
    return resized / 255.0, label


In [8]:
# Age bin edges (10 bins: 0-10, 11-20, ..., 81-90, 91+).
# Each interior edge is the inclusive upper bound of its bin. The final edge is
# nominal: every age above 90 belongs to the last bin.
bin_edges = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 120]  # 10 bins

def age_to_bin(age):
    """Return the index (0-9) of the age bin that contains ``age``.

    Args:
        age: age in years; must not be below ``bin_edges[0]``.

    Returns:
        0 for ages 0-10, 1 for 11-20, ..., 8 for 81-90 and 9 for anything
        above 90.

    Raises:
        ValueError: if ``age`` is below the first bin edge. Such values used to
            fall through every range check and be labelled as the oldest bin.
    """
    if age < bin_edges[0]:
        raise ValueError(f"age must be >= {bin_edges[0]}, got {age!r}")
    # Walk the inclusive upper bounds of all bins except the open-ended last one.
    for idx, upper in enumerate(bin_edges[1:-1]):
        if age <= upper:
            return idx
    return len(bin_edges) - 2

# Bin index for every training / test age
train_bins = list(map(age_to_bin, train_ages))
test_bins = list(map(age_to_bin, test_ages))

# Display name of each bin, in bin-index order
bin_labels = "0-10 11-20 21-30 31-40 41-50 51-60 61-70 71-80 81-90 91+".split()

print(f"✅ Converted ages to {len(bin_labels)} bins")


✅ Converted ages to 10 bins


In [9]:
# Training pipeline: (path, bin) pairs -> decoded images, shuffled with a
# 1000-element buffer, in batches of 32.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_paths, train_bins))
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Test pipeline: same decoding and batching, but no shuffling.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_paths, test_bins))
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

print("✅ Rebuilt tf.data datasets with binned labels")


✅ Rebuilt tf.data datasets with binned labels


In [15]:
def assign_bin(age):
    """Map an age to one of four coarse classes.

    Returns:
        0 for ages up to 20, 1 for 21-40, 2 for 41-60 and 3 for everything
        above 60.
    """
    # Inclusive upper bound of classes 0, 1 and 2; class 3 is open-ended.
    for class_idx, upper in enumerate((20, 40, 60)):
        if age <= upper:
            return class_idx
    return 3


In [16]:
# train_ages / test_ages are the age lists returned by the train/test split.
# Map each age to its 4-class index and store the result as a NumPy array,
# replacing the 10-bin train_bins / test_bins lists built earlier.
train_bins = np.array(list(map(assign_bin, train_ages)))
test_bins = np.array(list(map(assign_bin, test_ages)))


In [17]:
# Redefinition of parse_image; same steps as the definition in the earlier cell.
def parse_image(filename, label):
    """Read a JPEG file and return ``(image, label)``.

    The image is decoded with 3 channels, resized to IMG_SIZE x IMG_SIZE and
    divided by 255.0; ``label`` is passed through untouched.
    """
    pixels = tf.image.decode_jpeg(tf.io.read_file(filename), channels=3)
    pixels = tf.image.resize(pixels, (IMG_SIZE, IMG_SIZE))
    scaled = pixels / 255.0
    return scaled, label

# Training pipeline over the 4-class labels: decode, shuffle (buffer of 1000),
# batch by 32 and prefetch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_paths, train_bins))
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .shuffle(1000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Test pipeline: identical apart from the missing shuffle step.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_paths, test_bins))
    .map(parse_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

print("✅ Rebuilt tf.data datasets with 4-class age bins")


✅ Rebuilt tf.data datasets with 4-class age bins


In [18]:
from tensorflow.keras import layers, models

# Small CNN classifier: three Conv2D + MaxPooling2D stages followed by a dense
# head with dropout and a 4-way softmax (one output per age class).
model = models.Sequential([
    # Declare the input explicitly rather than passing input_shape to the first
    # layer, which newer Keras versions warn about for Sequential models.
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4, activation='softmax')  # now 4 bins
])

# The sparse loss takes integer class indices as labels, so no one-hot encoding
# is needed.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [19]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Write the model to disk each time val_accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    filepath="age_model_best_4bins.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)
# Stop after 5 epochs without a val_accuracy improvement and roll the model
# back to its best weights.
early_stop = EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# NOTE(review): test_dataset doubles as the validation set here, so
# checkpointing and early stopping are tuned on it; any later evaluation on
# test_dataset is therefore not an independent estimate.
history = model.fit(
    train_dataset,
    epochs=30,
    validation_data=test_dataset,
    callbacks=[checkpoint, early_stop],
)


Epoch 1/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.5614 - loss: 1.0735
Epoch 1: val_accuracy improved from -inf to 0.71236, saving model to age_model_best_4bins.h5




[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 61ms/step - accuracy: 0.5615 - loss: 1.0733 - val_accuracy: 0.7124 - val_loss: 0.7437
Epoch 2/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.6962 - loss: 0.7672
Epoch 2: val_accuracy improved from 0.71236 to 0.72670, saving model to age_model_best_4bins.h5




[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 59ms/step - accuracy: 0.6962 - loss: 0.7671 - val_accuracy: 0.7267 - val_loss: 0.6932
Epoch 3/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.7244 - loss: 0.6922
Epoch 3: val_accuracy improved from 0.72670 to 0.74589, saving model to age_model_best_4bins.h5




[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 60ms/step - accuracy: 0.7244 - loss: 0.6922 - val_accuracy: 0.7459 - val_loss: 0.6335
Epoch 4/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 53ms/step - accuracy: 0.7430 - loss: 0.6356
Epoch 4: val_accuracy improved from 0.74589 to 0.75538, saving model to age_model_best_4bins.h5




[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 59ms/step - accuracy: 0.7430 - loss: 0.6356 - val_accuracy: 0.7554 - val_loss: 0.6083
Epoch 5/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.7581 - loss: 0.6087
Epoch 5: val_accuracy improved from 0.75538 to 0.76234, saving model to age_model_best_4bins.h5




[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 58ms/step - accuracy: 0.7581 - loss: 0.6086 - val_accuracy: 0.7623 - val_loss: 0.5922
Epoch 6/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.7713 - loss: 0.5693
Epoch 6: val_accuracy did not improve from 0.76234
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 59ms/step - accuracy: 0.7713 - loss: 0.5693 - val_accuracy: 0.7619 - val_loss: 0.5860
Epoch 7/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.7795 - loss: 0.5392
Epoch 7: val_accuracy did not improve from 0.76234
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 61ms/step - accuracy: 0.7795 - loss: 0.5392 - val_accuracy: 0.7617 - val_loss: 0.5984
Epoch 8/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - accuracy: 0.7914 - loss: 0.5093
Epoch 8: val_accuracy did not improve from 0.76234
[1m593/593[0m [32m━━━━



[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 60ms/step - accuracy: 0.8088 - loss: 0.4702 - val_accuracy: 0.7676 - val_loss: 0.6078
Epoch 11/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.8178 - loss: 0.4361
Epoch 11: val_accuracy did not improve from 0.76761
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 59ms/step - accuracy: 0.8178 - loss: 0.4361 - val_accuracy: 0.7670 - val_loss: 0.6249
Epoch 12/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 54ms/step - accuracy: 0.8287 - loss: 0.4135
Epoch 12: val_accuracy improved from 0.76761 to 0.76782, saving model to age_model_best_4bins.h5




[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 60ms/step - accuracy: 0.8287 - loss: 0.4135 - val_accuracy: 0.7678 - val_loss: 0.6878
Epoch 13/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - accuracy: 0.8305 - loss: 0.3994
Epoch 13: val_accuracy did not improve from 0.76782
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 61ms/step - accuracy: 0.8306 - loss: 0.3993 - val_accuracy: 0.7661 - val_loss: 0.6487
Epoch 14/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.8409 - loss: 0.3765
Epoch 14: val_accuracy did not improve from 0.76782
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 60ms/step - accuracy: 0.8409 - loss: 0.3765 - val_accuracy: 0.7642 - val_loss: 0.6856
Epoch 15/30
[1m592/593[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 55ms/step - accuracy: 0.8541 - loss: 0.3529
Epoch 15: val_accuracy did not improve from 0.76782
[1m593/593[0m [3



[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 61ms/step - accuracy: 0.8659 - loss: 0.3213 - val_accuracy: 0.7706 - val_loss: 0.7706
Epoch 18/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.8722 - loss: 0.2974
Epoch 18: val_accuracy did not improve from 0.77056
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 60ms/step - accuracy: 0.8722 - loss: 0.2974 - val_accuracy: 0.7689 - val_loss: 0.7793
Epoch 19/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - accuracy: 0.8809 - loss: 0.2777
Epoch 19: val_accuracy did not improve from 0.77056
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 61ms/step - accuracy: 0.8809 - loss: 0.2777 - val_accuracy: 0.7579 - val_loss: 0.8581
Epoch 20/30
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.8732 - loss: 0.3022
Epoch 20: val_accuracy did not improve from 0.77056
[1m593/593[0m [3

In [20]:
# Evaluate the model on test_dataset and report its accuracy as a percentage.
# NOTE(review): test_dataset was also passed as validation_data to model.fit
# (driving checkpointing and early stopping), so this figure is not an
# independent hold-out estimate.
loss, acc = model.evaluate(test_dataset)
print(f"✅ Final Test Accuracy: {acc * 100:.2f}%")


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.7735 - loss: 0.7369
✅ Final Test Accuracy: 77.06%


In [21]:
# Write the model, in its current post-training state, to final_age_model_4bins.h5.
model.save("final_age_model_4bins.h5")


