<a href="https://colab.research.google.com/github/Farihaz2812/Cross-age-celebrity-dataset/blob/main/Cross_Age_Celebrity_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**CROSS AGE CELEBRITY DATASET**



Mounting Google Drive


In [None]:
# Colab helper: mounts the user's Google Drive at /content/drive/ so the dataset
# stored there can be read like a local folder.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


Point to your dataset folder

In [None]:
import os

# Root folder of the dataset inside the mounted Drive.
DATA_DIR = "/content/drive/MyDrive/CACD"

# Sanity check: list the folder once, then report how many entries it has
# and show up to the first ten of them.
dataset_entries = os.listdir(DATA_DIR)
print("Number of files:", len(dataset_entries))
print("Sample files:", dataset_entries[:10])

Number of files: 2
Sample files: ['crop_part1', '.ipynb_checkpoints']


Parse filenames into a DataFrame

In [None]:
import glob, pandas as pd

# Recursively collect every path below DATA_DIR/crop_part1. Entries whose names
# do not parse as <age>_<gender>_<race>_... are dropped when the rows are built.
files = glob.glob(os.path.join(DATA_DIR, "crop_part1", "**/*"), recursive=True)
# Accumulates one record (dict) per successfully parsed file.
rows=[]

def parse_filename(f):
    """Extract ``(age, gender, race)`` from a name shaped like ``<age>_<gender>_<race>_...``.

    Parameters
    ----------
    f : str
        Path or bare file name; only the final path component is inspected.

    Returns
    -------
    tuple[int, int, int] | None
        The three leading underscore-separated fields converted to ``int``, or
        ``None`` when the name has fewer than three fields or any of the three
        is not an integer.
    """
    fn = os.path.basename(f)
    try:
        age, gender, race = fn.split("_")[:3]
        return int(age), int(gender), int(race)
    except ValueError:
        # ValueError is raised both by the 3-way unpacking (too few fields) and
        # by int() on non-numeric text. Catching only that keeps
        # KeyboardInterrupt and genuine programming errors visible instead of
        # silently turning them into "unparseable file".
        return None

# Pair each path with its parsed labels, keep only the paths that parsed, and
# append one record per image to `rows`.
parsed_pairs = ((path, parse_filename(path)) for path in files)
rows.extend(
    {"file": path, "age": labels[0], "gender": labels[1], "race": labels[2]}
    for path, labels in parsed_pairs
    if labels
)

df = pd.DataFrame(rows)
print("Total parsed images:", len(df))
df.head()


Total parsed images: 4087


Unnamed: 0,file,age,gender,race
0,/content/drive/MyDrive/CACD/crop_part1/84_1_4_...,84,1,4
1,/content/drive/MyDrive/CACD/crop_part1/84_1_2_...,84,1,2
2,/content/drive/MyDrive/CACD/crop_part1/85_1_0_...,85,1,0
3,/content/drive/MyDrive/CACD/crop_part1/85_1_0_...,85,1,0
4,/content/drive/MyDrive/CACD/crop_part1/85_1_0_...,85,1,0


Split Train / Validation / Test

In [None]:
from sklearn.model_selection import train_test_split

# 80% of the rows go to training; the remaining 20% is then halved into
# validation and test (10% each). random_state is fixed on both calls so the
# partition is the same on every run.
train_df, tmp = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(tmp, test_size=0.5, random_state=42)

# Sizes of the train / validation / test partitions.
print(len(train_df), len(val_df), len(test_df))


3269 409 409


Building TensorFlow datasets

In [None]:
import tensorflow as tf

IMG_SIZE = 224  # side length in pixels that images are resized to; also the model input size
BATCH_SIZE = 32  # default batch size used by df_to_dataset
AUTOTUNE = tf.data.AUTOTUNE  # passed to map()/prefetch() so tf.data picks the parallelism itself

def preprocess_image(path):
    """Load the JPEG stored at ``path`` as a float image scaled to [0, 1].

    The file is decoded to 3 channels, resized to ``IMG_SIZE`` x ``IMG_SIZE``
    and its pixel values are divided by 255.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [IMG_SIZE, IMG_SIZE])
    return resized / 255.0

def df_to_dataset(dataframe, label_col, batch_size=BATCH_SIZE, shuffle=True):
    """Build a batched ``tf.data.Dataset`` of ``(image, label)`` pairs from a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'file'`` column of image paths and the column named
        by ``label_col``.
    label_col : str
        Column whose values become the labels (cast to float32).
    batch_size : int
        Number of examples per batch.
    shuffle : bool
        If True, the examples are shuffled with a buffer covering the whole
        DataFrame; if False, DataFrame row order is preserved.

    Returns
    -------
    tf.data.Dataset
        Batches of images produced by ``preprocess_image`` paired with labels.
    """
    paths = dataframe['file'].values
    labels = dataframe[label_col].values.astype('float32')
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # Shuffle the (path, label) pairs *before* decoding. The buffer spans
        # the whole split, so shuffling after map() would keep every decoded
        # float image resident in memory; here it only holds short strings.
        ds = ds.shuffle(buffer_size=len(dataframe))
    def load(path, label):
        img = preprocess_image(path)
        return img, label
    ds = ds.map(load, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(AUTOTUNE)
    return ds

# Example datasets
# Only the training splits are shuffled. Validation and test keep DataFrame row
# order so that model predictions line up with val_df / test_df label columns.
train_gender_ds = df_to_dataset(train_df, 'gender')
val_gender_ds = df_to_dataset(val_df, 'gender', shuffle=False)
test_gender_ds = df_to_dataset(test_df, 'gender', shuffle=False)

# Same images, with the 'age' column as the (regression) target.
train_age_ds = df_to_dataset(train_df, 'age')
val_age_ds = df_to_dataset(val_df, 'age', shuffle=False)
test_age_ds = df_to_dataset(test_df, 'age', shuffle=False)


Data Augmentation


In [None]:
# Random horizontal flips, small rotations and small zooms, bundled as a single
# Keras model so it can be applied as the first stage of a network.
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.08),
    tf.keras.layers.RandomZoom(0.06),
])


Gender Classification Model


In [None]:
# ImageNet-pretrained MobileNetV2 without its classification head, frozen so it
# acts as a fixed feature extractor.
base = tf.keras.applications.MobileNetV2(input_shape=(IMG_SIZE,IMG_SIZE,3),
                                         include_top=False, weights='imagenet')
base.trainable = False

inputs = tf.keras.Input(shape=(IMG_SIZE,IMG_SIZE,3))
x = data_augmentation(inputs)
# The input pipeline already divides pixels by 255, so images arrive in [0, 1].
# mobilenet_v2.preprocess_input assumes raw [0, 255] pixels (it computes
# x / 127.5 - 1); applied to [0, 1] data it squeezes every image into roughly
# [-1, -0.99], leaving the backbone almost no signal. Map [0, 1] -> [-1, 1]
# directly instead, which is the range MobileNetV2 expects.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(x)
# training=False keeps the frozen backbone in inference mode.
x = base(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.3)(x)
# Single sigmoid unit: probability of gender label 1.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model_gender = tf.keras.Model(inputs, outputs)

model_gender.compile(optimizer='adam',
                     loss='binary_crossentropy',
                     metrics=['accuracy'])

model_gender.summary()


In [None]:
# Train the gender classifier.
# ModelCheckpoint writes best_gender.h5 whenever val_accuracy improves;
# EarlyStopping ends training after 6 epochs without a val_accuracy improvement
# and restores the best weights seen.

callbacks = [
    tf.keras.callbacks.ModelCheckpoint("best_gender.h5", save_best_only=True, monitor="val_accuracy"),
    tf.keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True, monitor="val_accuracy")
]

history = model_gender.fit(train_gender_ds, validation_data=val_gender_ds, epochs=20, callbacks=callbacks)


Epoch 1/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5226 - loss: 0.7583



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 2s/step - accuracy: 0.5226 - loss: 0.7581 - val_accuracy: 0.5599 - val_loss: 0.6884
Epoch 2/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 2s/step - accuracy: 0.5452 - loss: 0.7095 - val_accuracy: 0.5599 - val_loss: 0.6884
Epoch 3/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 2s/step - accuracy: 0.5441 - loss: 0.7107 - val_accuracy: 0.5599 - val_loss: 0.6846
Epoch 4/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 2s/step - accuracy: 0.5222 - loss: 0.7154 - val_accuracy: 0.5599 - val_loss: 0.6860
Epoch 5/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 2s/step - accuracy: 0.5474 - loss: 0.6994 - val_accuracy: 0.5599 - val_loss: 0.6843
Epoch 6/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5660 - loss: 0.6900



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 2s/step - accuracy: 0.5659 - loss: 0.6900 - val_accuracy: 0.5819 - val_loss: 0.6855
Epoch 7/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 2s/step - accuracy: 0.5328 - loss: 0.6912 - val_accuracy: 0.5599 - val_loss: 0.6868
Epoch 8/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m217s[0m 2s/step - accuracy: 0.5441 - loss: 0.6933 - val_accuracy: 0.5746 - val_loss: 0.6820
Epoch 9/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 2s/step - accuracy: 0.5659 - loss: 0.6921 - val_accuracy: 0.5599 - val_loss: 0.6839
Epoch 10/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 2s/step - accuracy: 0.5737 - loss: 0.6813 - val_accuracy: 0.5599 - val_loss: 0.6949
Epoch 11/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 2s/step - accuracy: 0.5537 - loss: 0.6982 - val_accuracy: 0.5599 - val_loss: 0.7052
Epoch 12/20
[1m103/103[0m [3

In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions, then drop
# the trailing singleton axis so the result is a flat vector.
preds = (model_gender.predict(test_gender_ds) > 0.5).astype(int).squeeze()
# test_gender_ds was built with shuffle=False, so predictions follow test_df row order.
y_true = test_df['gender'].values

# Per-class precision/recall/F1, followed by the confusion matrix.
print(classification_report(y_true, preds))
print(confusion_matrix(y_true, preds))


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step
              precision    recall  f1-score   support

           0       0.71      0.09      0.16       187
           1       0.56      0.97      0.71       222

    accuracy                           0.57       409
   macro avg       0.63      0.53      0.43       409
weighted avg       0.63      0.57      0.46       409

[[ 17 170]
 [  7 215]]


Age Regression Model

In [None]:
# ImageNet-pretrained MobileNetV2 without its classification head, frozen so it
# acts as a fixed feature extractor for the age regressor.
base = tf.keras.applications.MobileNetV2(input_shape=(IMG_SIZE,IMG_SIZE,3),
                                         include_top=False, weights='imagenet')
base.trainable = False

inp = tf.keras.Input(shape=(IMG_SIZE,IMG_SIZE,3))
x = data_augmentation(inp)
# The input pipeline already divides pixels by 255, so images arrive in [0, 1].
# mobilenet_v2.preprocess_input assumes raw [0, 255] pixels (it computes
# x / 127.5 - 1); applied to [0, 1] data it squeezes every image into roughly
# [-1, -0.99], leaving the backbone almost no signal. Map [0, 1] -> [-1, 1]
# directly instead, which is the range MobileNetV2 expects.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(x)
# training=False keeps the frozen backbone in inference mode.
x = base(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.3)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# Single linear unit: the predicted age, in the same units as the 'age' labels.
out = tf.keras.layers.Dense(1, activation='linear')(x)

model_age = tf.keras.Model(inp, out)

# Optimise mean squared error; mean absolute error is tracked for readability.
model_age.compile(optimizer='adam', loss='mse',
                  metrics=[tf.keras.metrics.MeanAbsoluteError()])

model_age.summary()

# Checkpoint on the best val_loss and stop after 6 epochs without improvement
# (EarlyStopping monitors val_loss by default), restoring the best weights.
callbacks = [tf.keras.callbacks.ModelCheckpoint("best_age.h5", save_best_only=True, monitor="val_loss"),
             tf.keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True)]

history = model_age.fit(train_age_ds, validation_data=val_age_ds, epochs=30, callbacks=callbacks)


Epoch 1/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1991.0063 - mean_absolute_error: 39.4342



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m278s[0m 2s/step - loss: 1987.6210 - mean_absolute_error: 39.4252 - val_loss: 1494.9728 - val_mean_absolute_error: 38.3905
Epoch 2/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 2s/step - loss: 1471.7493 - mean_absolute_error: 37.8032 - val_loss: 1544.6943 - val_mean_absolute_error: 38.9486
Epoch 3/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1493.1663 - mean_absolute_error: 38.2124



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 2s/step - loss: 1493.0457 - mean_absolute_error: 38.2111 - val_loss: 1491.9935 - val_mean_absolute_error: 38.3943
Epoch 4/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1482.8036 - mean_absolute_error: 38.1737



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 2s/step - loss: 1482.7924 - mean_absolute_error: 38.1730 - val_loss: 1488.2041 - val_mean_absolute_error: 38.3250
Epoch 5/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1486.7866 - mean_absolute_error: 37.9712



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 2s/step - loss: 1486.7556 - mean_absolute_error: 37.9708 - val_loss: 1483.8602 - val_mean_absolute_error: 38.1996
Epoch 6/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 2s/step - loss: 1485.7362 - mean_absolute_error: 38.0244 - val_loss: 1486.3727 - val_mean_absolute_error: 38.3417
Epoch 7/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 2s/step - loss: 1467.7811 - mean_absolute_error: 37.6608 - val_loss: 1484.7537 - val_mean_absolute_error: 38.3261
Epoch 8/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 2s/step - loss: 1468.8593 - mean_absolute_error: 37.8641 - val_loss: 1488.1185 - val_mean_absolute_error: 38.3887
Epoch 9/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 2s/step - loss: 1480.0713 - mean_absolute_error: 37.9447 - val_loss: 1504.5177 - val_mean_absolute_error: 38.5543
Epoch 10/30
[1m103/103[0m [32m━━━━━━━━━━━━━



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 2s/step - loss: 1470.3885 - mean_absolute_error: 37.9312 - val_loss: 1475.1469 - val_mean_absolute_error: 38.1909
Epoch 11/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1460.1589 - mean_absolute_error: 37.8470



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 2s/step - loss: 1460.1240 - mean_absolute_error: 37.8457 - val_loss: 1470.6647 - val_mean_absolute_error: 37.7794
Epoch 12/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 2s/step - loss: 1450.4229 - mean_absolute_error: 37.3765 - val_loss: 1584.9209 - val_mean_absolute_error: 38.9551
Epoch 13/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 2s/step - loss: 1481.1727 - mean_absolute_error: 37.9276 - val_loss: 1481.8585 - val_mean_absolute_error: 38.2958
Epoch 14/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1450.8134 - mean_absolute_error: 37.7067



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 2s/step - loss: 1450.8359 - mean_absolute_error: 37.7065 - val_loss: 1459.4464 - val_mean_absolute_error: 37.8991
Epoch 15/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m264s[0m 2s/step - loss: 1464.5056 - mean_absolute_error: 37.8582 - val_loss: 1462.6382 - val_mean_absolute_error: 38.0312
Epoch 16/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1443.0205 - mean_absolute_error: 37.5806



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 2s/step - loss: 1443.1472 - mean_absolute_error: 37.5818 - val_loss: 1453.5183 - val_mean_absolute_error: 37.7794
Epoch 17/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m232s[0m 2s/step - loss: 1455.4901 - mean_absolute_error: 37.6703 - val_loss: 1487.3848 - val_mean_absolute_error: 38.2913
Epoch 18/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 2s/step - loss: 1447.4985 - mean_absolute_error: 37.3473 - val_loss: 1468.1008 - val_mean_absolute_error: 38.1049
Epoch 19/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 2s/step - loss: 1433.8846 - mean_absolute_error: 37.3768 - val_loss: 1463.0847 - val_mean_absolute_error: 38.0384
Epoch 20/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1441.8860 - mean_absolute_error: 37.4135



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 2s/step - loss: 1441.9067 - mean_absolute_error: 37.4140 - val_loss: 1450.1382 - val_mean_absolute_error: 37.8624
Epoch 21/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 2s/step - loss: 1430.6940 - mean_absolute_error: 37.2638 - val_loss: 1477.1840 - val_mean_absolute_error: 36.9124
Epoch 22/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1467.5963 - mean_absolute_error: 37.2466



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 2s/step - loss: 1467.3979 - mean_absolute_error: 37.2476 - val_loss: 1435.5024 - val_mean_absolute_error: 37.3701
Epoch 23/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 2s/step - loss: 1433.3313 - mean_absolute_error: 37.1635 - val_loss: 1445.2168 - val_mean_absolute_error: 37.7868
Epoch 24/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1440.0959 - mean_absolute_error: 37.3449



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 2s/step - loss: 1440.1252 - mean_absolute_error: 37.3448 - val_loss: 1429.2131 - val_mean_absolute_error: 37.2384
Epoch 25/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1429.2145 - mean_absolute_error: 37.2691



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 2s/step - loss: 1429.1936 - mean_absolute_error: 37.2691 - val_loss: 1425.0461 - val_mean_absolute_error: 37.1713
Epoch 26/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1444.2179 - mean_absolute_error: 37.2825



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 2s/step - loss: 1444.0240 - mean_absolute_error: 37.2803 - val_loss: 1424.1309 - val_mean_absolute_error: 37.4613
Epoch 27/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 2s/step - loss: 1411.6027 - mean_absolute_error: 36.8237 - val_loss: 1439.2646 - val_mean_absolute_error: 37.6635
Epoch 28/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1443.2987 - mean_absolute_error: 37.4003



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 2s/step - loss: 1443.1829 - mean_absolute_error: 37.3983 - val_loss: 1413.2914 - val_mean_absolute_error: 37.0682
Epoch 29/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1428.6002 - mean_absolute_error: 37.1631



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m272s[0m 2s/step - loss: 1428.6265 - mean_absolute_error: 37.1628 - val_loss: 1410.2720 - val_mean_absolute_error: 37.0003
Epoch 30/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1425.1466 - mean_absolute_error: 37.1182



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 2s/step - loss: 1425.0707 - mean_absolute_error: 37.1172 - val_loss: 1406.3196 - val_mean_absolute_error: 37.1370


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Flatten the model output to one predicted age per test image.
preds = model_age.predict(test_age_ds).squeeze()
# test_age_ds was built with shuffle=False, so predictions follow test_df row order.
y_true = test_df['age'].values

# Report mean absolute error and root mean squared error on the test split.
mae = mean_absolute_error(y_true, preds)
mse = mean_squared_error(y_true, preds)
rmse = np.sqrt(mse)
print("MAE:", mae, "RMSE:", rmse)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step
MAE: 36.89067840576172 RMSE: 37.2466179253231


Preparing Dataset


In [None]:
import os, glob, pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Path to dataset folder in Google Drive
DATA_DIR = "/content/drive/MyDrive/CACD"

# Every path below crop_part1; labelled images are expected to be named
# <age>_<gender>_<race>_... so the first three fields can be read as integers.
files = glob.glob(os.path.join(DATA_DIR, "crop_part1", "**/*"), recursive=True)
rows=[]
for f in files:
    try:
        fn = os.path.basename(f)
        age, gender, race = fn.split("_")[:3]
        rows.append({"file": f, "age": int(age), "gender": int(gender), "race": int(race)})
    except ValueError:
        # Too few underscore-separated fields, or a field that is not an
        # integer: not a labelled image, so skip it. Only ValueError is caught
        # so that interrupts and genuine bugs are not silently swallowed.
        continue

df = pd.DataFrame(rows)
# 80/10/10 train/validation/test split with a fixed seed for reproducibility.
train_df, tmp = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(tmp, test_size=0.5, random_state=42)

print(len(train_df), len(val_df), len(test_df))

3269 409 409


Creating Data Pipeline


In [None]:
# NOTE: this rebinds the notebook-wide IMG_SIZE (224 in the earlier MobileNetV2
# section); cells above that read IMG_SIZE will see 128 if re-run after this one.
IMG_SIZE = 128  # smaller for CNN from scratch
BATCH_SIZE = 32  # default batch size used by df_to_dataset
AUTOTUNE = tf.data.AUTOTUNE  # passed to map()/prefetch() so tf.data picks the parallelism itself

def preprocess_image(path):
    """Load the JPEG stored at ``path`` as a float image scaled to [0, 1].

    The file is decoded to 3 channels, resized to ``IMG_SIZE`` x ``IMG_SIZE``
    and its pixel values are divided by 255.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [IMG_SIZE, IMG_SIZE])
    return resized / 255.0

def df_to_dataset(dataframe, label_col, batch_size=BATCH_SIZE, shuffle=True):
    """Build a batched ``tf.data.Dataset`` of ``(image, label)`` pairs from a DataFrame.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'file'`` column of image paths and the column named
        by ``label_col``.
    label_col : str
        Column whose values become the labels (cast to float32).
    batch_size : int
        Number of examples per batch.
    shuffle : bool
        If True, the examples are shuffled with a buffer covering the whole
        DataFrame; if False, DataFrame row order is preserved.

    Returns
    -------
    tf.data.Dataset
        Batches of images produced by ``preprocess_image`` paired with labels.
    """
    paths = dataframe['file'].values
    labels = dataframe[label_col].values.astype('float32')
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        # Shuffle the (path, label) pairs *before* decoding. The buffer spans
        # the whole split, so shuffling after map() would keep every decoded
        # float image resident in memory; here it only holds short strings.
        ds = ds.shuffle(buffer_size=len(dataframe))
    def load(path, label):
        img = preprocess_image(path)
        return img, label
    ds = ds.map(load, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(AUTOTUNE)
    return ds

# Example datasets
# Only the training splits are shuffled. Validation and test keep DataFrame row
# order so that model predictions line up with val_df / test_df label columns.
train_gender_ds = df_to_dataset(train_df, 'gender')
val_gender_ds   = df_to_dataset(val_df, 'gender', shuffle=False)
test_gender_ds  = df_to_dataset(test_df, 'gender', shuffle=False)

# Same images, with the 'age' column as the (regression) target.
train_age_ds = df_to_dataset(train_df, 'age')
val_age_ds   = df_to_dataset(val_df, 'age', shuffle=False)
test_age_ds  = df_to_dataset(test_df, 'age', shuffle=False)


CNN for Gender Classification

In [None]:
from tensorflow.keras import layers, models

def build_cnn_classifier(input_shape=(IMG_SIZE, IMG_SIZE, 3)):
    """Build and compile a small CNN for binary classification.

    Architecture: three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, then Flatten, Dense(128, ReLU), Dropout(0.3) and a
    single sigmoid output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input image as ``(height, width, channels)``.

    Returns
    -------
    keras Sequential model
        Compiled with the Adam optimizer, binary cross-entropy loss and an
        accuracy metric.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing input_shape
        # to the first Conv2D makes Keras emit a UserWarning in Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(128, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(1, activation='sigmoid')   # Binary classification
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Instantiate the classifier with its default input shape and print its layer table.
cnn_gender = build_cnn_classifier()
cnn_gender.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# ModelCheckpoint writes cnn_gender.h5 whenever val_accuracy improves;
# EarlyStopping ends training after 5 epochs without a val_accuracy improvement
# and restores the best weights seen.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint("cnn_gender.h5", save_best_only=True, monitor="val_accuracy"),
    tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True, monitor="val_accuracy")
]

# Train for at most 20 epochs, validating on the held-out validation split.
history_gender = cnn_gender.fit(train_gender_ds, validation_data=val_gender_ds,
                                epochs=20, callbacks=callbacks)


Epoch 1/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5549 - loss: 0.7665



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 1s/step - accuracy: 0.5551 - loss: 0.7658 - val_accuracy: 0.6479 - val_loss: 0.6239
Epoch 2/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.6743 - loss: 0.6146



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 1s/step - accuracy: 0.6745 - loss: 0.6143 - val_accuracy: 0.7066 - val_loss: 0.6171
Epoch 3/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7055 - loss: 0.5838



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 1s/step - accuracy: 0.7057 - loss: 0.5834 - val_accuracy: 0.7726 - val_loss: 0.4901
Epoch 4/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7719 - loss: 0.4603



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 1s/step - accuracy: 0.7719 - loss: 0.4603 - val_accuracy: 0.7897 - val_loss: 0.4348
Epoch 5/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8205 - loss: 0.3971



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 1s/step - accuracy: 0.8207 - loss: 0.3969 - val_accuracy: 0.8509 - val_loss: 0.3474
Epoch 6/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8715 - loss: 0.2922



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 1s/step - accuracy: 0.8716 - loss: 0.2921 - val_accuracy: 0.8778 - val_loss: 0.2750
Epoch 7/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9164 - loss: 0.2043



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 1s/step - accuracy: 0.9164 - loss: 0.2043 - val_accuracy: 0.9095 - val_loss: 0.2653
Epoch 8/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 1s/step - accuracy: 0.9503 - loss: 0.1468 - val_accuracy: 0.8900 - val_loss: 0.2511
Epoch 9/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9530 - loss: 0.1263



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 1s/step - accuracy: 0.9531 - loss: 0.1262 - val_accuracy: 0.9315 - val_loss: 0.1663
Epoch 10/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9809 - loss: 0.0604



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 1s/step - accuracy: 0.9809 - loss: 0.0604 - val_accuracy: 0.9682 - val_loss: 0.0818
Epoch 11/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9833 - loss: 0.0496



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 1s/step - accuracy: 0.9834 - loss: 0.0495 - val_accuracy: 0.9804 - val_loss: 0.0534
Epoch 12/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - accuracy: 0.9947 - loss: 0.0244 - val_accuracy: 0.9682 - val_loss: 0.0761
Epoch 13/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 1s/step - accuracy: 0.9889 - loss: 0.0366 - val_accuracy: 0.9804 - val_loss: 0.0617
Epoch 14/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 1s/step - accuracy: 0.9827 - loss: 0.0500 - val_accuracy: 0.9804 - val_loss: 0.0672
Epoch 15/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.9946 - loss: 0.0181



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 1s/step - accuracy: 0.9945 - loss: 0.0183 - val_accuracy: 0.9829 - val_loss: 0.0634
Epoch 16/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 1s/step - accuracy: 0.9958 - loss: 0.0191 - val_accuracy: 0.9780 - val_loss: 0.0490
Epoch 17/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 1s/step - accuracy: 0.9953 - loss: 0.0157 - val_accuracy: 0.9804 - val_loss: 0.0667
Epoch 18/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 1s/step - accuracy: 0.9779 - loss: 0.0740 - val_accuracy: 0.9731 - val_loss: 0.0805
Epoch 19/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 1s/step - accuracy: 0.9883 - loss: 0.0311 - val_accuracy: 0.9829 - val_loss: 0.0795
Epoch 20/20
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 1s/step - accuracy: 0.9923 - loss: 0.0206 - val_accuracy: 0.9780 - val_loss: 0.0742


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions, then drop
# the trailing singleton axis so the result is a flat vector.
preds = (cnn_gender.predict(test_gender_ds) > 0.5).astype(int).squeeze()
# test_gender_ds was built with shuffle=False, so predictions follow test_df row order.
y_true = test_df['gender'].values

# Per-class precision/recall/F1, followed by the confusion matrix.
print(classification_report(y_true, preds))
print(confusion_matrix(y_true, preds))


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 528ms/step
              precision    recall  f1-score   support

           0       0.98      0.95      0.96       187
           1       0.96      0.99      0.97       222

    accuracy                           0.97       409
   macro avg       0.97      0.97      0.97       409
weighted avg       0.97      0.97      0.97       409

[[177  10]
 [  3 219]]


CNN for Age Regression

In [None]:
def build_cnn_regressor(input_shape=(IMG_SIZE, IMG_SIZE, 3)):
    """Build and compile a small CNN that regresses a single scalar.

    Architecture: three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, then Flatten, Dense(128, ReLU), Dropout(0.3) and a
    single linear output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input image as ``(height, width, channels)``.

    Returns
    -------
    keras Sequential model
        Compiled with the Adam optimizer, mean-squared-error loss and a
        mean-absolute-error metric.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing input_shape
        # to the first Conv2D makes Keras emit a UserWarning in Sequential models.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(128, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(1, activation='linear')   # Regression
    ])
    model.compile(optimizer='adam',
                  loss='mse',
                  metrics=[tf.keras.metrics.MeanAbsoluteError()])
    return model

# Instantiate the regressor with its default input shape and print its layer table.
cnn_age = build_cnn_regressor()
cnn_age.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# ModelCheckpoint writes cnn_age.h5 whenever val_loss improves; EarlyStopping
# ends training after 5 epochs without a val_loss improvement and restores the
# best weights seen.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint("cnn_age.h5", save_best_only=True, monitor="val_loss"),
    tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True, monitor="val_loss")
]

# Train for at most 30 epochs, validating on the held-out validation split.
history_age = cnn_age.fit(train_age_ds, validation_data=val_age_ds,
                          epochs=30, callbacks=callbacks)


Epoch 1/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 1837.3967 - mean_absolute_error: 39.2146



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 1s/step - loss: 1834.6818 - mean_absolute_error: 39.1893 - val_loss: 1298.9552 - val_mean_absolute_error: 30.1949
Epoch 2/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 899.7562 - mean_absolute_error: 24.7309



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 1s/step - loss: 898.0389 - mean_absolute_error: 24.7009 - val_loss: 426.5154 - val_mean_absolute_error: 15.7678
Epoch 3/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 1s/step - loss: 568.1614 - mean_absolute_error: 18.5801 - val_loss: 461.6264 - val_mean_absolute_error: 15.5039
Epoch 4/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 509.4454 - mean_absolute_error: 17.1649



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 1s/step - loss: 508.6849 - mean_absolute_error: 17.1503 - val_loss: 318.4075 - val_mean_absolute_error: 14.2461
Epoch 5/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 406.5163 - mean_absolute_error: 15.2406



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 1s/step - loss: 406.2360 - mean_absolute_error: 15.2340 - val_loss: 268.6572 - val_mean_absolute_error: 11.7941
Epoch 6/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 294.2374 - mean_absolute_error: 12.6612



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - loss: 294.1250 - mean_absolute_error: 12.6590 - val_loss: 258.1079 - val_mean_absolute_error: 12.2429
Epoch 7/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - loss: 283.1175 - mean_absolute_error: 12.4598 - val_loss: 296.8770 - val_mean_absolute_error: 12.8697
Epoch 8/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 225.8526 - mean_absolute_error: 11.0509



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 1s/step - loss: 225.8768 - mean_absolute_error: 11.0520 - val_loss: 229.9767 - val_mean_absolute_error: 10.4927
Epoch 9/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 1s/step - loss: 220.8190 - mean_absolute_error: 10.8318 - val_loss: 266.5003 - val_mean_absolute_error: 11.4377
Epoch 10/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 176.4928 - mean_absolute_error: 9.6890



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 1s/step - loss: 176.5247 - mean_absolute_error: 9.6903 - val_loss: 162.6810 - val_mean_absolute_error: 8.9247
Epoch 11/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 1s/step - loss: 158.4618 - mean_absolute_error: 9.2203 - val_loss: 171.7849 - val_mean_absolute_error: 9.3222
Epoch 12/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 128.8794 - mean_absolute_error: 8.1384



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 1s/step - loss: 128.9463 - mean_absolute_error: 8.1400 - val_loss: 136.7844 - val_mean_absolute_error: 8.0286
Epoch 13/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 133.2393 - mean_absolute_error: 8.2559



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 1s/step - loss: 133.2234 - mean_absolute_error: 8.2551 - val_loss: 116.4754 - val_mean_absolute_error: 7.3625
Epoch 14/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 130.1234 - mean_absolute_error: 8.2185



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - loss: 129.9982 - mean_absolute_error: 8.2144 - val_loss: 112.7831 - val_mean_absolute_error: 6.9442
Epoch 15/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 123.4963 - mean_absolute_error: 8.1079



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 1s/step - loss: 123.5493 - mean_absolute_error: 8.1092 - val_loss: 109.5516 - val_mean_absolute_error: 7.0620
Epoch 16/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 112.1798 - mean_absolute_error: 7.6748



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 1s/step - loss: 112.0828 - mean_absolute_error: 7.6709 - val_loss: 97.6659 - val_mean_absolute_error: 6.1582
Epoch 17/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 90.8218 - mean_absolute_error: 6.7775



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 1s/step - loss: 90.8461 - mean_absolute_error: 6.7781 - val_loss: 91.7586 - val_mean_absolute_error: 6.2108
Epoch 18/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 1s/step - loss: 98.9525 - mean_absolute_error: 7.1599 - val_loss: 113.7160 - val_mean_absolute_error: 7.2217
Epoch 19/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 1s/step - loss: 91.1439 - mean_absolute_error: 6.7657 - val_loss: 92.2067 - val_mean_absolute_error: 6.1350
Epoch 20/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - loss: 71.6563 - mean_absolute_error: 5.9505 - val_loss: 111.5748 - val_mean_absolute_error: 7.0884
Epoch 21/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 85.1320 - mean_absolute_error: 6.3611



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 1s/step - loss: 85.1128 - mean_absolute_error: 6.3612 - val_loss: 75.8105 - val_mean_absolute_error: 5.6030
Epoch 22/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 1s/step - loss: 69.8224 - mean_absolute_error: 6.0482 - val_loss: 80.9574 - val_mean_absolute_error: 5.7433
Epoch 23/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 63.5498 - mean_absolute_error: 5.5915



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 1s/step - loss: 63.6159 - mean_absolute_error: 5.5944 - val_loss: 73.1614 - val_mean_absolute_error: 5.7341
Epoch 24/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 1s/step - loss: 77.9253 - mean_absolute_error: 6.2961 - val_loss: 97.2050 - val_mean_absolute_error: 7.0579
Epoch 25/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 80.2127 - mean_absolute_error: 6.2602



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 1s/step - loss: 80.1599 - mean_absolute_error: 6.2586 - val_loss: 67.0964 - val_mean_absolute_error: 5.0414
Epoch 26/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 1s/step - loss: 59.7759 - mean_absolute_error: 5.3694 - val_loss: 98.8739 - val_mean_absolute_error: 6.7860
Epoch 27/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 62.8531 - mean_absolute_error: 5.5652



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 1s/step - loss: 62.8396 - mean_absolute_error: 5.5643 - val_loss: 61.7404 - val_mean_absolute_error: 4.7151
Epoch 28/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 78.5661 - mean_absolute_error: 6.2990



[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 1s/step - loss: 78.5346 - mean_absolute_error: 6.2973 - val_loss: 57.8826 - val_mean_absolute_error: 4.3955
Epoch 29/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 1s/step - loss: 71.2098 - mean_absolute_error: 5.8959 - val_loss: 75.6647 - val_mean_absolute_error: 5.5147
Epoch 30/30
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 1s/step - loss: 62.8476 - mean_absolute_error: 5.6020 - val_loss: 59.2240 - val_mean_absolute_error: 4.5308


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Evaluate the age-regression CNN on the held-out test split.
#
# reshape(-1) flattens the predictions to 1-D explicitly. A bare squeeze()
# would collapse a single-sample prediction to a 0-d scalar, which the
# sklearn metric functions reject.
# NOTE(review): presumably the model emits one value per image, i.e. an
# (N, 1) array — confirm against the model definition.
preds = np.asarray(cnn_age.predict(test_age_ds)).reshape(-1)
y_true = test_df['age'].to_numpy()

# NOTE(review): the metrics below assume test_age_ds yields samples in the
# same order as the rows of test_df (no shuffling) — confirm where the
# dataset is built; a shuffled test pipeline would silently corrupt MAE/RMSE.
if len(preds) != len(y_true):
    raise ValueError(
        f"Prediction/label count mismatch: {len(preds)} predictions "
        f"vs {len(y_true)} labels"
    )

mae = mean_absolute_error(y_true, preds)
mse = mean_squared_error(y_true, preds)
rmse = np.sqrt(mse)  # RMSE is in the same units as the age labels
print("MAE:", mae, "RMSE:", rmse)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 490ms/step
MAE: 4.320807933807373 RMSE: 7.394510872677285
