# Age Detection Model

**Import necessary libraries**

In [1]:
import os                  
import cv2                 
import numpy as np
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tqdm import tqdm

import matplotlib.pyplot as plt



2025-07-04 12:35:41.952176: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751632542.125684      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751632542.175831      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


**Load the dataset**

In [2]:
image_data='/kaggle/input/utk-face-cropped/utkcropped'    
image_size = 224            # We resize all images to 224x224 for our MobileNetV2 Model
X,y=[],[]

files = os.listdir(image_data)[:5000] #Work on the 5000 becasue the system is not handle the overall dataset

for file in tqdm(files):
    try:
        age = int(file.split("_")[0])  # Extract the  age via filename
        img_path = os.path.join(image_data, file)
        
        image = cv2.imread(img_path)
        image = cv2.resize(image, (image_size, image_size))
        image = image / 255.0  # Normalize the images

        X.append(image)
        y.append(age)
    except:
        continue

X = np.array(X)
y = np.array(y)

print("Total images loaded:-", len(X))
print("shape of X:-", X.shape)
print("Shape of y:-", y.shape)

100%|██████████| 5000/5000 [00:38<00:00, 130.69it/s]


Total images loaded:- 5000
shape of X:- (5000, 224, 224, 3)
Shape of y:- (5000,)


**Split the data for training and testing**

In [3]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
#print the quantity of training and testing data
print(f"Training data for Model:- {len(X_train)}")
print(f"Testing data for Model:- {len(X_test)}")

Training data for Model:- 4000
Testing data for Model:- 1000


**Build the MobileNetV2-Based Model**

In [4]:
#Define the shape for input
input_shape=(224,224,3)

## Load pre-trained MobileNetV2 model without the top layer to make our own layer
base_model=MobileNetV2(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet'
    )

#Stop the base model layer
base_model.trainable=False

# create the new model on first of base
inputs=Input(shape=input_shape)
x=base_model(inputs,training=False)
x=GlobalAveragePooling2D()(x)
x=Dropout(0.3)(x)
x=Dense(128,activation='relu')(x)
x=Dropout(0.3)(x)

output=Dense(1)(x) #One output neuraon for predicting the age

#Bulid the proper Model
model=Model(inputs,output)

#compile the model using the MSL(mean square loss) which is part of regression
model.compile(optimizer='adam',
             loss='mean_squared_error',
             metrics=['mae'])
#Mean absolute Error to creating our model more readable
print("done")

I0000 00:00:1751632596.289906      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1751632596.290599      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
done


**Training the model**

In [5]:
#set the Training Callbacks for better training
checkpoint=ModelCheckpoint('age_model.h5',      # Save better accuracy model to this .h5 file
                          monitor='val_loss',   # Checking the validation loss to save model
                          save_best_only=True,
                          verbose=1)

earlystops=EarlyStopping(monitor='val_loss',        # Stop if validation loss stops improving
                        patience=5,                 # Wait for 5 epochs before stopping
                        restore_best_weights=True)  # Roll back to best weights
print("done")

done


In [6]:
history=model.fit(X_train,y_train,
                 validation_data=(X_test,y_test),
                 epochs=15,
                 batch_size=16,                      # there is number of samples per step
                 callbacks=[checkpoint, earlystops])  # checkpoint improve training with callbacks

print("done")

Epoch 1/15


I0000 00:00:1751632611.586853     101 service.cc:148] XLA service 0x7bcd60094570 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1751632611.587443     101 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1751632611.587474     101 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1751632612.396199     101 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  7/250[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 22ms/step - loss: 992.3543 - mae: 27.0733 

I0000 00:00:1751632616.693828     101 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m249/250[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 411.5303 - mae: 15.6879
Epoch 1: val_loss improved from inf to 226.89343, saving model to age_model.h5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 53ms/step - loss: 410.6140 - mae: 15.6686 - val_loss: 226.8934 - val_mae: 11.7218
Epoch 2/15
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 218.5954 - mae: 11.4963
Epoch 2: val_loss improved from 226.89343 to 206.81035, saving model to age_model.h5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 22ms/step - loss: 218.5527 - mae: 11.4951 - val_loss: 206.8103 - val_mae: 11.0481
Epoch 3/15
[1m247/250[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - loss: 202.6423 - mae: 10.9818
Epoch 3: val_loss did not improve from 206.81035
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step - loss: 202.6327 - mae: 10.9811 - val_loss: 224.3711 - val_mae: 11.6

In [8]:
mae = 9.69         # from the model val_mae
max_age = 116      # take the our dataset max age for formula (or 100 in UTK)
accuracy = (1 - (mae / max_age)) * 100 #Formula to convert the mae into the accuracy
print(f"Estimated Accuracy: {accuracy:.2f}%")


Estimated Accuracy: 91.65%
