# Age Regression CNN — UTKFace (Aligned & Cropped)
**But** : prédire l'âge (valeur continue) à partir d'une image de visage.

- Dataset : UTKFace *Aligned & Cropped* (≈107 MB)
- Modèle : MobileNetV2 (ImageNet) + tête régression
- Perte : MSE ; Métrique : MAE (erreur moyenne en années)

In [1]:
!nvidia-smi || echo "No NVIDIA GPU (CPU only)"
!pip -q install -U tensorflow gradio

import os, re, zipfile, random, math, json, pathlib
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.model_selection import train_test_split
print("TF:", tf.__version__)


Fri Sep 26 08:08:41 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   43C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import kagglehub, os, shutil

path = kagglehub.dataset_download("jangedoo/utkface-new")
print("Path to dataset files:", path)

DATA_ROOT = "/content/data"
EXTRACT_DIR = os.path.join(DATA_ROOT, "UTKFace_kaggle")
if os.path.exists(EXTRACT_DIR):
    shutil.rmtree(EXTRACT_DIR)
os.makedirs(EXTRACT_DIR, exist_ok=True)


def is_img(fname):
    return fname.lower().endswith((".jpg",".jpeg",".png"))

count = 0
for root, _, files in os.walk(path):
    for f in files:
        if is_img(f):
            src = os.path.join(root, f)
            dst = os.path.join(EXTRACT_DIR, f)
            if os.path.exists(dst):
                stem, ext = os.path.splitext(f)
                i = 1
                while os.path.exists(dst):
                    dst = os.path.join(EXTRACT_DIR, f"{stem}_{i}{ext}")
                    i += 1
            shutil.copy2(src, dst)
            count += 1

print("Images copiées:", count)
print("EXTRACT_DIR =", EXTRACT_DIR)


Using Colab cache for faster access to the 'utkface-new' dataset.
Path to dataset files: /kaggle/input/utkface-new
Images copiées: 66976
EXTRACT_DIR = /content/data/UTKFace_kaggle


In [None]:
def list_images_with_age(root):
    paths, ages = [], []
    for dirpath, _, files in os.walk(root):
        for f in files:
            if f.lower().endswith((".jpg",".jpeg",".png")):
                m = re.match(r"^(\d+)_", f)
                if not m:
                    m = re.match(r"^(\d+)", f) 
                if m:
                    age = int(m.group(1))
                    if 0 <= age <= 100: 
                        paths.append(os.path.join(dirpath, f))
                        ages.append(age)
    return pd.DataFrame({"path": paths, "age": ages})

df = list_images_with_age(EXTRACT_DIR)
print("Images valides:", len(df))
df.head()


Images valides: 66928


Unnamed: 0,path,age
0,/content/data/UTKFace_kaggle/26_1_4_2017011720...,26
1,/content/data/UTKFace_kaggle/26_1_3_2017011717...,26
2,/content/data/UTKFace_kaggle/25_0_0_2017012022...,25
3,/content/data/UTKFace_kaggle/63_0_0_2017011314...,63
4,/content/data/UTKFace_kaggle/50_0_4_2017011720...,50


In [4]:
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
train_df, test_df = train_test_split(df, test_size=0.10, random_state=42)
train_df, val_df  = train_test_split(train_df, test_size=0.10, random_state=42)
len(train_df), len(val_df), len(test_df)


(54211, 6024, 6693)

In [None]:
IMG_SIZE = 160
BATCH = 64
AUTOTUNE = tf.data.AUTOTUNE

def decode_image(path):
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE), antialias=True)
    return img

def preprocess(img):
    return tf.keras.applications.mobilenet_v2.preprocess_input(img)

def load_item(path, age):
    img = decode_image(path)
    img = preprocess(img)
    age = tf.cast(age, tf.float32)
    return img, age

augment = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
], name="augment")

def df_to_ds(frame, training=False):
    ds = tf.data.Dataset.from_tensor_slices((frame["path"].values, frame["age"].values))
    ds = ds.map(load_item, num_parallel_calls=AUTOTUNE)
    if training:
        ds = ds.map(lambda x, y: (augment(x, training=True), y), num_parallel_calls=AUTOTUNE)
        ds = ds.shuffle(2048, reshuffle_each_iteration=True)
    ds = ds.batch(BATCH).prefetch(AUTOTUNE)
    return ds

train_ds = df_to_ds(train_df, training=True)
val_ds   = df_to_ds(val_df, training=False)
test_ds  = df_to_ds(test_df, training=False)

train_ds, val_ds, test_ds


(<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 160, 160, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>,
 <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 160, 160, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>,
 <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 160, 160, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>)

In [6]:
base = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)
base.trainable = False

inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = base(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(1, activation="linear")(x)  # âge (continu)

model = keras.Model(inputs, outputs)
model.summary()

model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="mse",
    metrics=[keras.metrics.MeanAbsoluteError(name="mae")]
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_mae", patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor="val_mae", factor=0.5, patience=2, min_lr=1e-5)
]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/15
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m430s[0m 465ms/step - loss: 276.7477 - mae: 12.3167 - val_loss: 150.2212 - val_mae: 9.0875 - learning_rate: 0.0010
Epoch 2/15
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m375s[0m 427ms/step - loss: 166.0143 - mae: 9.4972 - val_loss: 142.1604 - val_mae: 8.7835 - learning_rate: 0.0010
Epoch 3/15
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 427ms/step - loss: 160.7422 - mae: 9.3122 - val_loss: 139.0348 - val_mae: 8.6219 - learning_rate: 0.0010
Epoch 4/15
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m380s[0m 425ms/step - loss: 157.0000 - mae: 9.1928 - val_loss: 147.7343 - val_mae: 9.1821 - learning_rate: 0.0010
Epoch 5/15
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 425ms/step - loss: 155.1964 - mae: 9.1243 - val_loss: 136.5443 - val_mae: 8.4022 - learning_rate: 0.0010
Epoch 6/15
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 

In [None]:
base.trainable = True
for i, layer in enumerate(base.layers):
    if i < int(0.75 * len(base.layers)):  
        layer.trainable = False

model.compile(
    optimizer=keras.optimizers.Adam(5e-4),
    loss="mse",
    metrics=[keras.metrics.MeanAbsoluteError(name="mae")]
)

history_ft = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/10
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 458ms/step - loss: 140.4626 - mae: 8.4133 - val_loss: 380.1287 - val_mae: 17.1632 - learning_rate: 5.0000e-04
Epoch 2/10
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 438ms/step - loss: 77.5328 - mae: 6.2858 - val_loss: 216.6769 - val_mae: 11.9446 - learning_rate: 5.0000e-04
Epoch 3/10
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m444s[0m 440ms/step - loss: 62.5255 - mae: 5.6403 - val_loss: 155.2809 - val_mae: 10.3342 - learning_rate: 5.0000e-04
Epoch 4/10
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 440ms/step - loss: 57.6257 - mae: 5.3841 - val_loss: 247.2207 - val_mae: 12.5482 - learning_rate: 5.0000e-04
Epoch 5/10
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 440ms/step - loss: 51.7935 - mae: 5.1270 - val_loss: 86.5363 - val_mae: 7.2671 - learning_rate: 5.0000e-04
Epoch 6/10
[1m848/848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [9]:
test_loss, test_mae = model.evaluate(test_ds, verbose=0)
print(f"Test — MSE: {test_loss:.3f} | MAE: {test_mae:.3f} ans")

y_true, y_pred = [], []
for xb, yb in test_ds:
    preds = model.predict(xb, verbose=0)
    y_true.extend(yb.numpy().tolist())
    y_pred.extend(preds.squeeze().tolist())

y_true = np.array(y_true, dtype=float)
y_pred = np.clip(np.array(y_pred, dtype=float), 0, 100)

mae = np.mean(np.abs(y_true - y_pred))
within5 = np.mean(np.abs(y_true - y_pred) <= 5.0)
print(f"MAE global: {mae:.2f} ans")
print(f"Précision ±5 ans: {within5*100:.1f}%")


Test — MSE: 35.768 | MAE: 4.427 ans
MAE global: 4.43 ans
Précision ±5 ans: 64.5%


In [10]:
SAVE_DIR = "/content/age_regressor"
os.makedirs(SAVE_DIR, exist_ok=True)
MODEL_PATH = os.path.join(SAVE_DIR, "age_regressor.h5")
model.save(MODEL_PATH)
print("Sauvegardé:", MODEL_PATH)

def predict_age(img_path: str) -> float:
    img = tf.io.read_file(img_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE), antialias=True)
    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
    pred = model.predict(tf.expand_dims(img, 0), verbose=0)[0][0]
    return float(np.clip(pred, 0, 100))

sample_path = test_df.iloc[0]["path"]
print("Exemple:", sample_path)
print("Âge vrai:", int(test_df.iloc[0]["age"]), "| Âge estimé:", round(predict_age(sample_path), 1))




Sauvegardé: /content/age_regressor/age_regressor.h5
Exemple: /content/data/UTKFace_kaggle/1_1_3_20161220220708394.jpg.chip_3.jpg
Âge vrai: 1 | Âge estimé: 1.7


In [None]:
import gradio as gr

def infer_gradio(img):
    img = tf.keras.preprocessing.image.img_to_array(img)
    img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE), antialias=True)
    img = tf.keras.applications.mobilenet_v2.preprocess_input(img)
    pred = model.predict(tf.expand_dims(img, 0), verbose=0)[0][0]
    age = float(np.clip(pred, 0, 100))
    return f"Estimated age: {age:.1f} years"

demo = gr.Interface(
    fn=infer_gradio,
    inputs=gr.Image(type="pil", label="Upload a cropped face image"),
    outputs=gr.Textbox(label="Prediction"),
    title="Age Regression (UTKFace via kagglehub)",
    description="Upload a cropped face image. The model returns an estimated age (continuous)."
)
demo.launch(debug=False, share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://21edb974bd5b194647.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


