# TP4, INF8225 2025

**Sources**

* Dataset: [UTKFace](https://susanqq.github.io/UTKFace/)

# Imports and data initializations

## Imports

In [None]:
# Mount Google Drive at /content/drive (Colab-only); CHECKPOINT_DIR lives under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install torchinfo > /dev/null
!pip install einops > /dev/null
!pip install wandb > /dev/null
!pip install opendatasets > /dev/null


In [None]:
from itertools import takewhile
from collections import Counter, defaultdict

import numpy as np
import math
from sklearn.model_selection import train_test_split
import pandas as pd

from typing import Dict, List

import torch
# cpal
print(torch.__version__)

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence
from torchvision import transforms as T

import einops
import wandb
from torchinfo import summary

import os
import opendatasets as od

import multiprocessing as mp

# plotting
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

from PIL import Image
import os

import sympy

import copy

import timm

import pprint

import gc

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


2.6.0+cu124


In [None]:
# Folder on the mounted Drive that holds the checkpoints referenced by MODEL_CONFIGS.
CHECKPOINT_DIR = '/content/drive/MyDrive/INF8225_TP4_ckpt_f'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)  # exist_ok: do not fail when the folder is already there

## Dataset analysis

In [None]:
# CSV version of the dataset (age / ethnicity / gender labels + image names).
# opendatasets prompts for Kaggle credentials on first download.
dataset = 'https://www.kaggle.com/datasets/nipunarora8/age-gender-and-ethnicity-face-data-csv/'
od.download(dataset)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: clovisjohn
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/nipunarora8/age-gender-and-ethnicity-face-data-csv


In [None]:
# Image-file version of the dataset; the pixel data used later is read from these files.
dataset = 'https://www.kaggle.com/datasets/jangedoo/utkface-new/data'
od.download(dataset)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: clovisjohn
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/jangedoo/utkface-new


### UTKFACE

In [None]:
# Label table; its 'pixels' column is replaced further down by arrays loaded from the image files.
df = pd.read_csv('./age-gender-and-ethnicity-face-data-csv/age_gender.csv')
df.head()

Unnamed: 0,age,ethnicity,gender,img_name,pixels
0,1,2,0,20161219203650636.jpg.chip.jpg,129 128 128 126 127 130 133 135 139 142 145 14...
1,1,2,0,20161219222752047.jpg.chip.jpg,164 74 111 168 169 171 175 182 184 188 193 199...
2,1,2,0,20161219222832191.jpg.chip.jpg,67 70 71 70 69 67 70 79 90 103 116 132 145 155...
3,1,2,0,20161220144911423.jpg.chip.jpg,193 197 198 200 199 200 202 203 204 205 208 21...
4,1,2,0,20161220144914327.jpg.chip.jpg,202 205 209 210 209 209 210 211 212 214 218 21...


.

**The nipunarora8/age-gender-and-ethnicity-face-data-csv dataset only contains images scaled down to 48x48, so we load the original 200x200 images from jangedoo/utkface-new/data instead.**

**Alternatively, we could use jangedoo/utkface-new/data directly and parse the labels from the file names.**

.

In [None]:

# Rename every file "<a>_<b>_<c>_<rest>" to "<rest>" so that the files can be
# looked up with the `img_name` column of the CSV.
# (presumably the three dropped fields are the labels encoded in the name — TODO confirm)
folder = './utkface-new/UTKFace'
for filename in os.listdir(folder):
    old_filepath = os.path.join(folder, filename)

    # Only regular files are renamed; sub-directories are left alone.
    if not os.path.isfile(old_filepath):
        continue

    parts = filename.split('_', maxsplit=3)
    if len(parts) != 4:
        # Fewer than three underscores: e.g. a file renamed by a previous run of this cell.
        continue

    new_filename = parts[3]
    new_filepath = os.path.join(folder, new_filename)

    # os.rename silently replaces an existing target on POSIX, which would
    # destroy an image when two files share the same suffix: keep both instead.
    if os.path.exists(new_filepath):
        print(f"[WARN] not renaming {old_filepath}: {new_filepath} already exists")
        continue

    os.rename(old_filepath, new_filepath)

In [None]:
original_images_dir = './utkface-new/UTKFace'

def load_original_image(img_name):
    """Read one image from ``original_images_dir`` as a grayscale float32 array.

    Parameters
    ----------
    img_name : str
        File name relative to ``original_images_dir``.

    Returns
    -------
    np.ndarray or None
        The image converted to mode 'L' as a float32 array, or ``None`` when
        anything goes wrong while loading (the error is printed, not raised).
    """
    full_path = os.path.join(original_images_dir, img_name)
    try:
        grayscale = Image.open(full_path).convert('L')
        pixels = np.array(grayscale, dtype='float32')
    except Exception as err:
        # Best effort: report the failure and let the caller deal with the missing image.
        print(f"Error loading {full_path}: {err}")
        pixels = None
    return pixels

# Replace the CSV pixel strings by the arrays loaded from the original image files.
df['pixels'] = df['img_name'].apply(load_original_image)

# load_original_image returns None on failure; such rows would crash the dataset
# class later on (it reads `.dtype` / `.shape` of the pixels), so drop them here
# and say how many were removed.
missing_pixels = df['pixels'].isna()
if missing_pixels.any():
    print(f"[WARN] dropping {int(missing_pixels.sum())} row(s) whose image could not be loaded")
    df = df[~missing_pixels].reset_index(drop=True)

df.head()


Unnamed: 0,age,ethnicity,gender,img_name,pixels
0,1,2,0,20161219203650636.jpg.chip.jpg,"[[129.0, 129.0, 129.0, 128.0, 128.0, 127.0, 12..."
1,1,2,0,20161219222752047.jpg.chip.jpg,"[[189.0, 173.0, 150.0, 124.0, 99.0, 79.0, 72.0..."
2,1,2,0,20161219222832191.jpg.chip.jpg,"[[69.0, 68.0, 67.0, 66.0, 67.0, 69.0, 72.0, 74..."
3,1,2,0,20161220144911423.jpg.chip.jpg,"[[193.0, 193.0, 194.0, 195.0, 195.0, 196.0, 19..."
4,1,2,0,20161220144914327.jpg.chip.jpg,"[[201.0, 201.0, 202.0, 203.0, 203.0, 204.0, 20..."


### Appa Real Face

In [None]:
# Second dataset (APPA-REAL, cropped faces); downloaded next to the notebook by opendatasets.
DATASET_URL = "https://www.kaggle.com/datasets/abhikjha/appa-real-face-cropped"
od.download(DATASET_URL)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: clovisjohn
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/abhikjha/appa-real-face-cropped


In [None]:
# Locations of the APPA-REAL files inside the downloaded dataset.
ROOT_DIR    = "./appa-real-face-cropped"      # folder created by opendatasets
IMG_FOLDER  = os.path.join(ROOT_DIR, "final_files/final_files")
CSV_PATH    = os.path.join(ROOT_DIR, "labels.csv")

In [None]:
# Load the APPA labels and align the column names with the UTKFace frame
# ("img_name" / "age") so both frames can go through the same pipeline.
df_apa = pd.read_csv(CSV_PATH).rename(
    columns={"file_name": "img_name", "real_age": "age"}
)

In [None]:
def load_image_gray(img_name: str) -> np.ndarray:
    """Read one image from ``IMG_FOLDER`` as a grayscale float32 array.

    Returns ``None`` (after printing a warning) when the image cannot be loaded.
    """
    path = os.path.join(IMG_FOLDER, img_name)
    try:
        grayscale = Image.open(path).convert("L")
        pixels = np.array(grayscale, dtype='float32')
    except Exception as err:
        # Best effort: warn and signal the failure with None instead of raising.
        print(f"[WARN] cannot load {path}: {err}")
        pixels = None
    return pixels

In [None]:
# Attach the grayscale pixel array of every APPA image (None where loading failed).
df_apa["pixels"] = df_apa["img_name"].apply(load_image_gray)

# Quick sanity check: number of rows, labels of the first row and shape of its image
print("Total images :", len(df_apa))
print("Exemple :", df_apa.iloc[0][["img_name", "age"]].to_dict(),
      "| shape", df_apa["pixels"].iloc[0].shape)

Total images : 7591
Exemple : {'img_name': '000000.jpg', 'age': 4} | shape (114, 114)


## Dataset Preprocessing

In [None]:
# Normalization
# df['pixels'] = df['pixels'].apply(lambda x: x/255)

In [None]:
class UTKFaceDataset(Dataset):
    """Dataset yielding ``(image, age label)`` pairs from a DataFrame.

    Every row of ``df`` must provide an ``'age'`` value and a ``'pixels'`` 2-D
    grayscale array. ``label_mode`` selects both the image preprocessing and
    the label encoding:

    * ``'class'``: the image goes through ``transform``; the label is the index
      of the age bin (``torch.long``), with ages 1..90 split into
      ``num_classes`` bins.
    * ``'vgg'`` / ``'vgg_reg'``: same image pipeline as ``'class'``; the label
      is the age as a scalar float32 tensor.
    * any other value (e.g. ``'regression'``): ``transform`` is ignored, the
      image is resized to 200x200 if needed and scaled to [0, 1]; the label is
      a float32 tensor of shape ``(1,)``.

    Parameters
    ----------
    df : pd.DataFrame
        Rows with ``'age'`` and ``'pixels'`` columns.
    num_classes : int
        Number of age bins used in ``'class'`` mode.
    transform : callable, optional
        Applied to the PIL image in the transform-based modes. When ``None``
        the raw pixels are returned as a ``(1, H, W)`` tensor scaled to [0, 1].
    label_mode : str
        See above.
    """

    # Modes whose images go through `transform`. The callers in this notebook
    # spell the VGG regression mode 'vgg_reg'; when only 'vgg' was recognised,
    # that spelling silently fell through to the 200x200 branch and bypassed
    # the transform. NOTE(review): confirm the VGG regression checkpoints were
    # trained with the transform pipeline.
    _TRANSFORM_MODES = ('class', 'vgg', 'vgg_reg')

    def __init__(self, df, num_classes, transform=None, label_mode: str = 'class'):
        super().__init__()
        self.df = df
        self.num_classes = num_classes
        self.transform = transform
        self.label_mode = label_mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.iloc[i]
        age = row['age']
        img_np = row['pixels']

        if self.label_mode in self._TRANSFORM_MODES:
            if img_np.dtype != np.uint8:
                img_np = img_np.astype(np.uint8)

            if self.transform is not None:
                # The PIL image is only needed when a transform consumes it.
                img = self.transform(Image.fromarray(img_np, mode='L'))
            else:
                img = torch.as_tensor(img_np, dtype=torch.float32)[None] / 255.

            if self.label_mode == 'class':
                # Ages 1..90 are split into `num_classes` equal-width bins;
                # anything above the last bin is clamped into it.
                class_idx = int((age - 1) / (90 / self.num_classes))
                class_idx = min(class_idx, self.num_classes - 1)
                label = torch.tensor(class_idx, dtype=torch.long)
            else:
                label = torch.tensor(float(age), dtype=torch.float32)

            return img, label

        # Plain regression: fixed 200x200 input scaled to [0, 1], no transform.
        if img_np.shape != (200, 200):
            img_pil = Image.fromarray(img_np.astype(np.uint8), mode='L')
            img_pil = img_pil.resize((200, 200), Image.BILINEAR)
            img_np = np.asarray(img_pil, dtype=np.uint8)

        img_np = img_np / 255.0
        img_np = np.reshape(img_np, (1, 200, 200))
        img_np = torch.tensor(img_np, dtype=torch.float32)

        # One-element tensor holding the regression target.
        age = torch.tensor(age, dtype=torch.float32).unsqueeze(0)
        return img_np, age

# Based on https://github.com/Ebimsv/Facial_Age_estimation_PyTorch/blob/main/custom_dataset_dataloader.py
# --------------------------- TRANSFORMS -------------------------------------
# Training pipeline: resize to 224, convert to a tensor, then normalize the
# single (grayscale) channel with mean 0.5 / std 0.5.
train_transform = T.Compose([
    T.Resize(224),
    T.ToTensor(),
    T.Normalize(mean=[0.5], std=[0.5])
])

# Validation / test pipeline (deterministic): resize to 256, take the central
# 224x224 crop, then apply the same tensor conversion and normalization.
# NOTE(review): the eval crop shows a tighter region than the training resize — confirm this is intended.
eval_transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[0.5], std=[0.5])
])

# def preprocess(
#     df: pd.DataFrame,
#     min_age: int = 0,
#     max_age: int = 90
# )
#     """Preprocess the dataset.
#     Remove rows where age < min_age or > max_age
#     """
#     filtered = df[(df['age'] >= min_age) & (df['age'] <= max_age)].reset_index(drop=True)
#     return filtered


def build_datasets(
      df: pd.DataFrame,
      min_age: int = 1,
      max_age: int = 90,
      num_classes = 30,
      label_mode='class'
    ) -> tuple:
    """Filter by age, split 90/10 and wrap both parts in ``UTKFaceDataset``.

    Args
    ----
        - df: frame with at least the ``'age'`` and ``'pixels'`` columns.
        - min_age, max_age: inclusive age range to keep; other rows are dropped.
        - num_classes: number of age bins, forwarded to ``UTKFaceDataset``.
        - label_mode: label/preprocessing mode, forwarded to ``UTKFaceDataset``.

    Output
    ------
        - (train_dataset, test_dataset): the training part uses
          ``train_transform`` and the test part ``eval_transform``. The split
          is seeded (``random_state=0``), so repeated calls on the same frame
          produce the same partition.
    """
    in_range = df['age'].between(min_age, max_age)
    kept_rows = df[in_range].reset_index(drop=True)

    train_rows, test_rows = train_test_split(kept_rows, test_size=0.1, random_state=0)

    train_dataset = UTKFaceDataset(
        train_rows, num_classes, transform=train_transform, label_mode=label_mode
    )
    test_dataset = UTKFaceDataset(
        test_rows, num_classes, transform=eval_transform, label_mode=label_mode
    )
    return train_dataset, test_dataset


# Models architecture




## CNN models


### CNN 1


In [None]:
# Basic CNN to test full notebook. Source : https://www.datacamp.com/tutorial/pytorch-cnn-tutorial

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN1(nn.Module):
    """Small two-stage CNN that regresses one real-valued age per image.

    Each stage is a 3x3 convolution (padding 1), a ReLU and a 2x2 max-pool.
    ``fc1`` expects 16 * 50 * 50 features, i.e. a 200x200 input halved twice.
    """

    def __init__(self, in_channels, dim_hidden=256, dropout=0.1):
        """
        Parameters
        ----------
        in_channels : int
            Number of input image channels (e.g., 1 for grayscale images).
        dim_hidden : int
            Width of the hidden fully connected layer.
        dropout : float
            Dropout rate applied before the output layer.
        """
        super().__init__()

        # Feature extractor: 200x200 -> 100x100 -> 50x50 (the pool is shared by both stages).
        self.conv1 = nn.Conv2d(in_channels, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)

        self.dropout = nn.Dropout(dropout)

        # Regression head.
        self.fc1 = nn.Linear(16 * 50 * 50, dim_hidden)
        self.fc2 = nn.Linear(dim_hidden, 1)

    def forward(self, x):
        """
        Parameters
        ----------
        x : torch.Tensor
            Batch of input images with shape (batch_size, in_channels, H, W).

        Returns
        -------
        torch.Tensor
            Predicted age for each image, shape (batch_size,).
        """
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))

        flat = features.view(features.size(0), -1)
        hidden = self.dropout(F.relu(self.fc1(flat)))

        # fc2 yields (batch_size, 1); drop the trailing singleton dimension.
        return self.fc2(hidden).squeeze(1)


### CNN 2


In [None]:
class CNN2(nn.Module):
    """Three-stage CNN followed by an MLP that outputs age-class logits.

    Parameters
    ----------
    age_classes : int
        Number of output classes.
    hidden_dim : int
        Width of the first fully connected layer.
    dropout : float
        Rate used by the spatial dropout of the first stage and by both
        dropouts of the MLP.
    """

    def __init__(self,
                 age_classes: int,
                 hidden_dim: int,
                 dropout: float
                 ):
        super().__init__()

        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
        pool_kwargs = dict(kernel_size=2, stride=2, padding=1)

        # Layer order matters: it fixes the indices used in state_dict keys.
        feature_layers = [
            # Stage 1: 1 -> 64 channels, the only stage with spatial dropout.
            nn.Conv2d(1, 64, **conv_kwargs),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.Dropout2d(dropout),
            nn.MaxPool2d(**pool_kwargs),
            # Stage 2: 64 -> 128 channels.
            nn.Conv2d(64, 128, **conv_kwargs),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),
            nn.MaxPool2d(**pool_kwargs),
            # Stage 3: 128 -> 256 channels.
            nn.Conv2d(128, 256, **conv_kwargs),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
            nn.MaxPool2d(**pool_kwargs),
            # Global average pooling: one value per channel whatever the input size.
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        self.cnnModel = nn.Sequential(*feature_layers)

        self.dnnModel = nn.Sequential(
            nn.Linear(256, hidden_dim),
            nn.LeakyReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 32),
            nn.LeakyReLU(),
            nn.Dropout(dropout),
        )

        self.age_classifier = nn.Linear(32, age_classes)

    def forward(self, x):
        """Return the age-class logits, shape (batch_size, age_classes)."""
        pooled = self.cnnModel(x)
        flat = pooled.view(pooled.size(0), -1)  # (batch, 256, 1, 1) -> (batch, 256)
        hidden = self.dnnModel(flat)
        return self.age_classifier(hidden)

# Comparison

In [None]:
def class_to_mid_age(idx: int, num_classes: int = 30, min_age: int = 1,
                     max_age: int = 90) -> float:
    """Map an age-class index to the centre of its age bin.

    The range ``[min_age, max_age + 1)`` is cut into ``num_classes`` bins of
    equal width and the centre of bin ``idx`` is returned. Only arithmetic
    operators are used, so ``idx`` may also be a tensor of indices, in which
    case a tensor of centres is returned.
    """
    width = (max_age - min_age + 1) / num_classes
    bin_start = min_age + idx * width
    half_width = width / 2.0
    return bin_start + half_width

In [None]:
def mae(pred: torch.Tensor, tgt: torch.Tensor) -> float:
    """Mean absolute error between predictions and targets, as a Python float."""
    abs_err = (pred - tgt).abs()
    return abs_err.mean().item()

def rmse(pred: torch.Tensor, tgt: torch.Tensor) -> float:
    """Root mean squared error between predictions and targets, as a Python float."""
    squared_err = (pred - tgt) ** 2
    mean_squared = torch.mean(squared_err).item()
    return math.sqrt(mean_squared)

def r2(pred: torch.Tensor, tgt: torch.Tensor) -> float:
    """Coefficient of determination ``1 - SS_res / SS_tot`` as a Python float.

    When every target is identical ``SS_tot`` is zero and the ratio is
    undefined. Instead of raising ``ZeroDivisionError`` the function then
    follows the scikit-learn ``r2_score`` convention: 1.0 for perfect
    predictions, 0.0 otherwise.
    """
    ss_res = torch.sum((tgt - pred) ** 2).item()
    ss_tot = torch.sum((tgt - torch.mean(tgt)) ** 2).item()
    if ss_tot == 0.0:
        return 1.0 if ss_res == 0.0 else 0.0
    return 1.0 - ss_res / ss_tot

In [None]:
# One entry per benchmarked model:
#   name       - label shown in the results table (also used by the benchmark
#                loop to pick the test loader of the VGG regression models)
#   task       - "regression" or "classification"; selects the evaluation function
#   checkpoint - path of the saved state dict, loaded after the model is built
#   build_fn   - zero-argument factory returning a fresh model instance
MODEL_CONFIGS = [
    {
        "name": "CNN1-reg",
        "task": "regression",
        "checkpoint": f"{CHECKPOINT_DIR}/cnn1.pt",
        "build_fn": lambda: CNN1(in_channels=1, dim_hidden=512, dropout=0.1),
    },
    {
        "name": "VGG16-reg",
        "task": "regression",
        "checkpoint": f"{CHECKPOINT_DIR}/vgg16_reg_epoch_40_max.pt",
        # pretrained=False: the weights come from the checkpoint above.
        "build_fn": lambda: timm.create_model(
            "vgg16_bn.tv_in1k", pretrained=False, in_chans=1, num_classes=1
        ),
    },
    {
        "name": "VGG16-reg-SmoothL1",
        "task": "regression",
        "checkpoint": f"{CHECKPOINT_DIR}/vgg16_reg_epoch_40_max_SmoothL1Loss.pt",
        "build_fn": lambda: timm.create_model(
            "vgg16_bn.tv_in1k", pretrained=False, in_chans=1, num_classes=1
        ),
    },
    {
        "name": "CNN2-cls",
        "task": "classification",
        "checkpoint": f"{CHECKPOINT_DIR}/cnn2_final.pt",
        "build_fn": lambda: CNN2(age_classes=30, hidden_dim=512, dropout=0.1),
    },
    {
        "name": "VGG16-cls",
        "task": "classification",
        "checkpoint": f"{CHECKPOINT_DIR}/vgg16_epoch_50_max.pt",
        # 30 outputs: one logit per age class.
        "build_fn": lambda: timm.create_model(
            "vgg16_bn.tv_in1k", pretrained=False, in_chans=1, num_classes=30
        ),
    }
]


In [None]:
# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to 1 so the comparison never fails on `None >= 12`.
NUM_WORKERS = 8 if (os.cpu_count() or 1) >= 12 else 2
BATCH_SIZE = 256           # identical for every model; tweak if GPU memory is low
NUM_CLASSES = 30           # number of age classes used by the classification models
torch.backends.cudnn.benchmark = True  # let cuDNN pick its algorithms by benchmarking

In [None]:
df_to_use = df


def _make_test_loader(frame, label_mode):
    """Return ``(test_dataset, test_loader)`` of ``frame`` for one label mode.

    The split made by ``build_datasets`` is seeded, so every label mode is
    evaluated on the same test rows.
    """
    _unused_train, test_split = build_datasets(
        frame, min_age=1, max_age=90, num_classes=NUM_CLASSES, label_mode=label_mode
    )
    loader = DataLoader(
        test_split, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=True
    )
    return test_split, loader


# One test loader per preprocessing / label flavour.
test_cls, test_loader_cls = _make_test_loader(df_to_use, "class")
test_vgg_reg, test_loader_vgg_reg = _make_test_loader(df_to_use, "vgg_reg")
test_reg, test_loader_reg = _make_test_loader(df_to_use, "regression")

print(f"Classification test samples : {len(test_cls):,}")
print(f"Regression    test samples : {len(test_reg):,}\n")

Classification test samples : 2,362
Regression    test samples : 2,362



In [None]:
def evaluate_classification(model: torch.nn.Module,
                            loader: DataLoader,
                            num_classes: int = NUM_CLASSES) -> Dict[str, float]:
    """Evaluate an age classifier on ``loader``.

    Returns a dict with the top-1 accuracy in percent and a mean absolute
    error in years, the latter computed between the bin mid-points of the
    predicted and of the true classes (so it is only an approximation of the
    error on the real ages).
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    abs_err_total = 0.0

    with torch.no_grad():
        for images, targets in loader:
            images = images.to(DEVICE)
            targets = targets.to(DEVICE)

            predicted = torch.argmax(model(images), dim=1)

            n_correct += (predicted == targets).sum().item()
            n_seen += targets.size(0)

            # Mid-point approximation: compare bin centres, not exact ages.
            true_ages = class_to_mid_age(targets, num_classes).to(DEVICE)
            predicted_ages = class_to_mid_age(predicted, num_classes).to(DEVICE)
            abs_err_total += torch.sum(torch.abs(predicted_ages - true_ages)).item()

    accuracy_pct = 100.0 * n_correct / n_seen
    mean_abs_err = abs_err_total / n_seen
    return {
        "top1_acc(%)": accuracy_pct,
        "MAE(years)": mean_abs_err,
    }

In [None]:
def evaluate_regression(model: torch.nn.Module,
                        loader: DataLoader) -> Dict[str, float]:
    """Evaluate an age regressor on ``loader``.

    Predictions and targets of all batches are flattened and concatenated,
    then MAE, RMSE and R2 are computed over the whole set at once.
    """
    model.eval()
    pred_chunks = []
    target_chunks = []

    with torch.no_grad():
        for images, targets in loader:
            batch_targets = targets.to(DEVICE).float().view(-1)
            batch_preds = model(images.to(DEVICE)).view(-1)

            pred_chunks.append(batch_preds)
            target_chunks.append(batch_targets)

    all_preds = torch.cat(pred_chunks)
    all_targets = torch.cat(target_chunks)

    return {
        "MAE(years)": mae(all_preds, all_targets),
        "RMSE(years)": rmse(all_preds, all_targets),
        "R2": r2(all_preds, all_targets),
    }

## Results

### UTKFACE

In [None]:
# Benchmark every configured model on the current test loaders.
results = []
for cfg in MODEL_CONFIGS:
    name = cfg["name"]
    task = cfg["task"]
    ckpt = cfg["checkpoint"]

    print(f"→ Loading {name} …")
    model = cfg["build_fn"]().to(DEVICE)
    state = torch.load(ckpt, map_location=DEVICE)

    # strict=False tolerates key mismatches, which means a wrong checkpoint
    # would leave layers randomly initialised without any error. Surface the
    # report so that such a run cannot be mistaken for a real result.
    load_report = model.load_state_dict(state, strict=False)
    if load_report.missing_keys or load_report.unexpected_keys:
        print(
            f"[WARN] {name}: checkpoint does not fully match the model "
            f"(missing keys: {len(load_report.missing_keys)}, "
            f"unexpected keys: {len(load_report.unexpected_keys)})"
        )

    # choose appropriate loader / evaluator
    if task == "classification":
        metrics = evaluate_classification(model, test_loader_cls)
    elif task == "regression":
        # The VGG regression models have their own loader.
        if name in ["VGG16-reg", "VGG16-reg-SmoothL1"]:
            metrics = evaluate_regression(model, test_loader_vgg_reg)
        else:
            metrics = evaluate_regression(model, test_loader_reg)
    else:
        raise ValueError(f"Unknown task type: {task}")

    results.append(
        {"model": name, "task": task, **metrics}
    )

    # Release the model before the next one is built.
    del model, state
    torch.cuda.empty_cache(); gc.collect()


res_df = pd.DataFrame(results)
print("\n===== BENCHMARK RESULTS =====")
print(res_df.to_string(index=False, justify="center"))

→ Loading CNN1-reg …
→ Loading VGG16-reg …
→ Loading VGG16-reg-SmoothL1 …
→ Loading CNN2-cls …
→ Loading VGG16-cls …

===== BENCHMARK RESULTS =====
      model             task       MAE(years)  RMSE(years)    R2     top1_acc(%)
          CNN1-reg     regression   2.570696    4.545654   0.943272        NaN  
         VGG16-reg     regression   7.097814    9.478301   0.753359        NaN  
VGG16-reg-SmoothL1     regression   6.382741    8.725995   0.790958        NaN  
          CNN2-cls classification  10.108806         NaN        NaN  22.015241  
         VGG16-cls classification   5.672312         NaN        NaN  29.254869  


### APPA

In [None]:
df_to_use = df_apa


def _make_test_loader(frame, label_mode):
    """Return ``(test_dataset, test_loader)`` of ``frame`` for one label mode.

    The split made by ``build_datasets`` is seeded, so every label mode is
    evaluated on the same test rows.
    """
    _unused_train, test_split = build_datasets(
        frame, min_age=1, max_age=90, num_classes=NUM_CLASSES, label_mode=label_mode
    )
    loader = DataLoader(
        test_split, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=True
    )
    return test_split, loader


# One test loader per preprocessing / label flavour.
test_cls, test_loader_cls = _make_test_loader(df_to_use, "class")
test_vgg_reg, test_loader_vgg_reg = _make_test_loader(df_to_use, "vgg_reg")
test_reg, test_loader_reg = _make_test_loader(df_to_use, "regression")

print(f"Classification test samples : {len(test_cls):,}")
print(f"Regression    test samples : {len(test_reg):,}\n")

Classification test samples : 758
Regression    test samples : 758



In [None]:
# Benchmark every configured model on the current test loaders.
results = []
for cfg in MODEL_CONFIGS:
    name = cfg["name"]
    task = cfg["task"]
    ckpt = cfg["checkpoint"]

    print(f"→ Loading {name} …")
    model = cfg["build_fn"]().to(DEVICE)
    state = torch.load(ckpt, map_location=DEVICE)

    # strict=False tolerates key mismatches, which means a wrong checkpoint
    # would leave layers randomly initialised without any error. Surface the
    # report so that such a run cannot be mistaken for a real result.
    load_report = model.load_state_dict(state, strict=False)
    if load_report.missing_keys or load_report.unexpected_keys:
        print(
            f"[WARN] {name}: checkpoint does not fully match the model "
            f"(missing keys: {len(load_report.missing_keys)}, "
            f"unexpected keys: {len(load_report.unexpected_keys)})"
        )

    # choose appropriate loader / evaluator
    if task == "classification":
        metrics = evaluate_classification(model, test_loader_cls)
    elif task == "regression":
        # The VGG regression models have their own loader.
        if name in ["VGG16-reg", "VGG16-reg-SmoothL1"]:
            metrics = evaluate_regression(model, test_loader_vgg_reg)
        else:
            metrics = evaluate_regression(model, test_loader_reg)
    else:
        raise ValueError(f"Unknown task type: {task}")

    results.append(
        {"model": name, "task": task, **metrics}
    )

    # Release the model before the next one is built.
    del model, state
    torch.cuda.empty_cache(); gc.collect()


res_df = pd.DataFrame(results)
print("\n===== BENCHMARK RESULTS =====")
print(res_df.to_string(index=False, justify="center"))

→ Loading CNN1-reg …
→ Loading VGG16-reg …
→ Loading VGG16-reg-SmoothL1 …
→ Loading CNN2-cls …
→ Loading VGG16-cls …

===== BENCHMARK RESULTS =====
      model             task       MAE(years)  RMSE(years)     R2     top1_acc(%)
          CNN1-reg     regression  16.319407   21.131105   -0.500616        NaN  
         VGG16-reg     regression  14.216772   18.769891   -0.183992        NaN  
VGG16-reg-SmoothL1     regression  14.249439   18.302177   -0.125721        NaN  
          CNN2-cls classification  26.441953         NaN         NaN   4.617414  
         VGG16-cls classification  14.576517         NaN         NaN   8.311346  
