In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split


In [2]:
dataset_path = r"C:\Users\Farouk\Desktop\age-prediction\data\data-raw\UTKFace"
image_files = [os.path.join(dataset_path, f) for f in os.listdir(dataset_path) if f.endswith(".jpg")]
ages = [int(os.path.basename(f).split("_")[0]) for f in image_files]
train_files, val_files, train_ages, val_ages = train_test_split(image_files, ages, test_size=0.2, random_state=42)


In [3]:
class UTKFaceDataset(Dataset):
    def __init__(self, file_paths, ages, transform=None):
        self.file_paths = file_paths
        self.ages = ages
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        img_path = self.file_paths[idx]
        age = self.ages[idx]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (128, 128))
        if self.transform:
            img = self.transform(img)
        return img, age

In [4]:
# Transforms (using torchvision)
train_transforms = transforms.Compose([
    transforms.ToPILImage(), # Convert to PIL Image for transforms
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Example normalization
])

In [5]:
val_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
# Datasets and DataLoaders
train_dataset = UTKFaceDataset(train_files, train_ages, transform=train_transforms)
val_dataset = UTKFaceDataset(val_files, val_ages, transform=val_transforms)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) # No shuffle for validation


In [7]:
# TinyVGG model (PyTorch)
class TinyVGG(nn.Module):
    def __init__(self):
        super(TinyVGG, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1)  # Regression output
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [8]:
model = TinyVGG()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device) # Move model to GPU if available

criterion = nn.L1Loss()  # MAE loss
optimizer = optim.Adam(model.parameters())

train_steps = len(train_loader) # No need for // batch_size with DataLoader
val_steps = len(val_loader)

In [9]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=30):
    best_val_loss = float('inf')
    patience = 3
    epochs_no_improve = 0
    history = {'train_loss': [], 'val_loss': []}

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for images, ages in train_loader:
            images = images.to(device)
            ages = ages.to(device).float().unsqueeze(1) # Move to device, make float, add dimension

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, ages)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        train_loss /= len(train_loader)
        history['train_loss'].append(train_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, ages in val_loader:
                images = images.to(device)
                ages = ages.to(device).float().unsqueeze(1)

                outputs = model(images)
                loss = criterion(outputs, ages)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        history['val_loss'].append(val_loss)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping!")
                break
    return model, history


In [10]:
import mlflow
import mlflow.pytorch

In [11]:
mlflow.tensorflow.autolog()

In [12]:
mlflow.set_experiment("Age_prediction_Experiment ")

2025/02/08 22:17:27 INFO mlflow.tracking.fluent: Experiment with name 'Age_prediction_Experiment ' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/Users/Farouk/AppData/Local/Microsoft/Windows/INetCache/IE/1ZYJTOHE/mlruns/381621899599383846', creation_time=1739049447173, experiment_id='381621899599383846', last_update_time=1739049447173, lifecycle_stage='active', name='Age_prediction_Experiment ', tags={}>

In [13]:
with mlflow.start_run(run_name="TinyVGG_Model_best"):
    trained_model, history = train_model(model, train_loader, val_loader, criterion, optimizer)
    mlflow.pytorch.log_model(trained_model, "tinyvgg_model")
    mlflow.log_dict(history, "training_history.json") # Log history

Epoch 1/30, Train Loss: 13.9719, Val Loss: 11.4516
Epoch 2/30, Train Loss: 11.1254, Val Loss: 9.5211
Epoch 3/30, Train Loss: 9.9151, Val Loss: 8.3874
Epoch 4/30, Train Loss: 9.1024, Val Loss: 8.0438
Epoch 5/30, Train Loss: 8.6068, Val Loss: 7.7405
Epoch 6/30, Train Loss: 8.2867, Val Loss: 7.3239
Epoch 7/30, Train Loss: 7.9921, Val Loss: 7.2306
Epoch 8/30, Train Loss: 7.8453, Val Loss: 6.8888
Epoch 9/30, Train Loss: 7.5986, Val Loss: 6.7418
Epoch 10/30, Train Loss: 7.5235, Val Loss: 6.7270
Epoch 11/30, Train Loss: 7.4013, Val Loss: 6.8151
Epoch 12/30, Train Loss: 7.4327, Val Loss: 6.6322
Epoch 13/30, Train Loss: 7.2202, Val Loss: 6.6235
Epoch 14/30, Train Loss: 7.1989, Val Loss: 6.6815
Epoch 15/30, Train Loss: 7.1074, Val Loss: 6.5086
Epoch 16/30, Train Loss: 7.1878, Val Loss: 6.4100
Epoch 17/30, Train Loss: 6.9989, Val Loss: 6.8366
Epoch 18/30, Train Loss: 7.0160, Val Loss: 6.4667




Epoch 19/30, Train Loss: 6.9047, Val Loss: 6.8773
Early stopping!




In [14]:
save_directory = r"C:\Users\Farouk\Desktop\age-prediction\models" # Example for Windows - Replace with your path
model_save_path = os.path.join(save_directory, "Tinyvgg_Best.pth") # Combine path and filename

torch.save(model.state_dict(), model_save_path)  # Save the state dictionary
print(f"Model saved to: {model_save_path}")  # Print confirmation message

Model saved to: C:\Users\Farouk\Desktop\age-prediction\models\Tinyvgg_Best.pth
