In [None]:
# Imports
import os
import subprocess
import zipfile

import kaggle
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Define dataset paths
DATASET_NAME = "abhikjha/appa-real-face-cropped"
DATASET_PATH = "appa_real_dataset"
ZIP_FILE = "appa-real-face-cropped.zip"

# Download and extract the dataset only when the extraction directory is missing.
if not os.path.exists(DATASET_PATH):
    print("Downloading and extracting dataset...")
    # Run the Kaggle CLI with an argument list (no shell string) and raise
    # CalledProcessError on a non-zero exit status, so a failed download can
    # never fall through to extracting a stale zip left by an earlier run.
    subprocess.run(
        ["kaggle", "datasets", "download", "-d", DATASET_NAME, "-p", "."],
        check=True,
    )

    if os.path.exists(ZIP_FILE):
        # Extract into a temporary directory and rename it only on success.
        # DATASET_PATH therefore exists only when extraction completed, and an
        # interrupted run is retried instead of being reported as
        # "already exists".
        partial_path = DATASET_PATH + ".partial"
        with zipfile.ZipFile(ZIP_FILE, "r") as zip_ref:
            zip_ref.extractall(partial_path)
        os.rename(partial_path, DATASET_PATH)
        print("Dataset successfully extracted.")
    else:
        raise FileNotFoundError(f"Error: {ZIP_FILE} not found after download.")
else:
    print("Dataset already exists. Skipping download.")

Dataset already exists. Skipping download.


In [None]:
# Prepare dataset
image_dir = os.path.join(DATASET_PATH, "final_files", "final_files")  # image directory inside the dataset
labels_file = os.path.join(DATASET_PATH, "labels.csv")  # CSV holding the age labels

df = pd.read_csv(labels_file)

# Rebuild every path from its bare file name, so re-running this cell never
# stacks the image directory prefix more than once.
df["file_name"] = [
    os.path.join(image_dir, os.path.basename(name)) for name in df["file_name"]
]

# Write the table back over the same labels file it was read from.
df.to_csv(labels_file, index=False)

In [None]:
# Function to load training and validation data
def load_data(dataset_path, batch_size=32, img_size=(150, 150), validation_split=0.2):
    """Build the training and validation image generators.

    Reads ``labels.csv`` from ``dataset_path``. Its ``file_name`` column is
    used as the image path as-is (``directory=None``) and its ``real_age``
    column as the raw regression target.

    Args:
        dataset_path: Directory containing ``labels.csv``.
        batch_size: Number of images per batch for both generators.
        img_size: ``(height, width)`` the images are resized to.
        validation_split: Fraction of the rows reserved for validation.

    Returns:
        A ``(train_gen, val_gen)`` tuple of Keras dataframe iterators.
    """
    df = pd.read_csv(os.path.join(dataset_path, "labels.csv"))

    # Random augmentation is applied to the training subset only.
    # NOTE(review): vertical_flip yields upside-down faces; it is kept here to
    # preserve the existing training setup -- confirm it is intended.
    train_datagen = ImageDataGenerator(
        rescale=1/255.,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=validation_split
    )

    # The validation generator only rescales. Sharing the augmenting generator
    # would randomly flip validation images and make the reported metrics
    # noisy. Both generators use the same validation_split over the same
    # dataframe so that the two subsets stay complementary.
    val_datagen = ImageDataGenerator(
        rescale=1/255.,
        validation_split=validation_split
    )

    # Arguments shared by both flow_from_dataframe calls.
    flow_kwargs = dict(
        dataframe=df,
        directory=None,
        x_col="file_name",
        y_col="real_age",
        target_size=img_size,
        batch_size=batch_size,
        class_mode='raw',
        seed=42
    )

    train_gen = train_datagen.flow_from_dataframe(subset='training', **flow_kwargs)

    # A fixed order keeps evaluation deterministic and lets predictions be
    # lined up with the dataframe rows.
    val_gen = val_datagen.flow_from_dataframe(
        subset='validation',
        shuffle=False,
        **flow_kwargs
    )

    return train_gen, val_gen

In [None]:
# Function to create the age prediction model
def create_model(input_shape, fine_tune_layers=None):
    """Create and compile the age-regression model on a ResNet50 backbone.

    Args:
        input_shape: Image shape passed to ResNet50, e.g. ``(150, 150, 3)``.
        fine_tune_layers: How many of the backbone's last layers stay
            trainable. ``None`` (default) leaves the whole backbone trainable,
            which is what this function has always done in practice: the
            previous "unfreeze the last 15 layers" loop had no effect because
            every layer was already trainable. Pass an integer (e.g. ``15``)
            to freeze all but the last N layers; ``0`` freezes the backbone.

    Returns:
        A compiled ``Sequential`` model with a single linear output, using
        MAE as both the loss and the reported metric.
    """
    backbone = ResNet50(input_shape=input_shape,
                        weights='imagenet',
                        include_top=False)

    if fine_tune_layers is not None:
        # Set the flag on every layer explicitly. Computing a cutoff index
        # avoids the `layers[-0:]` pitfall, where a count of 0 would select
        # all layers instead of none.
        trainable_count = max(int(fine_tune_layers), 0)
        cutoff = len(backbone.layers) - trainable_count
        for index, layer in enumerate(backbone.layers):
            layer.trainable = index >= cutoff

    model = Sequential([
        backbone,
        GlobalAveragePooling2D(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='linear')  # Regression output
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='mean_absolute_error',  # MAE loss for regression
        metrics=['mae']
    )

    return model

In [None]:
# Function to train the model
def train_model(model, train_data, val_data, batch_size=32, epochs=30):
    """Fit ``model`` on ``train_data`` and validate on ``val_data``.

    Each epoch runs ``len(train_data)`` training steps and ``len(val_data)``
    validation steps. ``batch_size`` is accepted but not used by this
    function.

    Returns:
        A ``(model, history)`` tuple, where ``history`` is whatever
        ``model.fit`` returned.
    """
    fit_options = {
        "validation_data": val_data,
        "epochs": epochs,
        "steps_per_epoch": len(train_data),
        "validation_steps": len(val_data),
        "verbose": 2,
    }
    training_history = model.fit(train_data, **fit_options)
    return model, training_history

In [None]:
# Build the training/validation generators from the labels file under DATASET_PATH
train_data, val_data = load_data(
    dataset_path=DATASET_PATH,
    batch_size=32,
    img_size=(150, 150),
)

Found 6073 validated image filenames.
Found 1518 validated image filenames.


In [None]:
# Build the network for 150x150 RGB inputs, then fit it for 30 epochs
model = create_model(input_shape=(150, 150, 3))
trained_model, history = train_model(
    model=model,
    train_data=train_data,
    val_data=val_data,
    epochs=30,
)

Epoch 1/30


  self._warn_if_super_not_called()


190/190 - 382s - 2s/step - loss: 15.4882 - mae: 15.4882 - val_loss: 16.6954 - val_mae: 16.6954
Epoch 2/30
190/190 - 351s - 2s/step - loss: 9.6292 - mae: 9.6292 - val_loss: 12.8929 - val_mae: 12.8929
Epoch 3/30
190/190 - 351s - 2s/step - loss: 8.7786 - mae: 8.7786 - val_loss: 11.9120 - val_mae: 11.9120
Epoch 4/30
190/190 - 353s - 2s/step - loss: 8.1172 - mae: 8.1172 - val_loss: 9.0173 - val_mae: 9.0173
Epoch 5/30
190/190 - 353s - 2s/step - loss: 7.6057 - mae: 7.6057 - val_loss: 11.0399 - val_mae: 11.0399
Epoch 6/30
190/190 - 353s - 2s/step - loss: 7.3612 - mae: 7.3612 - val_loss: 8.1152 - val_mae: 8.1152
Epoch 7/30
190/190 - 353s - 2s/step - loss: 7.0648 - mae: 7.0648 - val_loss: 7.2048 - val_mae: 7.2048
Epoch 8/30
190/190 - 354s - 2s/step - loss: 6.6182 - mae: 6.6182 - val_loss: 7.1888 - val_mae: 7.1888
Epoch 9/30
190/190 - 353s - 2s/step - loss: 6.4354 - mae: 6.4354 - val_loss: 7.4030 - val_mae: 7.4030
Epoch 10/30
190/190 - 353s - 2s/step - loss: 6.2419 - mae: 6.2419 - val_loss: 7.596

In [None]:
# Evaluate the model on the validation generator.
# val_data is the same validation split that was monitored during training, so
# the score is reported as a validation metric, not a held-out test metric.
# The variable names are kept in case later cells reference them.
test_loss, test_mae = trained_model.evaluate(val_data, verbose=2)
print(f"Validation MAE: {test_mae:.2f}")

48/48 - 13s - 270ms/step - loss: 6.5265 - mae: 6.5265
Test MAE: 6.53
