In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras import layers as L
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

# Data Pre-processing

In [None]:
# Read the age/gender dataset from its CSV file
data = pd.read_csv('../data/age_gender.csv')


def _parse_pixels(pixel_string):
    """Convert one whitespace-separated pixel string into a float32 vector."""
    return np.array(pixel_string.split(), dtype="float32")


# The 'pixels' column holds text; swap each entry for its numeric array
data['pixels'] = data['pixels'].apply(_parse_pixels)

# Preview the first rows
data.head()

In [None]:
# Seed TensorFlow, NumPy and Python's `random` module with the same value
seed = 42
for set_seed in (tf.random.set_seed, np.random.seed, random.seed):
    set_seed(seed)

In [None]:
# Reshape each flat pixel vector into a 2-D image and resize it with OpenCV
def resize_image(img, size=(224, 224), src_shape=(48, 48)):
    """Reshape a flat pixel vector into a 2-D image and resize it.

    Parameters
    ----------
    img : np.ndarray
        Flat array holding ``src_shape[0] * src_shape[1]`` pixel values.
    size : tuple of int, default (224, 224)
        Target size, passed to ``cv2.resize`` as its ``dsize`` argument
        (OpenCV interprets it as ``(width, height)``).
    src_shape : tuple of int, default (48, 48)
        ``(rows, cols)`` of the image encoded in ``img``.

    Returns
    -------
    np.ndarray
        The resized 2-D image.

    Raises
    ------
    ValueError
        If ``img`` cannot be reshaped to ``src_shape``.
    """
    img = img.reshape(src_shape)  # flat vector -> 2-D image
    img = cv2.resize(img, size)  # rescale to the requested target size
    return img


In [None]:
# Resize every image, then append a trailing channel axis of length 1
X = np.asarray([resize_image(pixel_vector) for pixel_vector in data['pixels']])
X = X[..., np.newaxis]

In [None]:
# Normalize the pixel values to [0, 1] (assumes 8-bit intensities, 0-255).
# Divide in place so no second full-size copy of X is allocated; this needs
# X to have a floating dtype, which the float32 pixel parsing provides.
# NOTE: this mutates X, so re-running this cell on its own rescales the data
# again -- re-run the cell that builds X first.
X /= 255.0

In [None]:
# Report the dataset dimensions
n_rows, n_columns = data.shape
print(f'Total rows: {n_rows}')
print(f'Total columns: {n_columns}')

In [None]:
# Show a window of sample images together with their labels
first_row, n_samples = 1500, 20   # rows [first_row, first_row + n_samples) are displayed
grid_rows, grid_cols = 5, 5       # subplot grid; must offer at least n_samples slots

plt.figure(figsize=(16, 16))
# Number the panels by position within the window. Deriving the slot from
# `row % 25` would scramble the order whenever first_row is not a multiple
# of 25 and would overlap panels for windows longer than 25 rows.
for panel, row in enumerate(range(first_row, first_row + n_samples), start=1):
    plt.subplot(grid_rows, grid_cols, panel)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # Pixel vectors are flat; restore the 48x48 layout for display
    plt.imshow(data['pixels'].iloc[row].reshape(48, 48), cmap='gray')
    plt.xlabel(
        "Age:" + str(data['age'].iloc[row]) +
        "  Ethnicity:" + str(data['ethnicity'].iloc[row]) +
        "  Gender:" + str(data['gender'].iloc[row])
    )
plt.show()

In [None]:
# Regression target: the age column
y = data['age']

holdout_fraction = 0.30  # share of samples kept out of training
split_seed = 37          # same seed for both splits

# First cut: 70% training, 30% held out
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=holdout_fraction, random_state=split_seed
)

# Second cut: halve the held-out part -> 15% validation, 15% test
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=split_seed
)

# Custom CNN Model for Age Prediction

In [None]:
# Convolutional feature extractor: three conv/pool stages on 224x224x1 input
feature_layers = [
    L.InputLayer(input_shape=(224, 224, 1)),
    L.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    L.MaxPooling2D(pool_size=(2, 2)),
]

# Regression head: flatten, one hidden dense layer with dropout, one linear output
head_layers = [
    L.Flatten(),
    L.Dense(units=64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(units=1, activation='linear'),
]

model = tf.keras.Sequential(feature_layers + head_layers)

# Show the layer-by-layer summary
model.summary()

In [None]:
# Configure training: Adam optimizer, Huber loss, MAE reported as a metric
model.compile(
    loss='huber',
    metrics=['mae'],
    optimizer='adam',
)

In [None]:
# Callback that keeps only the best model seen during training
# (lowest validation loss). The file is written to the same location the
# evaluation section later loads the model from ('../models/age_model.keras'),
# so a fresh Restart & Run All evaluates the model trained in this run.
checkpoint_callback = ModelCheckpoint(
    '../models/age_model.keras',  # Path to save the best model (must match the load path)
    monitor='val_loss',     # Metric to monitor
    save_best_only=True,    # Save only the best model
    mode='min',             # Minimize the monitored metric
    verbose=1               # Print messages when saving the model
)

In [None]:
# Fit on the training split, validating on the separate validation split;
# the checkpoint callback is the only callback passed
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=64,
    epochs=20,
    callbacks=[checkpoint_callback],
)

In [None]:
# Score the in-memory model on the held-out test split
test_scores = model.evaluate(x=X_test, y=y_test, verbose=0)
loss, mae = test_scores
print(f'Test loss: {loss}')
print(f'Test MAE: {mae}')

# Evaluate on test set

In [None]:
# Restore a saved model and run it on the given inputs
def load_model_and_predict(model_path, X_test):
    """Load the Keras model stored at `model_path` and return its predictions for `X_test`."""
    restored_model = tf.keras.models.load_model(model_path)
    return restored_model.predict(X_test)

In [None]:
# Huber Loss function
def huber_loss(y_true, y_pred, delta=1.0):
    """Return the mean Huber loss between targets and predictions.

    Per element, with ``e = y_true - y_pred``:
    ``0.5 * e**2`` when ``|e| <= delta``, otherwise
    ``delta * (|e| - 0.5 * delta)``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Targets and predictions (NumPy arrays, pandas Series, lists, ...).
        They are compared positionally; pandas index labels are ignored.
    delta : float, default 1.0
        Error magnitude at which the loss switches from quadratic to linear.

    Returns
    -------
    float
        Mean loss over all elements.
    """
    # Coerce to plain float arrays so Series are not index-aligned and
    # lists are accepted.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Same element count but different layout, e.g. (n,) vs (n, 1) model
    # output: without this, subtraction would broadcast to an (n, n) matrix
    # and silently give a wrong mean.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_pred = y_pred.reshape(y_true.shape)

    error = y_true - y_pred
    abs_error = np.abs(error)
    squared_loss = 0.5 * np.square(error)
    linear_loss = delta * (abs_error - 0.5 * delta)
    return np.where(abs_error <= delta, squared_loss, linear_loss).mean()


In [None]:
# Predict ages for the test split using the saved model file
age_model_path = '../models/age_model.keras'
age_predictions = load_model_and_predict(
    model_path=age_model_path,
    X_test=X_test,
)


In [None]:
# Ground truth is the test-split target; collapse the predictions to 1-D
y_true_age = y_test
age_predictions = age_predictions.reshape(-1)

In [None]:
# Score the predictions with three error measures
mae = mean_absolute_error(y_true_age, age_predictions)
mse = mean_squared_error(y_true_age, age_predictions)
huber = huber_loss(y_true_age, age_predictions)

for label, score in (
    ("Mean Absolute Error", mae),
    ("Mean Squared Error", mse),
    ("Huber Loss", huber),
):
    print(f"{label}: {score}")


In [None]:
# Prediction error per sample (true minus predicted)
residuals = y_true_age - age_predictions

# One row of three diagnostic panels
fig, (ax_scatter, ax_resid, ax_hist) = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1: true vs predicted, with the identity line as reference
age_range = [y_true_age.min(), y_true_age.max()]
ax_scatter.scatter(y_true_age, age_predictions, alpha=0.5)
ax_scatter.plot(age_range, age_range, 'r--', lw=2)
ax_scatter.set_xlabel('True Age')
ax_scatter.set_ylabel('Predicted Age')
ax_scatter.set_title('True vs Predicted Age')

# Panel 2: residuals against the predicted value, with a zero reference line
ax_resid.scatter(age_predictions, residuals, alpha=0.5)
ax_resid.axhline(0, color='r', linestyle='--')
ax_resid.set_xlabel('Predicted Age')
ax_resid.set_ylabel('Residuals')
ax_resid.set_title('Residual Plot')

# Panel 3: distribution of the residuals
ax_hist.hist(residuals, bins=30, edgecolor='k', alpha=0.7)
ax_hist.set_xlabel('Error')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Histogram of Prediction Errors')

fig.tight_layout()
plt.show()