In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from tensorflow.keras import layers as L
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import seaborn as sns

# Data Pre-processing

In [None]:
# Read the dataset table from disk.
data = pd.read_csv('../data/age_gender.csv')

# Each 'pixels' entry is a whitespace-separated string; parse every row
# into a flat float32 NumPy array.
data['pixels'] = data['pixels'].apply(
    lambda pixel_text: np.array(pixel_text.split(), dtype="float32")
)

data.head()

In [None]:
# Seed the Python, NumPy and TensorFlow random number generators with the
# same value so that runs are repeatable.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Prepare the images: reshape each flat pixel vector to 2-D and resize it with OpenCV
def resize_image(img, size=(224, 224), src_shape=(48, 48)):
    """Reshape a flat pixel vector into a 2-D image and resize it with OpenCV.

    Args:
        img: NumPy array holding ``src_shape[0] * src_shape[1]`` pixel values.
        size: Target size passed straight to ``cv2.resize``.
            Defaults to ``(224, 224)``.
        src_shape: ``(rows, cols)`` of the image stored in ``img``.
            Defaults to ``(48, 48)``.

    Returns:
        The resized 2-D array returned by ``cv2.resize``.
    """
    img = img.reshape(src_shape)  # flat vector -> 2-D image
    return cv2.resize(img, size)  # rescale to the requested target size


In [None]:
# Resize every row's pixel vector and gather the results into one array.
X = np.array([resize_image(pixel_vec) for pixel_vec in data['pixels']])
# Append a trailing channel axis.
X = X[..., np.newaxis]

In [None]:
# Scale pixel values by 1/255.
# NOTE: X is reassigned from itself, so running this cell more than once
# divides the data again; re-run the cell that builds X first.
X = np.true_divide(X, 255.0)

In [None]:
# Report the size of the loaded table.
n_rows = len(data)
n_cols = len(data.columns)
print(f'Total rows: {n_rows}')
print(f'Total columns: {n_cols}')

In [None]:
# Preview 20 consecutive samples (rows 1500-1519) on a 5x5 grid.
plt.figure(figsize=(16, 16))
for slot, i in enumerate(range(1500, 1520), start=1):
    # The grid position comes from the loop counter rather than `i % 25`,
    # so the layout stays valid if the starting row is changed to a value
    # that is not a multiple of 25.
    plt.subplot(5, 5, slot)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # Each row stores a flat pixel vector; view it as a 48x48 grayscale image.
    plt.imshow(data['pixels'].iloc[i].reshape(48, 48), cmap='gray')
    plt.xlabel(
        f"Age:{data['age'].iloc[i]!s}"
        f"  Ethnicity:{data['ethnicity'].iloc[i]!s}"
        f"  Gender:{data['gender'].iloc[i]!s}"
    )
plt.show()

In [None]:
# Target vector: the 'gender' column.
y = data['gender']

# Stage 1: keep 70% of the samples for training and hold out the other 30%.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.30, random_state=37)

# Stage 2: cut the held-out 30% in half, giving 15% validation and 15% test.
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=37)

# Custom CNN Model for Gender Prediction

In [None]:
# Define the model: two Conv2D/MaxPooling stages followed by a small dense
# head ending in a single sigmoid unit.
model = tf.keras.Sequential([
    # The single source of truth for the input shape: 224x224, one channel.
    L.InputLayer(input_shape=(224, 224, 1)),
    # No per-layer input_shape here: the shape is taken from the InputLayer,
    # and a second, different specification would contradict it.
    L.Conv2D(32, (3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(64, (3, 3), activation='relu'),
    L.MaxPooling2D((2, 2)),
    L.Flatten(),
    L.Dense(64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1, activation='sigmoid')
])

# Print the model summary
model.summary()

In [None]:
# Configure training: SGD optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='sgd',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

In [None]:

# Checkpoint callback: write the model to 'gender_model.keras' whenever the
# monitored validation loss improves (lower is better), logging each save.
checkpoint_callback = ModelCheckpoint(
    filepath='gender_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Train for 20 epochs, validating on the validation split after each epoch;
# the checkpoint callback is the only callback attached.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    callbacks=[checkpoint_callback],
)

In [None]:
# Score the in-memory model (as left by training) on the held-out test split.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_acc}")


# Evaluate on test set

In [None]:
# Helper: load a saved model from disk and run it on a batch of inputs
def load_model_and_predict(model_path, X_test):
    """Load the Keras model stored at ``model_path`` and return its
    ``predict`` output for ``X_test``."""
    return tf.keras.models.load_model(model_path).predict(X_test)

In [None]:
# Helper: translate a numeric gender code into a display label
def convert_gender_prediction(prediction):
    """Return 'Male' when ``prediction`` equals 1, otherwise 'Female'.

    NOTE(review): the 1 -> 'Male' mapping is taken as given here; confirm it
    against the dataset's label encoding (UTKFace-derived data is commonly
    documented as 0 = male, 1 = female, which would invert these labels).
    """
    if prediction == 1:
        return 'Male'
    return 'Female'

In [None]:
# Helper: draw a labelled confusion-matrix heatmap
def plot_confusion_matrix(y_true, y_pred, labels, title='Confusion Matrix'):
    """Compute the confusion matrix of ``y_true`` vs ``y_pred`` and show it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        labels: Label values, in the order used for rows and columns.
        title: Figure title.
    """
    counts = pd.DataFrame(
        confusion_matrix(y_true, y_pred, labels=labels),
        index=labels,
        columns=labels,
    )

    _, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    plt.show()

In [None]:
# Load the best checkpoint written during training and generate predictions.
# The path must match the file the ModelCheckpoint callback saves
# ('gender_model.keras'); pointing elsewhere would evaluate a missing or
# stale model on a fresh run.
gender_model_path = 'gender_model.keras'
gender_model_predictions = load_model_and_predict(gender_model_path, X_test)

# Threshold the sigmoid outputs at 0.5 to get binary class labels (0 or 1)
gender_predictions = (gender_model_predictions > 0.5).astype(int).flatten()

# Display labels, in the order used for reports and the confusion matrix
gender_labels = ['Female', 'Male']

# Convert numeric true labels and predictions to human-readable labels
gender_test_labels = [convert_gender_prediction(x) for x in y_test]
gender_predictions_labels = [convert_gender_prediction(x) for x in gender_predictions]

In [None]:
# Show the confusion matrix of true vs predicted gender labels.
plot_confusion_matrix(
    gender_test_labels,
    gender_predictions_labels,
    labels=gender_labels,
    title='Gender Prediction Confusion Matrix',
)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
gender_accuracy = accuracy_score(
    y_true=gender_test_labels,
    y_pred=gender_predictions_labels,
)
print(f"Gender Prediction Accuracy: {gender_accuracy:.4f}")

In [None]:

# Precision, recall and F1, all computed with 'Male' as the positive class.
precision = precision_score(
    y_true=gender_test_labels, y_pred=gender_predictions_labels, pos_label='Male'
)
recall = recall_score(
    y_true=gender_test_labels, y_pred=gender_predictions_labels, pos_label='Male'
)
f1 = f1_score(
    y_true=gender_test_labels, y_pred=gender_predictions_labels, pos_label='Male'
)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

In [None]:
# Print the per-class classification report.
# `labels` is passed together with `target_names` so each row is tied to an
# explicit class rather than to the implicit sorted order of the labels that
# happen to be present.
print("\nClassification Report:")
print(
    classification_report(
        gender_test_labels,
        gender_predictions_labels,
        labels=gender_labels,
        target_names=gender_labels,
    )
)