In [None]:
# Data collection
import tensorflow as tf
from tensorflow.keras.utils import load_img
from tensorflow.keras.regularizers import l2
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input, BatchNormalization
import numpy as np
import random
import matplotlib.pyplot as plt
import os
import warnings
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

# Mount Google Drive at /content/drive (Colab only); the dataset path used below lives under it
from google.colab import drive
drive.mount('/content/drive')

# Data collection
BASE_DIR = '/content/drive/MyDrive/ML2CW1/train_val'
SHUFFLE_SEED = 42  # fixed seed so the shuffled order is the same on every run

age_labels = []
gender_labels = []
image_paths = []
skipped_files = []  # entries whose name does not carry valid "[age]_[gender]_..." labels

# os.listdir returns entries in arbitrary order; sort first so the seeded
# shuffle below always produces the same sequence.
image_filenames = sorted(os.listdir(BASE_DIR))
# A private Random instance keeps the global `random` state untouched.
random.Random(SHUFFLE_SEED).shuffle(image_filenames)  # random sequence for training

for image in tqdm(image_filenames):
  # File names follow "[age]_[gender]_[race]_[date&time].jpg"
  img_components = image.split('_')
  try:
    age_label = int(img_components[0])
    gender_label = int(img_components[1])
  except (ValueError, IndexError):
    # Stray or badly named entry: skip it instead of aborting the whole load
    skipped_files.append(image)
    continue

  # The gender head is binary (0/1); any other code cannot be used as a target
  if gender_label not in (0, 1):
    skipped_files.append(image)
    continue

  # Append to image list and feature lists in sequence
  age_labels.append(age_label)
  gender_labels.append(gender_label)
  image_paths.append(os.path.join(BASE_DIR, image))

print(f'No. of images: {len(image_paths)}, No. of gender: {len(gender_labels)}, No. of age: {len(age_labels)}')
if skipped_files:
  print(f'Skipped {len(skipped_files)} entries without valid labels, e.g. {skipped_files[:5]}')

# Collect paths and labels into one table: one row per image
import pandas as pd

df = pd.DataFrame({
    'image_path': image_paths,
    'age': age_labels,
    'gender': gender_labels,
})
df.head(10)

In [None]:
# Visualize data photos
from PIL import Image
import seaborn as sns

gender_mapping = {0: 'Male', 1: 'Female'} # Gender mapping

# randrange excludes its upper bound, so the index is always a valid row
# (random.randint(0, n) could return n and index one past the end).
rand_index = random.randrange(len(df))  # Show random image
IMG = Image.open(df['image_path'].iloc[rand_index])

gender_names = df['gender'].map(gender_mapping)
gender_counts = gender_names.value_counts()

fig, axes = plt.subplots(2, 2, figsize=(10, 8))

# Randomly picked image with its age and gender labels
ax_image = axes[0, 0]
ax_image.imshow(IMG)
ax_image.set_title(
    f'Age: {df["age"].iloc[rand_index]} Gender: {gender_mapping[df["gender"].iloc[rand_index]]}'
)
ax_image.axis('off')

# Age distribution (histplot replaces the deprecated sns.distplot)
sns.histplot(x=df['age'], kde=True, ax=axes[0, 1])
axes[0, 1].set_title('Age Distribution')

# Gender distribution (pass the series as x=, not positionally as data)
sns.countplot(x=gender_names, ax=axes[1, 0])
axes[1, 0].set_title('Gender Distribution')

# Exact counts as text; .get avoids a KeyError if one class is absent
ax_text = axes[1, 1]
ax_text.text(
    0.5, 0.5,
    f'Male: {gender_counts.get("Male", 0)}\nFemale: {gender_counts.get("Female", 0)}',
    fontsize=12, ha='center',
)
ax_text.axis('off')

plt.tight_layout()
plt.show()

# Preview the first 20 rows as a 5x4 grid, each titled with its labels and pixel size
plt.figure(figsize=(25, 25))
samples = df.iloc[0:20]

for row in samples.itertuples():
    picture = np.array(load_img(row.image_path))
    height, width = picture.shape[0], picture.shape[1]

    # Grid slot is derived from the row's index label, which starts at 0
    ax = plt.subplot(5, 4, row.Index + 1)
    ax.axis('off')
    ax.set_title(f'Age: {row.age} | Gender: {gender_mapping[row.gender]} | Image Size: {height}x{width}')
    ax.imshow(picture)

In [None]:
# Data rearrangement
def extract_image_features(images, target_size=(128, 128)):
    """Load image files and stack their RGB pixels into one array.

    Args:
        images: Iterable of image file paths.
        target_size: Size passed to ``load_img`` for resizing. Defaults to
            (128, 128), the value that used to be hard-coded here.

    Returns:
        ``np.ndarray`` made by stacking ``np.array(img)`` for every path, in
        input order. Pixel values are left unscaled.
    """
    # color_mode='rgb' makes load_img return an RGB image, so a separate
    # img.convert('RGB') pass is not needed.
    features = [
        np.array(load_img(image, target_size=target_size, color_mode='rgb'))
        for image in tqdm(images)
    ]
    return np.array(features)

In [None]:
# Data pre-processing
X = extract_image_features(df['image_path'])

# A bare `X.shape` in the middle of a cell is never displayed, so print it
print(f'Feature array shape: {X.shape}')

# Scale pixels to [0, 1]. Casting to float32 first avoids the float64 array
# that `X / 255.0` would create (twice the memory).
X = X.astype('float32') / 255.0

# Training targets, in the same row order as X
y_gender = np.array(df['gender'])
y_age = np.array(df['age'])

input_shape = (128, 128, 3) # Set input arrays shape

# Pick one preprocessed image at random and inspect it channel by channel
rand_index = random.randint(0, len(X)-1)
random_image = X[rand_index]

# Reshape and plot picked image
plt.figure(figsize=(5, 5))
plt.imshow(random_image.reshape(input_shape))
plt.title('Reshaped Image')
plt.axis('off')
plt.show()

# Showing image size
print(f"Size of the image: {random_image.shape}")

# (console header, subplot title, colormap) for channel indices 0, 1, 2
channel_specs = (
    ("Red channel:", 'Red Channel', 'Reds'),
    ("\nGreen channel:", 'Green Channel', 'Greens'),
    ("\nBlue channel:", 'Blue Channel', 'Blues'),
)

# Print the number array of the three layers
print("\nNumber array of the three layers:")
for channel, (header, _, _) in enumerate(channel_specs):
    print(header)
    print(random_image[:, :, channel])

# Plot each channel with a matching colormap
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for channel, (ax, (_, title, cmap)) in enumerate(zip(axes, channel_specs)):
    ax.imshow(random_image[:, :, channel], cmap=cmap)
    ax.set_title(title)
    ax.axis('off')

plt.show()


In [None]:
# CNN network building
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

inputs = Input((input_shape))

# Four identical stages (conv -> max-pool -> batch-norm) with doubling filter counts
x = inputs
for filters in (64, 128, 256, 512):
    x = Conv2D(filters, kernel_size=(3, 3), activation='relu', kernel_regularizer=l2(0.001))(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)

flatten = Flatten()(x)

# One 512-unit dense head per task, both fed from the shared flattened features
dense_1 = Dense(512, activation='relu', kernel_regularizer=l2(0.001))(flatten)
dense_2 = Dense(512, activation='relu', kernel_regularizer=l2(0.001))(flatten)

dropout_1 = Dropout(0.4)(dense_1)
dropout_2 = Dropout(0.4)(dense_2)

output_1 = Dense(1, activation='sigmoid', name='gender_out')(dropout_1)
# NOTE(review): a ReLU output unit can get stuck predicting 0; consider
# activation='linear' for the age regression head.
output_2 = Dense(1, activation='relu', name='age_out')(dropout_2)

modelA = Model(inputs=[inputs], outputs=[output_1, output_2])

learning_rate = 0.0001  # Learning rate

optimizer = Adam(learning_rate=learning_rate)

# Losses and metrics are keyed by output-layer name. A flat metrics list such
# as ['accuracy', 'mae'] is broadcast to every output by some Keras versions
# (tracking a meaningless "age accuracy") and split per output by others;
# the dicts are unambiguous and keep the history keys the plots rely on
# ('gender_out_accuracy', 'age_out_mae').
modelA.compile(
    loss={'gender_out': 'binary_crossentropy', 'age_out': 'mae'},
    optimizer=optimizer,
    metrics={'gender_out': 'accuracy', 'age_out': 'mae'},
)

modelA.summary()

plot_model(modelA, show_shapes=True)


In [None]:
# Fit model A on both targets, holding out 20% for validation
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

history = modelA.fit(
    x=X,
    y=[y_gender, y_age],
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
)

# Persist the trained model on Google Drive
modelA.save('/content/drive/MyDrive/ML2CW1/age_gender_A.h5')

In [None]:
# Learning curves
import matplotlib.pyplot as plt
import numpy as np

# Per-epoch series recorded by fit()
acc_gender = history.history['gender_out_accuracy']
val_acc_gender = history.history['val_gender_out_accuracy']
loss_gender = history.history['gender_out_loss']
val_loss_gender = history.history['val_gender_out_loss']

mae_train_age = history.history['age_out_mae']
mae_val_age = history.history['val_age_out_mae']
loss_age = history.history['age_out_loss']
val_loss_age = history.history['val_age_out_loss']

epochs = range(len(acc_gender))

# Plot figures for gender classification
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_acc.plot(epochs, acc_gender, 'b', label='Training Accuracy')
ax_acc.plot(epochs, val_acc_gender, 'r', label='Validation Accuracy')
ax_acc.set_title('Gender Classification Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

ax_loss.plot(epochs, loss_gender, 'b', label='Training Loss')
ax_loss.plot(epochs, val_loss_gender, 'r', label='Validation Loss')
ax_loss.set_title('Gender Classification Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.tight_layout()
plt.show()

###########################################################

# Plot results for age estimation
fig, (ax_mae, ax_age_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_mae.plot(epochs, mae_train_age, 'b', label='Training MAE')
ax_mae.plot(epochs, mae_val_age, 'r', label='Validation MAE')
ax_mae.set_title('Age Estimation MAE')
ax_mae.set_xlabel('Epochs')
ax_mae.set_ylabel('Mean Absolute Error')
ax_mae.legend()

ax_age_loss.plot(epochs, loss_age, 'b', label='Training Loss')
ax_age_loss.plot(epochs, val_loss_age, 'r', label='Validation Loss')
ax_age_loss.set_title('Age Estimation Loss')
ax_age_loss.set_xlabel('Epochs')
ax_age_loss.set_ylabel('Loss')
ax_age_loss.legend()

plt.tight_layout()
plt.show()
##########################################################
# Report the epoch with the lowest total validation loss, i.e. the one
# EarlyStopping(monitor='val_loss') treats as best. A fixed [-11] index is
# only right when early stopping fired after exactly 10 stale epochs; it
# points at the wrong epoch (or raises IndexError) otherwise.
best_epoch = int(np.argmin(history.history['val_loss']))
print(f"Best epoch (lowest val_loss): {best_epoch + 1} of {len(epochs)}")

print("Validation Accuracy for Gender Classification:", val_acc_gender[best_epoch])
print("Validation Error for Gender Classification:", val_loss_gender[best_epoch])

print("Validation MAE for Age Estimation (best epoch):", mae_val_age[best_epoch])
print("Validation Error for Age Estimation (best epoch):", val_loss_age[best_epoch])


In [None]:
# CNN network learning (Transfer learning)
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model

def ResNet50_transfer(input_shape=(128, 128, 3), classes=1, trainable_layers=30):
    """Build a two-output model on top of an ImageNet-pretrained ResNet50.

    Args:
        input_shape: Input shape passed to ``ResNet50``.
        classes: Number of units in each of the two output layers.
        trainable_layers: How many of the last backbone layers stay
            trainable. Every earlier layer is frozen. ``0`` (or a negative
            value) freezes the whole backbone; a value larger than the
            layer count leaves it fully trainable.

    Returns:
        A ``Model`` named ``'modelB'`` with outputs ``gender_out`` (sigmoid)
        and ``age_out`` (ReLU), in that order.

    NOTE(review): the ImageNet weights are normally paired with
    ``tensorflow.keras.applications.resnet50.preprocess_input``; this notebook
    feeds pixels scaled to [0, 1] instead. Confirm that is intended.
    """
    base_model = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)

    # Use an explicit count rather than `layers[:-trainable_layers]`: with
    # trainable_layers=0 that slice is `layers[:0]` (empty), which would
    # freeze nothing instead of everything.
    frozen_count = max(len(base_model.layers) - trainable_layers, 0)
    for layer in base_model.layers[:frozen_count]:
        layer.trainable = False

    # Shared fully connected top
    x = base_model.output
    x = Flatten()(x)
    x = Dense(4096, activation='relu', name='fc1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)

    # Two task heads on the shared features
    output_1 = Dense(classes, activation='sigmoid', name='gender_out')(x)
    output_2 = Dense(classes, activation='relu', name='age_out')(x)

    # Combine base model with the custom top layer
    model = Model(inputs=base_model.input, outputs=[output_1, output_2], name='modelB')

    return model

input_shape = (128, 128, 3)

modelB = ResNet50_transfer(input_shape=input_shape)

learning_rate = 0.000007 # Learning rate

optimizer = Adam(learning_rate=learning_rate)

# Compile with losses and metrics keyed by output-layer name. A flat
# ['accuracy', 'mae'] list is broadcast to both outputs by some Keras versions
# and split per output by others; the dicts are unambiguous and keep the
# history keys the plots rely on ('gender_out_accuracy', 'age_out_mae').
modelB.compile(
    loss={'gender_out': 'binary_crossentropy', 'age_out': 'mae'},
    optimizer=optimizer,
    metrics={'gender_out': 'accuracy', 'age_out': 'mae'},
)

modelB.summary()

from tensorflow.keras.utils import plot_model
plot_model(modelB, show_shapes=True)


In [None]:
# Fit model B on both targets, holding out 20% for validation
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Note: this rebinds `history`, replacing any earlier training record under that name
history = modelB.fit(
    x=X,
    y=[y_gender, y_age],
    validation_split=0.2,
    epochs=200,
    batch_size=32,
    callbacks=[early_stopping],
)

# Persist the trained model on Google Drive
modelB.save('/content/drive/MyDrive/ML2CW1/age_gender_B.h5')


In [None]:
# Show learning curve
import matplotlib.pyplot as plt
import numpy as np

# Per-epoch series recorded by fit()
acc_gender = history.history['gender_out_accuracy']
val_acc_gender = history.history['val_gender_out_accuracy']
loss_gender = history.history['gender_out_loss']
val_loss_gender = history.history['val_gender_out_loss']

mae_train_age = history.history['age_out_mae']
mae_val_age = history.history['val_age_out_mae']
loss_age = history.history['age_out_loss']
val_loss_age = history.history['val_age_out_loss']

epochs = range(len(acc_gender))

# Plot figures for gender classification
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_acc.plot(epochs, acc_gender, 'b', label='Training Accuracy')
ax_acc.plot(epochs, val_acc_gender, 'r', label='Validation Accuracy')
ax_acc.set_title('Gender Classification Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

ax_loss.plot(epochs, loss_gender, 'b', label='Training Loss')
ax_loss.plot(epochs, val_loss_gender, 'r', label='Validation Loss')
ax_loss.set_title('Gender Classification Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.tight_layout()
plt.show()

###########################################################

# Plot results for age estimation
fig, (ax_mae, ax_age_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_mae.plot(epochs, mae_train_age, 'b', label='Training MAE')
ax_mae.plot(epochs, mae_val_age, 'r', label='Validation MAE')
ax_mae.set_title('Age Estimation MAE')
ax_mae.set_xlabel('Epochs')
ax_mae.set_ylabel('Mean Absolute Error')
ax_mae.legend()

ax_age_loss.plot(epochs, loss_age, 'b', label='Training Loss')
ax_age_loss.plot(epochs, val_loss_age, 'r', label='Validation Loss')
ax_age_loss.set_title('Age Estimation Loss')
ax_age_loss.set_xlabel('Epochs')
ax_age_loss.set_ylabel('Loss')
ax_age_loss.legend()

plt.tight_layout()
plt.show()
##########################################################
# Report the epoch with the lowest total validation loss, i.e. the one
# EarlyStopping(monitor='val_loss') treats as best. A fixed [-11] index is
# only right when early stopping fired after exactly 10 stale epochs; it
# points at the wrong epoch (or raises IndexError) otherwise.
best_epoch = int(np.argmin(history.history['val_loss']))
print(f"Best epoch (lowest val_loss): {best_epoch + 1} of {len(epochs)}")

# Print validation accuracy and error for gender classification at the best epoch
print("Validation Accuracy for Gender Classification:", val_acc_gender[best_epoch])
print("Validation Error for Gender Classification:", val_loss_gender[best_epoch])

# Print validation MAE and error for age estimation at the best epoch
print("Validation MAE for Age Estimation:", mae_val_age[best_epoch])
print("Validation Error for Age Estimation:", val_loss_age[best_epoch])