### Import libraries

In [1]:
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from keras_vggface.vggface import VGGFace
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# using MTCNN for face detection and alignment
from mtcnn import MTCNN

# using dlib for face detection and alignment
from eye_alignment_multiple import align_faces	

import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import re
from sklearn.model_selection import train_test_split

### Use MTCNN for face detection and alignment (not recommended)

In [None]:
# run this if use MTCNN for face detection and alignment
face_detector = MTCNN()
print("Face detector model loded...")

In [None]:
# Lists to store images and labels
dataset_images = []
dataset_labels = []
label_map = {}  # Mapping of labels to indices
label_index = 0

def MTCNN_add_image(image_path):
    """Loads images and extracts embeddings for training."""
    global label_index

    root, _ = os.path.splitext(image_path)
    label = os.path.split(root)[-1]
    match = re.search(r"^(\w+)_", label)
    label = match.group(1)

    if label not in label_map:
        label_map[label] = label_index
        label_index += 1

    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    faces = face_detector.detect_faces(image) # use MTCNN for face detection and alignment

    if not faces:
        print(f"No face found in `{label}`, skipping.")
        return

    if len(faces) > 1:
        print(f"Multiple faces found in `{label}`, skipping.")
        return
    x, y, w, h = faces[0]["box"]
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(image.shape[1], x + w), min(image.shape[0], y + h)

    cropped_face = image[y1:y2, x1:x2]
    resized_face = cv2.resize(cropped_face, (224, 224))
    resized_face = resized_face.reshape(1, 224, 224, 3)

    dataset_images.append(resized_face)
    dataset_labels.append(label_map[label])

# Load images from dataset
for dir, _, files in os.walk("../Member Photos"):
    for file in files:
        MTCNN_add_image(os.path.join(dir, file))

dataset_images = np.array(dataset_images)
dataset_labels = np.array(dataset_labels)
dataset_labels = to_categorical(dataset_labels, num_classes=len(label_map))

print(f"Loaded {len(dataset_images)} images for training.")

### Use dlib for face detection and alignment (faster than MTCNN)

In [2]:
# Lists to store images and labels
dataset_images = []
dataset_labels = []
label_map = {}  # Mapping of labels to indices
label_index = 0

def dlib_add_image(image_path):
    """Loads images and extracts embeddings for training."""
    global label_index

    root, _ = os.path.splitext(image_path)
    label = os.path.split(root)[-1]
    match = re.search(r"^(\w+)_", label)
    label = match.group(1)

    if label not in label_map:
        label_map[label] = label_index
        label_index += 1

    image = cv2.imread(image_path)
    faces, bounding_boxes = align_faces(image)  # use dlib for face detection and alignment

    if not faces:
        print(f"No face found in `{label}`, skipping.")
        return

    if len(faces) > 1:
        print(f"Multiple faces found in `{label}`, skipping.")
        return

    resized_face = cv2.resize(faces[0], (224, 224))

    dataset_images.append(resized_face)
    dataset_labels.append(label_map[label])

# Load images from dataset
for dir, _, files in os.walk("../Member Photos"):
    for file in files:
        dlib_add_image(os.path.join(dir, file))

dataset_images = np.array(dataset_images)
dataset_labels = np.array(dataset_labels)
dataset_labels = to_categorical(dataset_labels, num_classes=len(label_map))

print(f"Loaded {len(dataset_images)} images for training.")

No face detected!
No face found in `aaryan`, skipping.
No face detected!
No face found in `aaryan`, skipping.
No face detected!
No face found in `ethan`, skipping.
No face detected!
No face found in `ethan`, skipping.
No face detected!
No face found in `ethan`, skipping.
No face detected!
No face found in `ethan`, skipping.
No face detected!
No face found in `jinwei`, skipping.
No face detected!
No face found in `jinwei`, skipping.
No face detected!
No face found in `jinwei`, skipping.
No face detected!
No face found in `jinwei`, skipping.
Loaded 83 images for training.


In [3]:
print("shape of dataset_images:", dataset_images.shape)
print("shape of dataset_labels:", dataset_labels.shape)

shape of dataset_images: (83, 224, 224, 3)
shape of dataset_labels: (83, 6)


In [4]:
# split the dataset into training and validation sets
train_images, val_images, train_labels, val_labels = train_test_split(
    dataset_images, dataset_labels, test_size=0.2, stratify=dataset_labels, random_state=42
)

### Load and fit ResNet50 for feature extraction

In [None]:
BASE = r"C:/Users/jy158/Desktop/NTU/Notes/Y4S2/EE4228 Intelligent System Design/Assignment/EE4228_Assignment_2/"
checkpoint_path = BASE + "checkpoints/resnet50_face_recognition.h5"

# Create a callback that saves the model's weights during training
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_best_only=True,  # Save only the best model based on validation loss
    monitor="val_loss",
    mode="min",
    verbose=1
)

# Custom callback to compute precision, recall, and F1-score after each epoch
class MetricsCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        y_pred = np.argmax(self.model.predict(val_images), axis=1)  # Get predicted class labels
        y_true = np.argmax(val_labels, axis=1)  # Convert one-hot labels to class indices

        precision = precision_score(y_true, y_pred, average='macro')
        recall = recall_score(y_true, y_pred, average='macro')
        f1 = f1_score(y_true, y_pred, average='macro')
        accuracy = accuracy_score(y_true, y_pred)

        print(f"\nEpoch {epoch+1} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}, Accuracy: {accuracy:.4f}")

# Load the pre-trained ResNet50 model without the top classification layer
base_model = VGGFace(model='resnet50', include_top=False, input_shape=(224, 224, 3))

# Unfreeze some layers for fine-tuning
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Get number of classes
num_classes = len(dataset_labels[1])

# Add custom layers for classification
x = Flatten()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax')(x)

# Define new model
model = Model(inputs=base_model.input, outputs=predictions)

In [6]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy')

# Train the model
model.fit(
    train_images, train_labels,
    batch_size=32,
    epochs=20,
    validation_data=(val_images, val_labels),
    callbacks=[checkpoint_callback, MetricsCallback()]
)

Epoch 1/20
Epoch 1: val_loss improved from inf to 0.06422, saving model to C:/Users/jy158/Desktop/NTU/Notes/Y4S2/EE4228 Intelligent System Design/Assignment/real-time-one-shot-face-recognition/checkpoints\resnet50_face_recognition.h5

Epoch 1 - Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Accuracy: 1.0000
Epoch 2/20
Epoch 2: val_loss improved from 0.06422 to 0.01037, saving model to C:/Users/jy158/Desktop/NTU/Notes/Y4S2/EE4228 Intelligent System Design/Assignment/real-time-one-shot-face-recognition/checkpoints\resnet50_face_recognition.h5

Epoch 2 - Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Accuracy: 1.0000
Epoch 3/20
Epoch 3: val_loss improved from 0.01037 to 0.00865, saving model to C:/Users/jy158/Desktop/NTU/Notes/Y4S2/EE4228 Intelligent System Design/Assignment/real-time-one-shot-face-recognition/checkpoints\resnet50_face_recognition.h5

Epoch 3 - Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Accuracy: 1.0000
Epoch 4/20
Epoch 4: val_loss improved from 0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 19: val_loss did not improve from 0.00032

Epoch 19 - Precision: 0.7361, Recall: 0.7917, F1-score: 0.7524, Accuracy: 0.8235
Epoch 20/20
Epoch 20: val_loss did not improve from 0.00032

Epoch 20 - Precision: 0.7361, Recall: 0.7917, F1-score: 0.7524, Accuracy: 0.8235


<keras.callbacks.History at 0x17c01c674f0>

### Evaluate finetuned Model

In [None]:
# function to iterate through test images and make predictions
def predict_test_images(test_dir, model):
    # Get all image paths in test directory
    image_paths = [os.path.join(test_dir, f) for f in os.listdir(test_dir) 
                  if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    predictions = []
    
    for img_path in image_paths:
        # Load and preprocess image
        image = cv2.imread(img_path)
        faces, bounding_boxes = align_faces(image)  # use dlib for face detection and alignment

        if not faces:
            print(f"No face found, hence skipping.")
            return

        if len(faces) > 1:
            print(f"Multiple faces found, hence skipping.")
            return

        resized_face = cv2.resize(faces[0], (224, 224))
        resized_face = np.expand_dims(resized_face, axis=0)
        
        # Make prediction
        pred = model.predict(resized_face)
        pred_class = np.argmax(pred, axis=1)[0]  # Get the predicted class index
        confidence = np.max(pred)  # Get the confidence score
        
        predictions.append({
            'image_path': img_path,
            'predicted_class': pred_class,
            'confidence': confidence
        })
        
        print(f"Image: {os.path.basename(img_path)}")
        print(f"Predicted class: {pred_class}")
        print(f"Confidence: {confidence:.4f}")
        print("---")
    
    return predictions

In [None]:
checkpoint_path = BASE + "checkpoints/resnet50_face_recognition.h5"

base_model = VGGFace(model='resnet50', include_top=False, input_shape=(224, 224, 3))

if os.path.exists(checkpoint_path):
	print(f"Loading checkpoint from `{checkpoint_path}`...")
	# Add custom layers again for classification
	x = Flatten()(base_model.output)
	x = Dense(128, activation='relu')(x)
	x = Dropout(0.5)(x)
	predictions = Dense(num_classes, activation='softmax')(x)

	# Define the new model
	model = Model(inputs=base_model.input, outputs=predictions)

	# Load the weights from the checkpoint file
	model.load_weights(checkpoint_path)
else:
	print(f"Checkpoint `{checkpoint_path}` not found.")

Loading checkpoint from `C:/Users/jy158/Desktop/NTU/Notes/Y4S2/EE4228 Intelligent System Design/Assignment/real-time-one-shot-face-recognition/checkpoints/resnet50_face_recognition.h5`...


#### Predict on extra test images

In [None]:
# directory containing test images
test_dir = os.path.join(BASE, "test/")

# predict test images
finetune_predictions = predict_test_images(test_dir, model)

Image: 20250320_141710.jpg
Predicted class: 0
Confidence: 1.0000
---
Image: 20250320_141825.jpg
Predicted class: 5
Confidence: 0.9995
---
Image: 20250320_141904.jpg
Predicted class: 2
Confidence: 1.0000
---
Image: 20250320_142015.jpg
Predicted class: 4
Confidence: 0.9994
---
Image: 20250320_142048.jpg
Predicted class: 1
Confidence: 0.9915
---
Image: jinwei_04.jpg
Predicted class: 3
Confidence: 1.0000
---


In [None]:
# corresponding labels for the test images
print(label_map)

{'aaryan': 0, 'ethan': 1, 'eunice': 2, 'jinwei': 3, 'jonathan': 4, 'junyong': 5}


#### Predict on validation set

In [16]:
# Predict on validation set
y_pred_finetune = np.argmax(model.predict(val_images), axis=1)
y_true = np.argmax(val_labels, axis=1)

# Calculate metrics for the pretrained model
precision_finetune = precision_score(y_true, y_pred_finetune, average='macro')
recall_finetune = recall_score(y_true, y_pred_finetune, average='macro')
f1_finetune = f1_score(y_true, y_pred_finetune, average='macro')
accuracy_finetune = accuracy_score(y_true, y_pred_finetune)

print(f"Finetuned Model - Precision: {precision_finetune:.4f}, Recall: {recall_finetune:.4f}, F1-score: {f1_finetune:.4f}, Accuracy: {accuracy_finetune:.4f}")

Finetuned Model - Precision: 1.0000, Recall: 1.0000, F1-score: 1.0000, Accuracy: 1.0000


### Evaluate pre-trained model

In [None]:
pretrained_model = VGGFace(model='resnet50', include_top=False, input_shape=(224, 224, 3))

# Add custom layers again for classification
x = Flatten()(pretrained_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax')(x)

# Define the new model
pretrained_model = Model(inputs=pretrained_model.input, outputs=predictions)

#### Predict on extra test images

In [10]:
test_dir = os.path.join(BASE, "test/")

pretrained_predictions = predict_test_images(test_dir, pretrained_model)

Image: 20250320_141710.jpg
Predicted class: 5
Confidence: 0.5744
---
Image: 20250320_141825.jpg
Predicted class: 4
Confidence: 0.9624
---
Image: 20250320_141904.jpg
Predicted class: 1
Confidence: 0.6818
---
Image: 20250320_142015.jpg
Predicted class: 4
Confidence: 0.6332
---
Image: 20250320_142048.jpg
Predicted class: 4
Confidence: 0.5411
---
Image: jinwei_04.jpg
Predicted class: 0
Confidence: 0.3756
---


#### Predict on validation set

In [None]:
# Predict on validation set
y_pred_pretrained = np.argmax(pretrained_model.predict(val_images), axis=1)
y_true = np.argmax(val_labels, axis=1)

# Calculate metrics for the pretrained model
precision_pretrained = precision_score(y_true, y_pred_pretrained, average='macro')
recall_pretrained = recall_score(y_true, y_pred_pretrained, average='macro')
f1_pretrained = f1_score(y_true, y_pred_pretrained, average='macro')
accuracy_pretrained = accuracy_score(y_true, y_pred_pretrained)

print(f"Pretrained Model - Precision: {precision_pretrained:.4f}, Recall: {recall_pretrained:.4f}, F1-score: {f1_pretrained:.4f}, Accuracy: {accuracy_pretrained:.4f}")

Pretrained Model - Precision: 0.1389, Recall: 0.1528, F1-score: 0.1444, Accuracy: 0.1765


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
