# Face Recognition

#### Load Packages

In [4]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import cv2  # For image processing
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import (
    Conv2D, ZeroPadding2D, Activation, Input, BatchNormalization, MaxPooling2D,
    AveragePooling2D, Concatenate, Lambda, Flatten, Dense
)
from keras.initializers import glorot_uniform  # Xavier initialization
from keras import backend as K
from fr_utils import *  # Custom utility functions
from inception_blocks_v2 import *  # Custom inception block implementation

# Jupyter-specific configuration
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Print floating-point values in fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

# Use channels-first image tensors, i.e. (channels, height, width); the model below is built
# with input_shape=(3, 96, 96). Keras itself defaults to 'channels_last'.
# NOTE(review): presumably required by the pre-trained weights — confirm against inception_blocks_v2.
K.set_image_data_format('channels_first')


2025-01-17 12:47:07.010427: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-17 12:47:07.027279: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737100027.048011   30678 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737100027.053955   30678 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-17 12:47:07.074157: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

## 0 - Naive Face Verification

## 1 - Encoding face images into a 128-dimensional vector

### 1.1 - Using a ConvNet to compute encodings

In [8]:
# Build the face recognition model for channels-first inputs: 3 channels, 96x96 pixels.
# NOTE(review): faceRecoModel is presumably provided by the inception_blocks_v2 star-import — confirm.
FRmodel = faceRecoModel(input_shape=(3, 96, 96))


I0000 00:00:1737100029.777411   30678 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6257 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [9]:
# Report the model's total parameter count as a quick sanity check on the architecture
print("Total Params:", FRmodel.count_params())


Total Params: 3743280


### 1.2 The Triplet Loss

In [11]:
import tensorflow as tf

# Define the triplet loss function
def triplet_loss(y_true, y_pred, alpha=0.2):
    """
    Triplet loss, summed over all triplets in the batch.

    Each (anchor, positive, negative) triplet contributes
        max(||f(a) - f(p)||^2 - ||f(a) - f(n)||^2 + alpha, 0)
    and the returned value is the sum of those contributions.

    Arguments:
    y_true -- Ignored; present only so the signature matches a Keras loss function.
    y_pred -- Sequence that unpacks into exactly three tensors, in this order:
              anchor, positive, negative. Distances are reduced over the last axis.
    alpha -- Margin added to the distance difference before clamping at zero (default is 0.2).

    Returns:
    loss -- Scalar tensor: the sum of the clamped per-triplet terms.
    """
    anchor, positive, negative = y_pred

    def _squared_distance_to_anchor(other):
        # Squared L2 distance between the anchor and `other`, reduced over the last axis.
        return tf.reduce_sum(tf.square(anchor - other), axis=-1)

    # Positive-pair distance minus negative-pair distance, shifted by the margin.
    margin_violation = (
        _squared_distance_to_anchor(positive)
        - _squared_distance_to_anchor(negative)
        + alpha
    )

    # Clamp every term at zero, then add the terms up into a single scalar.
    return tf.reduce_sum(tf.maximum(margin_violation, 0.0))


In [12]:
# Fix TensorFlow's global random seed so the sampled tensors below are reproducible
tf.random.set_seed(1)

# Sample stand-in encodings: three tensors of shape (3, 128),
# ordered (anchor, positive, negative) as triplet_loss unpacks them
y_pred = (
    tf.random.normal([3, 128], mean=6, stddev=0.1, seed=1),
    tf.random.normal([3, 128], mean=1, stddev=1, seed=1),
    tf.random.normal([3, 128], mean=3, stddev=4, seed=1),
)

# y_true is not used by triplet_loss, so None is passed in its place
loss = triplet_loss(None, y_pred)

# Show the scalar loss to four decimal places
print(f"Loss = {loss.numpy():.4f}")


Loss = 527.2598


## 2 - Loading the pre-trained model

In [14]:
# Compile the Face Recognition model with the Adam optimizer and the triplet loss defined above.
# NOTE(review): no fit/evaluate call appears in the visible cells, so the optimizer, loss
# and metric configured here do not appear to be exercised in this notebook.
FRmodel.compile(
    optimizer='adam', 
    loss=triplet_loss, 
    metrics=['accuracy']  # Note: Accuracy may not be meaningful for triplet loss
)

# Load pre-trained weights into the compiled model
load_weights_from_FaceNet(FRmodel)


## 3 - Applying the model

### 3.1 - Face Verification

In [17]:
# Name -> encoding mapping, filled in by the loop below
database = {}

# Enrolled users and the image file each one is encoded from
user_images = {
    "danielle": "images/danielle.png",
    "younes": "images/younes.jpg",
    "tian": "images/tian.jpg",
    "andrew": "images/andrew.jpg",
    "kian": "images/kian.jpg",
    "dan": "images/dan.jpg",
    "sebastiano": "images/sebastiano.jpg",
    "bertrand": "images/bertrand.jpg",
    "kevin": "images/kevin.jpg",
    "felix": "images/felix.jpg",
    "benoit": "images/benoit.jpg",
    "arnaud": "images/arnaud.jpg",
}

# Encode every enrolled image with FRmodel. A failure is reported and that user is
# skipped, so the database may end up with fewer entries than user_images.
for name, image_path in user_images.items():
    try:
        encoded_face = img_to_encoding(image_path, FRmodel)
    except FileNotFoundError:
        print(f"Error: Image file '{image_path}' not found.")
    except Exception as e:
        print(f"Error encoding image for {name}: {e}")
    else:
        # Only store the encoding once it has been computed successfully
        database[name] = encoded_face


(1, 3, 96, 96)


I0000 00:00:1737100078.362371   31173 service.cc:148] XLA service 0x791ce80034a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737100078.362414   31173 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Laptop GPU, Compute Capability 8.9
2025-01-17 12:47:58.448030: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1737100078.845531   31173 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-01-17 12:48:00.254242: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:930] The NVIDIA driver's CUDA version is 12.4 which is older than the PTX compiler version 12.5.82. Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)
(1, 3, 96, 96)


I0000 00:00:1737100081.346957   31173 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


In [18]:
def verify(image_path: str, identity: str, database: dict, model, threshold: float = 0.7) -> tuple:
    """
    Verifies if the person in the provided image matches the specified identity.

    Args:
    - image_path (str): Path to the input image.
    - identity (str): Name of the person to verify. Must exist in the database.
    - database (dict): Mapping of names to their respective encodings (vectors).
    - model: Model instance handed to img_to_encoding to encode the image.
    - threshold (float): The image is accepted only when the distance is strictly
      below this value. Defaults to 0.7.

    Returns:
    - dist: Euclidean distance (as returned by np.linalg.norm) between the input image
      encoding and the stored encoding for the identity, or None if an error occurred.
    - door_open (bool): True if the door should open, False otherwise (including on error).

    Errors are reported with print() and signalled by a (None, False) return value;
    no exception is propagated to the caller.
    """
    try:
        # Step 1: Check the identity first. This skips the encoding work for unknown
        # identities and keeps a KeyError raised inside img_to_encoding from being
        # misreported as a missing identity.
        if identity not in database:
            print(f"Error: Identity '{identity}' not found in the database.")
            return None, False

        # Step 2: Compute the encoding for the input image
        encoding = img_to_encoding(image_path, model)

        # Step 3: Calculate the Euclidean distance between the input encoding and the stored encoding
        dist = np.linalg.norm(encoding - database[identity])

        # Step 4: Determine if the door should open based on the distance threshold
        if dist < threshold:
            print(f"It's {identity}, welcome in!")
            door_open = True
        else:
            print(f"It's not {identity}, please go away.")
            door_open = False

    except FileNotFoundError:
        # Handle missing image file
        print(f"Error: Image file '{image_path}' not found.")
        return None, False
    except Exception as e:
        # Best-effort: report any other failure instead of raising
        print(f"An unexpected error occurred: {e}")
        return None, False

    return dist, door_open


In [19]:
# Check whether the face in "images/camera_0.jpg" matches the stored encoding for "younes";
# the cell displays the returned (dist, door_open) tuple
verify("images/camera_0.jpg", "younes", database, FRmodel)


(1, 3, 96, 96)
It's younes, welcome in!


(0.659367, True)

In [20]:
# Check whether the face in "images/camera_2.jpg" matches the stored encoding for "kian";
# the cell displays the returned (dist, door_open) tuple
verify("images/camera_2.jpg", "kian", database, FRmodel)


(1, 3, 96, 96)
It's not kian, please go away.


(0.8622286, False)

### 3.2 - Face Recognition

In [22]:
def who_is_it(image_path, database, model, threshold=0.7):
    """
    Identifies the person in the given image by finding the closest match in the database.

    Arguments:
    image_path -- str: Path to the image for identification.
    database -- dict: A dictionary with names as keys and image encodings as values.
    model -- Model instance handed to img_to_encoding to encode the image.
    threshold -- float: Largest distance still reported as a match (default 0.7).
                 A minimum distance strictly greater than this prints "Not in the database."

    Returns:
    min_dist -- Minimum Euclidean distance between the input image encoding and the
                database encodings; float('inf') if the database is empty.
    identity -- str: Name of the closest database entry. It is returned even when
                min_dist exceeds the threshold, so callers must compare min_dist
                themselves. It is None only if the database is empty.
    """
    # Compute the encoding for the input image.
    encoding = img_to_encoding(image_path, model)

    # Start from "no candidate": infinite distance and no identity.
    min_dist = float('inf')
    identity = None

    # Iterate through the database to find the closest match.
    for name, db_enc in database.items():
        # Compute the L2 (Euclidean) distance between encodings.
        dist = np.linalg.norm(encoding - db_enc)

        # Strict comparison: on a tie the earlier entry is kept.
        if dist < min_dist:
            min_dist = dist
            identity = name

    # Print the result based on the threshold value.
    if min_dist > threshold:
        print("Not in the database.")  # Closest entry is too far away.
    else:
        print(f"It's {identity}, the distance is {min_dist:.2f}")  # Match found.

    return min_dist, identity


In [23]:
# Find the closest database entry for the face in "images/camera_0.jpg";
# the cell displays the returned (min_dist, identity) tuple
who_is_it("images/camera_0.jpg", database, FRmodel)


(1, 3, 96, 96)
It's younes, the distance is 0.66


(0.659367, 'younes')

### Future work
#### Ways to improve your facial recognition model
Although we won't implement it here, here are some ways to further improve the algorithm:
- Put more images of each person (under different lighting conditions, taken on different days, etc.) into the database. Then, given a new image, compare the new face to multiple pictures of that person. This would increase accuracy.
- Crop the images to just contain the face, and less of the "border" region around the face. This preprocessing removes some of the irrelevant pixels around the face, and also makes the algorithm more robust.


### References:

- Florian Schroff, Dmitry Kalenichenko, James Philbin (2015). [FaceNet: A Unified Embedding for Face Recognition and Clustering](https://arxiv.org/pdf/1503.03832.pdf)
- Yaniv Taigman, Ming Yang, Marc'Aurelio Ranzato, Lior Wolf (2014). [DeepFace: Closing the gap to human-level performance in face verification](https://research.fb.com/wp-content/uploads/2016/11/deepface-closing-the-gap-to-human-level-performance-in-face-verification.pdf) 
- The pretrained model we use is inspired by Victor Sy Wang's implementation and was loaded using his code: https://github.com/iwantooxxoox/Keras-OpenFace.
- Our implementation also took a lot of inspiration from the official FaceNet github repository: https://github.com/davidsandberg/facenet 
