# Face Similarity Detection with FaceNet

### Imports

In [None]:
import os

import torch
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

from facenet_pytorch import MTCNN, InceptionResnetV1

%matplotlib inline

### Image loading

In [None]:
def pil_loader(path):
    """
    Open the image file at `path` and return it converted to 'RGB'

    The file is opened in binary mode and the conversion is done while the
    file is still open.

    Return an image
    """
    with open(path, 'rb') as image_file:
        rgb_image = Image.open(image_file).convert('RGB')
    return rgb_image

    
def plot_img_list(img_list):
    """
    Display images side by side

    All images are drawn on a single row of one 10x10 figure, one subplot
    per image, with the axes hidden.
    """
    figure = plt.figure(figsize=(10, 10))

    # Subplot positions are 1-based, hence the enumeration starting at 1
    for position, picture in enumerate(img_list, start=1):
        figure.add_subplot(1, len(img_list), position)
        plt.axis('off')
        plt.imshow(picture)

In [None]:
# Directory the notebook is running from; the image folders used below are looked up inside it
working_dir = os.getcwd()
print(f"Your working directory : {working_dir}")

In [None]:
# Add the images you want to play with to a folder called 'test_images' in your working directory.
# The cropped faces are saved in a second folder called 'crop_images'.
img_dir = os.path.join(working_dir, 'test_images')
crop_dir = os.path.join(working_dir, 'crop_images')

# isdir (rather than exists) so that a plain file with the expected name is not mistaken for the folder
if os.path.isdir(img_dir):
    print(f"Your 'test_images' directory : {img_dir}")
else:
    print("Please create a folder called 'test_images' in your working directory")
if os.path.isdir(crop_dir):
    print(f"Your 'crop_images' directory : {crop_dir}")
else:
    print("Please create a folder called 'crop_images' in your working directory")

In [None]:
# os.listdir returns entries in arbitrary order, and the test sets below pick
# images by index, so sort the names to get a reproducible order.
# Sub-directories (e.g. a '.ipynb_checkpoints' folder) are skipped because
# they cannot be opened as image files.
img_filenames = sorted(
    name for name in os.listdir(img_dir)
    if os.path.isfile(os.path.join(img_dir, name))
)
print(img_filenames)

In [None]:
# Load every listed file as an RGB image, in the same order as img_filenames (tqdm displays the progress)
img_list = [pil_loader(os.path.join(img_dir, path)) for path in tqdm(img_filenames)]

In [None]:
# Show all the loaded images side by side
plot_img_list(img_list)

### Face detection and cropping

### Detect and crop faces with MTCNN

**Multi-task convolutional neural network (MTCNN)** works in three steps and uses one neural network for each. The first part is a proposal network: it predicts potential face positions and their bounding boxes, like an attention network in Faster R-CNN. The result of this step is a large number of face detections, many of which are false detections. The second part uses the images and the outputs of the first prediction; it refines the result to eliminate most of the false detections and aggregates the bounding boxes. The last part refines the predictions even further and adds facial landmark predictions.

<img src="img_pres/MTCNN.png" width="350"> 
Image from : https://www.mdpi.com/2076-3417/9/18/3774/htm

In [None]:
# Get the device : GPU / CPU on which torch runs
# (the first CUDA device when one is available, otherwise the CPU)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Running on device: {device}")

# Load the mtcnn model : the model that allows face detection and cropping
# NOTE(review): image_size=160 is presumably the side length (in pixels) of the cropped faces
# expected by the embedding model used later - confirm against the facenet_pytorch documentation
mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20, thresholds=[0.6, 0.7, 0.7],
              factor=0.709, post_process=True, device=device)

In [None]:
# Function that loops over a list of images, detects & crops faces, or adds them to the invalid images list (no face detected)
def detect_faces(img_list, img_filenames, mtcnn):
    """
    Detect the face on each image and save the cropped face in `crop_dir`

    img_list : images to process
    img_filenames : file name of each image, in the same order as `img_list`;
        the crop of an image is saved under the same name in `crop_dir`
    mtcnn : detector called on each image; a result of None is treated as
        "no face detected"

    Return a stacked tensor of the aligned faces (moved to `device`) and the
    list of images in which no face was detected.

    The rows of the returned tensor follow the order of the valid images only,
    so they line up with the indices of `img_list` only when the list of
    invalid images is empty. When no face is detected at all, an empty tensor
    is returned.
    """
    aligned_faces = []
    invalid_imgs = []

    for i, img in enumerate(tqdm(img_list)):
        crop_path = os.path.join(crop_dir, img_filenames[i])
        face = mtcnn(img, return_prob=False, save_path=crop_path)

        if face is None:
            invalid_imgs.append(img)
        else:
            aligned_faces.append(face)

    if not aligned_faces:
        # torch.stack raises on an empty list; return an empty tensor instead
        # so that the caller can still inspect the invalid images
        return torch.empty(0, device=device), invalid_imgs

    return torch.stack(aligned_faces).to(device), invalid_imgs

In [None]:
aligned_tensor, invalid_imgs = detect_faces(img_list, img_filenames, mtcnn)

# Images without a detected face have no row in aligned_tensor, so the
# index-based comparisons further down only line up with img_list once these
# images have been removed from the folder and the notebook has been re-run.
if invalid_imgs:
    print("Invalid images detected")
    print("Please remove invalid images detected")
    plot_img_list(invalid_imgs)

In [None]:
# Reload the cropped faces: each crop is stored in crop_dir under the name of
# its source image. A name for which no crop exists on disk (presumably
# because no face was detected on that image) is skipped instead of failing
# on a missing file.
crop_paths = [os.path.join(crop_dir, name) for name in img_filenames]
crop_imgs = [pil_loader(path) for path in tqdm(crop_paths) if os.path.isfile(path)]
plot_img_list(crop_imgs)

### - Create embeddings from pretrained model

**FaceNet** provides a unified embedding for face recognition, verification and clustering tasks. It maps each face image into a Euclidean space such that the distances in that space correspond to face similarity, i.e. an image of person A will be placed closer to all the other images of person A than to images of any other person present in the dataset.

The main difference between FaceNet and other techniques is that it learns the mapping from the images and creates embeddings rather than using any bottleneck layer for recognition or verification tasks. 

<img src="img_pres/facenet.png" width="500">

**FaceNet** uses a deep convolutional neural network (CNN). The network is trained such that the squared L2 distance between the embeddings corresponds to face similarity. Thanks to the triplet loss function, the model learns to place an anchor image (an image of a specific person A) closer to positive images (all the other images of person A) than to negative images (images of any other person).

In [None]:
# Load the Inception Resnet model : the model that will calculate embeddings based on cropped faces
pretrained_model = 'vggface2'
resnet = InceptionResnetV1(pretrained=pretrained_model).eval().to(device)

# Inference only: disable autograd so that no graph is built for the forward
# pass, then move the embeddings to the CPU for the distance computations
with torch.no_grad():
    embeddings = resnet(aligned_tensor).cpu()

### - Calculate distances between embeddings (images)

<img src="img_pres/cosin_vs_l2.png" width="500">

This is a visual representation of **Euclidean distance (d)** and **cosine similarity (θ)**. While cosine similarity looks at the angle between vectors (thus not taking into account their weight or magnitude), Euclidean distance is similar to using a ruler to actually measure the distance between them.

In [None]:
def calculate_distance_l2(embedding1, embedding2):
    """
    Calculate the Euclidean (L2) distance between 2 tensors

    The distance is the norm of the element-wise difference of the tensors.

    Return a dictionary with a distance type ("L2") and a distance value
    Range [0:∞] where 0 means shortest distance (perfect similarity) and ∞ means longest distance (lowest similarity)
    """
    difference = embedding1 - embedding2

    return {
        "distance_type": "L2",
        "distance_value": difference.norm(),
    }


def calculate_distance_cosin(embedding1, embedding2):
    """
    Calculate the cosine similarity (angle) of 2 tensors

    The similarity is computed along the first dimension (dim=0) of the tensors.

    Return a dictionary with a distance type ("Cosin") and a distance value
    (Range [-1:1] where -1 means opposite similarity and 1 means perfect similarity)
    """
    similarity = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=0)

    return {
        "distance_type": "Cosin",
        "distance_value": similarity,
    }


def similarity_detection(distance_dict, threshold=0.9):
    """
    Apply a threshold to a distance and print the verdict

    An "L2" distance counts as the same identity when it is lower than or
    equal to the threshold; a "Cosin" similarity counts as the same identity
    when it is greater than or equal to the threshold. Any other distance
    type is reported as different identities.
    """
    match_emoji = "\U0001f600"
    no_match_emoji = "\U0001F62D"

    kind = distance_dict.get("distance_type")
    value = distance_dict.get("distance_value")

    print(f"Distance type : {kind}")

    # The comparison direction depends on the measure: small L2 = similar, large cosine = similar
    is_match = (kind == "L2" and value <= threshold) or (kind == "Cosin" and value >= threshold)
    verdict = f"Same identity {match_emoji}" if is_match else f"Different identities {no_match_emoji}"
    print(verdict)

    print(f"With a threshold of : {threshold:.2f}")
    print(f"With a distance of : {value:.2f}")

### Testset 1

In [None]:
# Index positions (in img_list and embeddings) of the two images to compare
i_img1, i_img2 = 0, 1
img_to_compare = [img_list[i_img1], img_list[i_img2]]

In [None]:
# Display the two images being compared
plot_img_list(img_to_compare)

In [None]:
# Compute both measures (L2 distance and cosine similarity) between the embeddings of the two selected images
distance_dict_l2 = calculate_distance_l2(embeddings[i_img1], embeddings[i_img2])
distance_dict_cosin = calculate_distance_cosin(embeddings[i_img1], embeddings[i_img2])

In [None]:
# Extract the values from the result dictionaries and print them with two decimals
distance_value_l2 = distance_dict_l2.get("distance_value")
distance_value_cosin = distance_dict_cosin.get("distance_value")

print(f"Distance : Euclidean norm = {distance_value_l2:.2f}")
print(f"Distance : Cosin = {distance_value_cosin:.2f}")

In [None]:
# Decide on the identity from the L2 distance: at or below the threshold counts as the same identity
similarity_detection(distance_dict_l2, threshold=0.9)

### Testset 2

In [None]:
# Index positions (in img_list and embeddings) of the two images to compare
i_img1, i_img2 = 0, 3
img_to_compare = [img_list[i_img1], img_list[i_img2]]

In [None]:
# Display the two images being compared
plot_img_list(img_to_compare)

In [None]:
# Compute both measures (L2 distance and cosine similarity) between the embeddings of the two selected images
distance_dict_l2 = calculate_distance_l2(embeddings[i_img1], embeddings[i_img2])
distance_dict_cosin = calculate_distance_cosin(embeddings[i_img1], embeddings[i_img2])

In [None]:
# Extract the values from the result dictionaries and print them with two decimals
distance_value_l2 = distance_dict_l2.get("distance_value")
distance_value_cosin = distance_dict_cosin.get("distance_value")

print(f"Distance : Euclidean norm = {distance_value_l2:.2f}")
print(f"Distance : Cosin = {distance_value_cosin:.2f}")

In [None]:
# Decide on the identity from the L2 distance: at or below the threshold counts as the same identity
similarity_detection(distance_dict_l2, threshold=0.9)