#  STEP 1:Getting Embeddings.

## It detects the face and returns the probability with which the face is detected.
## It returns the 512-dimensional embedding vector, which is the most crucial part of face recognition.
## MTCNN is used for face detection and for finding the bounding box (which is later used for drawing a box around the face).
## The Inception Resnet V1 model (defined here as resnet) is used for computing the embedding vector.

### Essential imports

In [1]:
import glob # for files
import random
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1,extract_face   #cloned from facenet pytorch model from github into "facenet_pytorch"
from PIL import Image,ImageDraw
import torch
import cv2
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import pandas as pd

## Essential functions.

### cos_sim(): Returns the cosine similarity of two vectors, i.e. their dot product divided by the product of their norms.
### It uses the numpy functions linalg.norm() and dot().


#### numpy.linalg.norm(x, ord=None, axis=None, keepdims=False)

    Matrix or vector norm.

    This function is able to return one of eight different matrix norms, or one of an infinite number of vector norms (described below), depending on the value of the ord parameter.
    Parameters:	

    x : array_like

        Input array. If axis is None, x must be 1-D or 2-D.
    ord : {non-zero int, inf, -inf, ‘fro’, ‘nuc’}, optional

        Order of the norm (see table under Notes). inf means numpy’s inf object.
    axis : {int, 2-tuple of ints, None}, optional

        If axis is an integer, it specifies the axis of x along which to compute the vector norms. If axis is a 2-tuple, it specifies the axes that hold 2-D matrices, and the matrix norms of these matrices are computed. If axis is None then either a vector norm (when x is 1-D) or a matrix norm (when x is 2-D) is returned.

    keepdims : bool, optional

        If this is set to True, the axes which are normed over are left in the result as dimensions with size one. With this option the result will broadcast correctly against the original x.

        

    Returns:	

    n : float or ndarray

        Norm of the matrix or vector(s).
        
        
####   numpy.dot(a, b, out=None)

    Dot product of two arrays. Specifically,

        If both a and b are 1-D arrays, it is inner product of vectors (without complex conjugation).

        If both a and b are 2-D arrays, it is matrix multiplication, but using matmul or a @ b is preferred.

        If either a or b is 0-D (scalar), it is equivalent to multiply and using numpy.multiply(a, b) or a * b is preferred.

        If a is an N-D array and b is a 1-D array, it is a sum product over the last axis of a and b.

        If a is an N-D array and b is an M-D array (where M>=2), it is a sum product over the last axis of a and the second-to-last axis of b:

        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])

    Parameters:	

    a : array_like

        First argument.
    b : array_like

        Second argument.
    out : ndarray, optional

        Output argument. This must have the exact kind that would be returned if it was not used. In particular, it must have the right type, must be C-contiguous, and its dtype must be the dtype that would be returned for dot(a,b). This is a performance feature. Therefore, if these conditions are not met, an exception is raised, instead of attempting to be flexible.

    Returns

    output : ndarray

        Returns the dot product of a and b. If a and b are both scalars or both 1-D arrays then a scalar is returned; otherwise an array is returned. If out is given, then it is returned.

    Raises:	

    ValueError

        If the last dimension of a is not the same size as the second-to-last dimension of b.



In [2]:
def cos_sim(a, b):
    """Return the cosine similarity of the vectors ``a`` and ``b``.

    The result is the dot product of the two inputs divided by the
    product of their norms.
    """
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    return numerator / denominator


### cos(): cos_sim returns real numbers in [-1, 1], where negative values have a different interpretation. So we use this function to rescale the result to [0, 1], so that only non-negative values are returned.

In [3]:
def cos(a,b):
    """Rescale ``cos_sim(a, b)`` from the range [-1, 1] onto [0, 1]."""
    lower, upper = -1, 1
    span = upper - lower
    return (cos_sim(a, b) - lower) / span


### verify():
#### The key function in face recognition.
It takes the embedding vector of the test image and compares it with the embedding vectors of the existing dataset.
The threshold is chosen by trial and error while checking the accuracy (which is seen later); the value quoted here was 0.81, but note that the code below uses 0.73.
If the image is not recognised, no name is mentioned, but if a face is detected, the bounding box is still drawn around it.
We print the names (they are also written on the image).

Here, boxes is a numpy ndarray, obtained from mtcnn.detect(), containing the coordinates of the bounding boxes of the detected faces.

In [15]:
def verify(embedding, threshold=0.73):
    """Return the name of the known face that best matches ``embedding``.

    Every vector in ``embedding`` is scored, via ``cos``, against every
    vector of the module-level ``embeddings``. Pairs whose score exceeds
    ``threshold`` are candidates, and the highest-scoring candidate wins.

    Args:
        embedding: Iterable of embedding vectors for the image under test.
        threshold: Score a pair must exceed to count as a match. The
            default, 0.73, is the value this function previously
            hard-coded.

    Returns:
        The entry of the module-level ``names`` at the index of the
        best-matching known embedding, or ``"Not Identified"`` when no
        pair scores above ``threshold``.
    """
    candidates = []
    for known_idx, known in enumerate(embeddings):
        for face_idx, face in enumerate(embedding):
            score = cos(known, face)
            if score > threshold:
                # Store a plain float so candidates compare as ordinary tuples.
                candidates.append((float(score), known_idx, face_idx))

    if not candidates:
        return "Not Identified"

    # Tuples compare element by element: the highest score wins and ties
    # fall to the larger index, exactly as max() over the old list did.
    _, best_idx, _ = max(candidates)
    return names[best_idx]
               

### get_files():
Gets the file list and saves it as the prediction set. We could save one part of the file list for training and another part for testing, but since we already have the dataset for training, we directly use all the files as the prediction set (or testing set).

If training and predicting are taken from files, then:

  training = files[:int(len(files)*0.80)] #get first 80% of file list
  
  prediction = files[-int(len(files)*0.20):] #get last 20% of file list

In [5]:
def get_files(nam):
    """Return the paths of all entries in the ``data/test_public/<nam>`` folder.

    Every match is used as the prediction set; nothing is held back for
    training.
    """
    pattern = "data/test_public/%s/*" % nam
    return glob.glob(pattern)

### make_sets():

This function stores the files (here the prediction dataset) and their corresponding labels.

The same can be done for training dataset(if we take training dataset from files)

training_data = []

training_labels = []

prediction_data = []

prediction_labels = []
    

    for nam in name:
    
        training, prediction = get_files(nam)
        #Append data to training and prediction list, and generate labels 0-7
            
            for item in training:
                 
                img = Image.open(item)
                
                img_cropped = mtcnn(img)
                
                training_data.append(img_cropped)
                
                training_labels.append(nam)
     
            for item in prediction: #repeat above process for prediction set
            
                img = Image.open(item)
            
                img_cropped = mtcnn(img)
                
                prediction_data.append(img_cropped)
                
                prediction_labels.append(nam)
     
    return training_data, training_labels, prediction_data, prediction_labels

In [6]:
def make_sets():
    """Build the prediction dataset from the folders listed in ``name``.

    For each label in the module-level ``name`` list, every path returned
    by ``get_files`` is opened and passed through ``mtcnn``. Images for
    which ``mtcnn`` returns ``None`` are skipped, so no ``None`` entry is
    handed on to the caller; data and labels therefore stay aligned.

    Returns:
        A ``(prediction_data, prediction_labels)`` pair of equal-length
        lists: the ``mtcnn`` output for each kept image and the label
        (folder name) that image came from.
    """
    prediction_data = []
    prediction_labels = []
    for nam in name:
        for item in get_files(nam):
            # The context manager closes the image file once it has been
            # processed instead of leaving the handle open.
            with Image.open(item) as img:
                img_cropped = mtcnn(img)
            if img_cropped is None:
                # Same guard the reference-embedding cell applies to mtcnn output.
                continue
            prediction_data.append(img_cropped)
            prediction_labels.append(nam)

    return prediction_data, prediction_labels



### run_recognizer():
#### The main accuracy-checking function.
If an image is correctly identified, 'correct' is incremented; otherwise 'incorrect' is incremented. In either case the image is counted towards the total.

Accuracy is defined as the percentage of correct predictions against the total number of images.


In [7]:
def run_recognizer():
    """Measure recognition accuracy over the prediction dataset.

    Each item returned by ``make_sets`` is embedded with ``resnet`` and
    passed to ``verify``; the predicted name is compared with the item's
    label. Every mismatch is printed, followed by the final
    correct / incorrect / total counts.

    Returns:
        The percentage (0-100) of items whose predicted name equals the
        label, or ``0.0`` when the prediction set is empty.
    """
    prediction_data, prediction_labels = make_sets()
    print ("predicting classification set")
    correct = 0
    incorrect = 0
    for image, label in zip(prediction_data, prediction_labels):
        # Inference only: skip building an autograd graph.
        with torch.no_grad():
            im = resnet(image)
        pred = verify(im)
        if pred == label:
            correct += 1
        else:
            print('correct:',label,'predicted:',pred)
            incorrect += 1

    total = correct + incorrect
    print(correct,incorrect,total)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    return (100*correct)/total

### Essential definitions.
#### name contains the names of the folders of the test images in the prediction dataset.
#### Since we use a model pretrained on the VGGFace2 database, only inference is needed, so a CPU is sufficient. We therefore define the device to be 'cpu'.
#### The pretrained Inception Resnet V1 model is referred to as resnet.
#### mtcnn is an instance of the MTCNN class; its parameters can be changed (refer to the facenet_pytorch repository).

In [12]:
# Names of the folders in the prediction dataset; each folder name is also
# used as the label for the images inside it.
name = [
    'Mammooty',
    'Mohanlal',
    'ShahRukh',
    'Shobana',
    'SriDevi',
    'Deepika Padukone',
    'Nazriya',
    'Varun Dhawan',
    'Hritik Roshan',
    'Nivin Pauly',
]

device = torch.device('cpu')

print('Running on device: {}'.format(device))

# Define MTCNN module

# Note that, since MTCNN is a collection of neural nets and other code, the
# device must be passed in the following way to enable copying of objects when
# needed internally.
mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, prewhiten=True,
    device=device
)


# Define Inception Resnet V1 module

# Move the model to the shared `device` instead of a second hard-coded 'cpu',
# so changing `device` above is enough to relocate both networks.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)


Running on device: cpu


In [13]:
# Define a dataset and data loader
dataset = datasets.ImageFolder('datas/pic')
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
# The collate function hands back the first sample of each batch unchanged,
# so the loop below receives one (image, class index) pair at a time.
loader = DataLoader(dataset, collate_fn=lambda x: x[0])

# Perform MTCNN facial detection
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    # Images for which mtcnn returns nothing are left out of the reference set.
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

# Calculate image embeddings
aligned = torch.stack(aligned).to(device)
# Inference only: no autograd graph is needed for the reference embeddings.
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

# NOTE: this rebinds the name `cos_sim`, replacing the numpy-based function of
# the same name with a torch module. From here on cos() (and therefore
# verify()) scores tensors through nn.CosineSimilarity.
cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)

# Print distance matrix for classes
dists = [[cos(e1,e2).item() for e2 in embeddings] for e1 in embeddings]
print(pd.DataFrame(dists, columns=names, index=names)) # helpful while analysing the results and for determining the value of threshold.





Face detected with probability: 0.999736
Face detected with probability: 0.999283
Face detected with probability: 0.999999
Face detected with probability: 0.999980
Face detected with probability: 0.999376
Face detected with probability: 0.998882
Face detected with probability: 0.999959
Face detected with probability: 0.999958
Face detected with probability: 0.999897
Face detected with probability: 0.999304
Face detected with probability: 0.999617
Face detected with probability: 0.999927
Face detected with probability: 0.999963
Face detected with probability: 0.999512
Face detected with probability: 0.999995
Face detected with probability: 0.999994
Face detected with probability: 0.999873
Face detected with probability: 0.999963
Face detected with probability: 0.999880
Face detected with probability: 0.999899
Face detected with probability: 0.999999
Face detected with probability: 0.999996
Face detected with probability: 0.999983
Face detected with probability: 1.000000
Face detected wi

In [16]:
# Rebind mtcnn with keep_all=True for the evaluation run.
mtcnn = MTCNN(keep_all=True)
metascore = []

# Run the accuracy check once and record the resulting percentage.
correct = run_recognizer()
metascore.append(correct)

print("Got", correct, "percent correct!")
print("\n\nend score:", np.mean(metascore), "percent correct!")

predicting classification set
correct: Mammooty predicted: Nivin Pauly
94 1 95
Got 98.94736842105263 percent correct!


end score: 98.94736842105263 percent correct!
