#  STEP 1:Getting Embeddings.

## It detects the face and returns the probability with which the face is detected.
## It returns the 512-dimensional embedding vector, which is the most crucial piece for face recognition.
## MTCNN is used for face detection and for finding the bounding box (which is later used for drawing a box around the face).
## The Inception Resnet V1 model (defined here as resnet) is used for computing the embedding vector.

### Essential imports

In [13]:
from __future__ import print_function
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import numpy as np
import pandas as pd
import torch.nn as nn
import cv2
from facenet_pytorch import MTCNN, InceptionResnetV1,extract_face #cloned from facenet pytorch model from github into "facenet_pytorch"
from PIL import Image,ImageDraw   # A PIL image is sent to MTCNN

import os
import torch.nn as nn
""" while operating in webcam,the fps(frames per second) was really low,hence we include some moe functions to improve fps.
"""
from Web.imutils.video import WebcamVideoStream
from Web.imutils.video import FPS
import imutils


###  Essential Functions

### cos_sim(): Returns the cosine similarity of two vectors, i.e. their dot product divided by the product of their norms.
### Uses the numpy functions linalg.norm() and dot().


#### numpy.linalg.norm(x, ord=None, axis=None, keepdims=False)

    Matrix or vector norm.

    This function is able to return one of eight different matrix norms, or one of an infinite number of vector norms (described below), depending on the value of the ord parameter.
    Parameters:	

    x : array_like

        Input array. If axis is None, x must be 1-D or 2-D.
    ord : {non-zero int, inf, -inf, ‘fro’, ‘nuc’}, optional

        Order of the norm (see table under Notes). inf means numpy’s inf object.
    axis : {int, 2-tuple of ints, None}, optional

        If axis is an integer, it specifies the axis of x along which to compute the vector norms. If axis is a 2-tuple, it specifies the axes that hold 2-D matrices, and the matrix norms of these matrices are computed. If axis is None then either a vector norm (when x is 1-D) or a matrix norm (when x is 2-D) is returned.

    keepdims : bool, optional

        If this is set to True, the axes which are normed over are left in the result as dimensions with size one. With this option the result will broadcast correctly against the original x.

        

    Returns:	

    n : float or ndarray

        Norm of the matrix or vector(s).
        
        
####   numpy.dot(a, b, out=None)

    Dot product of two arrays. Specifically,

        If both a and b are 1-D arrays, it is inner product of vectors (without complex conjugation).

        If both a and b are 2-D arrays, it is matrix multiplication, but using matmul or a @ b is preferred.

        If either a or b is 0-D (scalar), it is equivalent to multiply and using numpy.multiply(a, b) or a * b is preferred.

        If a is an N-D array and b is a 1-D array, it is a sum product over the last axis of a and b.

        If a is an N-D array and b is an M-D array (where M>=2), it is a sum product over the last axis of a and the second-to-last axis of b:

        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])

    Parameters:	

    a : array_like

        First argument.
    b : array_like

        Second argument.
    out : ndarray, optional

        Output argument. This must have the exact kind that would be returned if it was not used. In particular, it must have the right type, must be C-contiguous, and its dtype must be the dtype that would be returned for dot(a,b). This is a performance feature. Therefore, if these conditions are not met, an exception is raised, instead of attempting to be flexible.

    Returns

    output : ndarray

        Returns the dot product of a and b. If a and b are both scalars or both 1-D arrays then a scalar is returned; otherwise an array is returned. If out is given, then it is returned.

    Raises:	

    ValueError

        If the last dimension of a is not the same size as the second-to-last dimension of b.



In [14]:
def cos_sim(a, b):
    """Compute the cosine similarity of two vectors.

    The result is the dot product of ``a`` and ``b`` divided by the product
    of their norms, where each norm is ``np.linalg.norm`` called with its
    default arguments.

    Args:
        a: First vector (any array-like accepted by ``np.dot``).
        b: Second vector (any array-like accepted by ``np.dot``).

    Returns:
        ``np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))``.
    """
    numerator = np.dot(a, b)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    return numerator / denominator


### cos(): cos_sim returns values in the range [-1, 1], where negative numbers have a different interpretation. So we use this function to rescale the result linearly to the range [0, 1].

In [15]:
def cos(a, b):
    """Rescale ``cos_sim(a, b)`` linearly from [-1, 1] onto [0, 1].

    Args:
        a: First vector, passed straight through to ``cos_sim``.
        b: Second vector, passed straight through to ``cos_sim``.

    Returns:
        ``(cos_sim(a, b) + 1) / 2`` -- 0 for opposite vectors, 1 for
        vectors pointing in the same direction.
    """
    lower, upper = -1, 1  # bounds of the raw cosine similarity
    similarity = cos_sim(a, b)
    return (similarity - lower) / (upper - lower)

### verify(): The key function in face recognition.
#### It takes the embedding vectors of the test image and compares them with the embedding vectors of the existing dataset.
#### The chosen threshold is 0.81 (found by trial and error and by checking accuracy, which is seen later).
#### If a face is not recognised, no name is shown, but if a face is detected a bounding box is still drawn around it.
#### For recognised faces we print the names (they are also written on the image).

Here, boxes is a numpy ndarray obtained from mtcnn.detect(), containing the coordinates of the bounding boxes of the detected faces.

In [27]:

def verify(embedding):
    """Write the name of every recognised face onto the current frame.

    Each reference embedding in the module-level ``embeddings`` is compared
    with each entry of ``embedding`` using ``cos``. Whenever the score is
    above 0.81, the matching entry of ``names`` is drawn on the module-level
    image ``ima`` inside a filled black strip along the bottom edge of the
    corresponding row of the module-level ``boxes``, and is also printed.

    Args:
        embedding: Iterable of embeddings for the faces in the current
            frame; entry ``j`` is paired with ``boxes[j]``.

    Returns:
        None. The function works through its side effects on ``ima`` and
        stdout.
    """
    label_font = cv2.FONT_HERSHEY_DUPLEX
    for ref_idx, ref_vec in enumerate(embeddings):
        for face_idx, face_vec in enumerate(embedding):
            score = cos(ref_vec, face_vec)
            # Chosen threshold is 0.81
            if not (score > 0.81):
                continue

            label = names[ref_idx]
            face_box = boxes[face_idx]
            # Box rows are read as (left, top, right, bottom).
            left, top, right, bottom = (
                face_box[corner].astype(int) for corner in range(4)
            )
            strip_height = int((bottom - top) / 6)
            text_inset = int((right - left) / 10)

            # Black strip across the bottom of the box, then white text on it.
            cv2.rectangle(
                ima,
                (left, bottom - strip_height),
                (right, bottom),
                (0, 0, 0),
                cv2.FILLED,
            )
            cv2.putText(
                ima,
                label,
                (left + text_inset, bottom - text_inset),
                label_font,
                1.0,
                (255, 255, 255),
                1,
            )
            print(label)

## Essential definitions

### Since we use a model pretrained on the VGGFace2 database, we only require the CPU. So we define the device to be 'cpu'.
### The pretrained Inception Resnet V1 model is called resnet.
### An instance of class MTCNN is created as mtcnn; its parameters can be changed (refer to the facenet_pytorch repository).

In [17]:
# Everything runs on the CPU: the recognition model is already pre-trained,
# so no GPU is needed.
device = torch.device('cpu')
print('Running on device: {}'.format(device))

# Define MTCNN module (face detector).
#
# Note that, since MTCNN is a collection of neural nets and other code, the
# device must be passed in the following way to enable copying of objects when
# needed internally.
mtcnn = MTCNN(
    image_size=160,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    prewhiten=True,
    device=device,
)

# Define Inception Resnet V1 module (embedding network): load the weights
# pre-trained on VGGFace2, switch to evaluation mode, and place the model on
# the chosen device.
resnet = InceptionResnetV1(pretrained='vggface2')
resnet = resnet.eval().to(device)

Running on device: cpu


In [18]:
# Define a dataset and data loader
dataset = datasets.ImageFolder('data/pic')
dataset.idx_to_class = {i:c for c, i in dataset.class_to_idx.items()}
loader = DataLoader(dataset, collate_fn=lambda x: x[0])

# Perfom MTCNN facial detection
aligned = []
names = []
for x, y in loader:
    x_aligned, prob = mtcnn(x, return_prob=True)
    if x_aligned is not None:
        print('Face detected with probability: {:8f}'.format(prob))
        aligned.append(x_aligned)
        names.append(dataset.idx_to_class[y])

# Calculate image embeddings
aligned = torch.stack(aligned).to(device)
embeddings = resnet(aligned).cpu()



# Print distance matrix for classes

cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
for i in range(0,len(names)):
    emb=embeddings[i].unsqueeze(0) 
    dist =cos(embeddings[0],emb)  # The cosine similarity between the embeddings.
    
    
dists = [[cos(e1,e2).item() for e2 in embeddings] for e1 in embeddings]
print(pd.DataFrame(dists, columns=names, index=names)) # helpful while analysing the results and for determining the value of threshold.


Face detected with probability: 0.999996
Face detected with probability: 0.999567
Face detected with probability: 0.999999
Face detected with probability: 0.999994
Face detected with probability: 0.999403
Face detected with probability: 0.995384
Face detected with probability: 0.999852
Face detected with probability: 0.999999
Face detected with probability: 1.000000
Face detected with probability: 1.000000
Face detected with probability: 0.999992
Face detected with probability: 0.999990
Face detected with probability: 0.999998
Face detected with probability: 0.999984
Face detected with probability: 0.999992
Face detected with probability: 1.000000
Face detected with probability: 0.999983
Face detected with probability: 0.999997
Face detected with probability: 0.999850
Face detected with probability: 0.999953
Face detected with probability: 0.999628
Face detected with probability: 0.999971
Face detected with probability: 0.999980
Face detected with probability: 0.999989
Face detected wi

#  STEP 2: Opening the WEBCAM and getting the faces recognised

### Essential  definitions
#### We use a Haar cascade classifier for detecting faces, as it is much faster than MTCNN, and with a webcam speed is the higher priority. Assuming no occlusions, Haar cascades give much better overall performance than MTCNN in real-life cases.

In [19]:
# Haar-cascade frontal-face classifier, loaded from an XML file expected in
# the current working directory.
classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Rebind mtcnn to an instance created with keep_all=True (all other settings
# left at their defaults) for use on webcam frames.
mtcnn = MTCNN(keep_all=True)

# Torch cosine similarity over the last dimension; cos() calls this name.
cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)

# Counter used to number the frames saved by the webcam loop.
i = 1

### Open Webcam.

In [28]:
# Real-time recognition loop.
#
# For every webcam frame: mirror and resize it, save it to disk, run MTCNN
# on the saved image, draw the detected boxes, compute the embeddings and let
# verify() write the recognised names onto the annotated image `ima`.
# verify() reads the module-level names `boxes` and `ima`, so both are kept
# as module-level variables here. Press Esc to stop.

# Directory that receives every captured frame (created once, up front).
path = "./data/testing..."
if not os.path.exists(path):
    os.makedirs(path)

vs = WebcamVideoStream(src=0).start()
print("camera open")
try:
    while True:
        im = vs.read()
        if im is None:
            # Without a frame nothing below can work; stop with a clear error
            # instead of an obscure OpenCV failure inside cv2.flip().
            raise RuntimeError("Webcam stream returned no frame")
        im = cv2.flip(im, 1)  # Flip to act as a mirror
        frame = imutils.resize(im, width=400)

        ima = None  # annotated image for this frame; stays None on failure
        try:
            # NOTE(review): the Haar-cascade result is not consumed anywhere
            # in this cell; the call is kept so `faces` stays available.
            faces = classifier.detectMultiScale(frame)

            imgName = os.path.join(path, "im_{}.jpg".format(i))
            cv2.imwrite(imgName, frame)

            # Get cropped and prewhitened image tensor
            img = Image.open(imgName)
            i = i + 1
            img_cropped = mtcnn(img)
            boxes, prob = mtcnn.detect(img)

            # Either result may be None when no face is found; handle that
            # explicitly instead of relying on an exception.
            if boxes is not None and img_cropped is not None:
                img_draw = img.copy()
                draw = ImageDraw.Draw(img_draw)
                # A separate index is used so the frame counter `i` is not
                # overwritten by the per-face loop.
                for face_idx, box in enumerate(boxes):
                    draw.rectangle(box.tolist())
                    extract_face(img, box, save_path='detected_face_{}.png'.format(face_idx))
                img_draw.save('annotated_faces.png')
                ima = cv2.imread('annotated_faces.png')

                # Calculate embeddings (inference only, so no autograd graph)
                with torch.no_grad():
                    img_embedding = resnet(img_cropped)
                verify(img_embedding)
        except Exception as err:
            # Best effort per frame: report the problem and keep the stream
            # alive. KeyboardInterrupt is deliberately not caught.
            print("Frame skipped: {}".format(err))
            ima = None

        if ima is None:
            # No face (or a failure): show the live frame with a notice
            # rather than a stale image from an earlier iteration.
            ima = frame.copy()
            text = "No image found"
            cv2.putText(ima, text, (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 255, 255), 1)
        cv2.imshow('Detecting...', ima)

        key = cv2.waitKey(1)
        # if Esc key is pressed then break out of the loop
        if key == 27:  # The Esc key
            break
finally:
    # Always release the window and the camera, even after an error.
    cv2.destroyAllWindows()
    vs.stop()

camera open
amma
maria
maria
maria
maria
amma
maria
maria
miffi
amma
amma
maria
maria
maria
miffi
amma
amma
maria
maria
maria
maria
miffi
amma
amma
maria
maria
maria
maria
amma
amma
maria
maria
maria
maria
miffi
amma
maria
maria
maria
maria
maria
miffi
amma
maria
maria
maria
maria
maria
maria
maria
miffi
amma
amma
maria
maria
maria
maria
maria
maria
miffi
miffi
maria
maria
maria
maria
maria
maria
miffi
amma
amma
maria
maria
maria
maria
maria
maria
miffi
amma
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
maria
amma
amma
maria
maria
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
maria
maria
maria
amma
amma
maria
maria
maria
maria
maria
maria
maria
amma
maria
maria
maria
maria
mari