In [1]:
import cv2
import numpy as np
import time

In [2]:
from keras.models import Sequential
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform
from keras.engine.topology import Layer
from keras import backend as K
K.set_image_data_format('channels_first')
import cv2
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
import tensorflow as tf
from fr_utils import *
from inception_blocks_v2 import *

%matplotlib inline

Using TensorFlow backend.


In [44]:
# Build the Inception-style network used for FaceNet encodings.
# The input shape is channels-first (3 colour planes of 96x96 pixels), matching
# the 'channels_first' image data format selected in the import cell.
FACENET_INPUT_SHAPE = (3, 96, 96)
FRmodel = faceRecoModel(input_shape=FACENET_INPUT_SHAPE)

In [4]:
def triplet_loss(y_true, y_pred, alpha = 0.2):
    """
    Triplet loss: penalise triplets whose anchor is not at least `alpha`
    closer (in squared distance) to the positive than to the negative.

    Arguments:
    y_true -- true labels; accepted for the Keras loss signature but not used here
    y_pred -- python list containing three objects:
            anchor -- the encodings for the anchor images, of shape (None, 128)
            positive -- the encodings for the positive images, of shape (None, 128)
            negative -- the encodings for the negative images, of shape (None, 128)
    alpha -- margin added to (pos_dist - neg_dist) before clamping at zero

    Returns:
    loss -- real number, value of the loss
    """

    def _squared_distance(first, second):
        # Sum of squared element-wise differences over the last (encoding) axis.
        return tf.reduce_sum(tf.square(tf.subtract(first, second)), axis = -1)

    anchor = y_pred[0]
    positive = y_pred[1]
    negative = y_pred[2]

    anchor_to_positive = _squared_distance(anchor, positive)
    anchor_to_negative = _squared_distance(anchor, negative)

    # Per-triplet margin violation; only positive values contribute to the loss.
    margin_violation = tf.add(tf.subtract(anchor_to_positive, anchor_to_negative), alpha)

    return tf.reduce_sum(tf.maximum(margin_violation, 0), axis = None)

In [5]:
# Attach the optimizer and the triplet loss to the model, then populate it with
# the FaceNet weights via the project helper.
FRmodel.compile(
    loss = triplet_loss,
    optimizer = 'adam',
    metrics = ['accuracy'],
)
load_weights_from_FaceNet(FRmodel)

In [38]:
# Database of authorized persons: maps a name (string) to the encoding that
# img_to_encoding() produces for that person's reference image.

database = {}

# Load the reference image as a 3-channel colour image.
# The image should be of dimension (96, 96, 3).
img2 = cv2.imread("sample_image.jpg", cv2.IMREAD_COLOR)
if img2 is None:
    # cv2.imread() does not raise on a missing/unreadable file, it returns None;
    # fail here with a clear message instead of deep inside img_to_encoding().
    raise FileNotFoundError("Could not read reference image 'sample_image.jpg'")

# Convert the reference image into its encoding using the FaceNet model.
database["AuthorizedPerson"] = img_to_encoding(img2, FRmodel)

In [47]:
# Verify the authorized person
# by calculating the distance between image of detected person and the authorized person

def verify(img, identity, database, model):
    """
    Function that measures how far the person on the "img" image is from "identity".

    Arguments:
    img -- image array; it is resized to 96x96 here before being encoded
    identity -- string, name of the person you'd like to verify the identity.
    database -- python dictionary mapping names of allowed people's names (strings) to their encodings (vectors).
    model -- your Inception model instance in Keras

    Returns:
    dist -- distance (np.linalg.norm of the difference) between the encoding of
            "img" and the encoding stored for "identity" in the database.

    Raises:
    ValueError -- if "img" is None or contains no pixels
    KeyError -- if "identity" is not a key of "database"
    """

    if img is None or img.size == 0:
        raise ValueError("verify() received an empty image")

    # Resize image for encoding. The interpolation mode must be passed by
    # keyword: the third positional parameter of cv2.resize is `dst`, not
    # `interpolation`. INTER_LINEAR (== 1) is also the OpenCV default.
    img = cv2.resize(img, (96, 96), interpolation=cv2.INTER_LINEAR)

    # Get the encoding vector of the image.
    encoding = img_to_encoding(img, model)

    # Distance between the encoding of the image and the stored encoding of
    # the requested identity.
    dist = np.linalg.norm(database[identity] - encoding)

    return dist

In [28]:
# Load Yolo

# Load the YOLOv3 network from its weights and configuration files.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Class names of the COCO data set, one per line of the file.
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Names of all layers in the Yolo model.
layer_names = net.getLayerNames()

# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version they come back either as an Nx1 array or as a flat array, so flatten
# first; indexing each element with [0] fails on the flat form.
output_layers = [
    layer_names[int(layer_id) - 1]
    for layer_id in np.array(net.getUnconnectedOutLayers()).flatten()
]

# One random colour per class, used when drawing boxes and labels.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

In [48]:
#SOURCE : https://pysource.com/2019/07/08/yolo-real-time-detection-on-cpu/
# Real-time loop: grab webcam frames, detect objects with YOLO, check every
# detected person against the authorized encoding, annotate the frame, show it
# and append it to 'output.avi'.

CONFIDENCE_THRESHOLD = 0.2        # minimum class score for a raw detection to be collected
NMS_SCORE_THRESHOLD = 0.8         # score threshold handed to cv2.dnn.NMSBoxes
NMS_OVERLAP_THRESHOLD = 0.3       # overlap threshold handed to cv2.dnn.NMSBoxes
VERIFY_DISTANCE_THRESHOLD = 0.5   # verify() distances below this count as the authorized person
OUTPUT_FPS = 20.0                 # frame rate written into the output video

#cap = cv2.VideoCapture("video.mp4")
cap = cv2.VideoCapture(0)                    # Capture video from webcam
fourcc = cv2.VideoWriter_fourcc(*'DIVX')     # Codec for saving the output video
save = None                                  # VideoWriter, created once the real frame size is known
font = cv2.FONT_HERSHEY_PLAIN                # Font for text drawn on the frame
starting_time = time.time()
frame_id = 0

try:
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # If no frame is received the loop ends.
                print("Can't receive frame (stream end?). Exiting ...")
                break
            frame_id += 1

            height, width = frame.shape[:2]

            # Keep an unannotated copy: `frame` gets rectangles and text drawn
            # on it below, and those must not leak into the verification input.
            clean_frame = frame.copy()

            if save is None:
                # The writer's frame size has to match the frames passed to
                # write(), so it is taken from the first captured frame instead
                # of being hard-coded.
                save = cv2.VideoWriter('output.avi', fourcc, OUTPUT_FPS, (width, height))

            # Detecting objects: scale pixels by ~1/255, resize to 416x416,
            # no mean subtraction, swap the R and B channels, no cropping.
            blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

            net.setInput(blob)
            outs = net.forward(output_layers)

            # Collect candidate detections from every output layer.
            class_ids = []
            confidences = []
            boxes = []
            for out in outs:
                for detection in out:
                    # Entries from index 5 onwards are the per-class scores.
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]

                    if confidence > CONFIDENCE_THRESHOLD:
                        # Entries 0..3 are centre x, centre y, width and height
                        # relative to the frame; scale them to pixels.
                        center_x = int(detection[0] * width)
                        center_y = int(detection[1] * height)
                        w = int(detection[2] * width)
                        h = int(detection[3] * height)

                        # Top-left corner of the rectangle.
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)

                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-max suppression. The returned indices may be nested (Nx1),
            # flat, or an empty tuple, so normalise them to a sorted list of
            # ints; sorting keeps the drawing order equal to the box order.
            if boxes:
                indexes = cv2.dnn.NMSBoxes(boxes, confidences, NMS_SCORE_THRESHOLD, NMS_OVERLAP_THRESHOLD)
                kept = sorted(int(i) for i in np.array(indexes).reshape(-1))
            else:
                kept = []

            for i in kept:
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                color = colors[class_ids[i]]

                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)

                if label == "person":
                    # Verify only the region of this person, clamped to the
                    # frame, so several people in one frame are judged
                    # individually instead of all sharing one whole-frame result.
                    left, top = max(x, 0), max(y, 0)
                    right, bottom = min(x + w, width), min(y + h, height)
                    frametoinput = clean_frame[top:bottom, left:right]
                    if frametoinput.size == 0:
                        # Box lies completely outside the frame; fall back to
                        # the whole (unannotated) frame.
                        frametoinput = clean_frame

                    # verify takes the image, identity of the authorized person,
                    # database and model instance as input and returns the
                    # distance to the authorized person's encoding.
                    dist = verify(frametoinput, "AuthorizedPerson", database, FRmodel)

                    if dist < VERIFY_DISTANCE_THRESHOLD:
                        cv2.putText(frame, "AuthorizedPerson", (x, y + 30), font, 2, color, 2)
                    else:
                        cv2.putText(frame, "Unknown, Another person detected",
                                    (x, y + 30), font, 2, color, 2)
                else:
                    cv2.putText(frame, label + " detected", (x+30, y+30), font, 2, color, 2)

            # Average frames per second since the loop started.
            elapsed_time = time.time() - starting_time
            fps = frame_id / elapsed_time if elapsed_time > 0 else 0.0
            cv2.putText(frame, "FPS: " + str(round(fps, 2)), (10, 50), font, 2, (0, 0, 0), 3)

            save.write(frame)                     # Saving the output video
            cv2.imshow("Image", frame)

            # waitKey returns -1 when no key was pressed. Stop on ESC or 'q';
            # the original `key == 0` test is kept for backward compatibility.
            key = cv2.waitKey(1)
            if key == 0 or (key != -1 and (key & 0xFF) in (27, ord('q'))):
                break
    finally:
        # Always free the camera and finalise the video file, even if the loop
        # raised or the kernel was interrupted.
        cap.release()
        if save is not None:
            save.release()
    # Normal exit only: keep the last frame on screen until a key is pressed.
    cv2.waitKey(0)
finally:
    cv2.destroyAllWindows()