In [1]:
import os
import cv2
import time
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from scipy import misc
from Detection import Detection
from Recognition import Recognition
import preprocess

  from ._conv import register_converters as _register_converters


In [2]:
# Input clip that will be run through detection + recognition.
Input_video_path = "../Dataset/videos/vid1.mp4"
# Pickled (classifier, class names) pair used to label face embeddings.
classifier_pickle_file = "../util/classifier.pkl"
# Directory and file name of the annotated output video.
Output_video_name = "Detection_Recognition.avi"
Output_video_path = "../Outputs/Detection_Recognition_outputs/"
# Full output path; the directory string already ends with a separator.
O_path = "{}{}".format(Output_video_path, Output_video_name)

In [3]:
# Frozen-graph (.pb) files handed to the Recognition and Detection wrappers.
Recognition_model_path = "../Models/Recognition_facenet.pb"
Detection_model_path = "../Models/Detection_mtcnn.pb"

In [4]:
# Build one Detection and one Recognition object from their model files.
# They are created once, up front, and reused for every frame so the graphs
# and sessions are not loaded again for each frame.
# NOTE(review): the constructors are project code not visible here; they
# presumably load the .pb graphs and print a confirmation - confirm.
detection = Detection(Detection_model_path)
recognition = Recognition(Recognition_model_path)

Detection Model Graph Initialized
Recognition Model Graph Initialized


In [5]:
# Restore the trained classifier together with its class-name list.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# classifier_pickle_file must only ever point at a trusted, locally produced file.
with open(classifier_pickle_file, mode='rb') as infile:
    model, class_names = pickle.load(infile)

In [6]:
# Run every frame of the input video through face detection and recognition,
# draw the labelled boxes, and write the annotated frames to O_path.
#
# Reads:  Input_video_path, Output_video_path, O_path, detection, recognition,
#         model, preprocess
# Leaves: frame_count (frames actually processed), fps (overall frames/second),
#         img (first face crop that passed the size filter, if any)

#Initializing video capture from the Input_video_path.
cap = cv2.VideoCapture(Input_video_path)
#Variable to count frames that were actually read and processed.
frame_count = 0
#Upper bound on the number of frames processed in one run.
MAX_FRAMES = 1000
#Faces whose box covers at most this percentage of the frame are skipped.
MIN_FACE_PERCENT = 1.0
#starting the processing time to calculate fps.
start = time.time()
#Ensuring the input_video_path opens without errors.
if not cap.isOpened():
    print("Error opening video stream or file")

#getting the frame_width , frame_height from the given input video.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_area = frame_width * frame_height

#creating a video file to write the output frames at output_video_path(O_path).
#VideoWriter does not create missing directories, so make sure the target exists.
os.makedirs(Output_video_path, exist_ok=True)
out = cv2.VideoWriter(O_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30,
                      (frame_width, frame_height))

#Reading each and every frame in a while loop and processing/inferencing them through two models.
grab = 0
try:
    while cap.isOpened() and frame_count < MAX_FRAMES:
        ret, frame = cap.read()
        # Check the read result BEFORE touching the frame: at end of stream
        # `frame` is None and cvtColor would raise.
        if not ret:
            break
        frame_start_time = time.time()
        frame_count += 1
        # The frame is converted to RGB for the models and back to BGR before writing.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_h, frame_w = frame.shape[:2]

        #Detection Starts :

        #Inferencing the Detection model
        bbox, scores, landmarks = detection.detect(frame)
        for box, pts in zip(bbox, landmarks):
            # Box layout as used below: [top, left, bottom, right].
            box = box.astype('int32')
            box_w = box[3] - box[1]
            box_h = box[2] - box[0]
            box_a = box_w * box_h
            percent = box_a * 100 / frame_area
            print('[INFO] percentage of bounding box in total image : {:.2f}'.format(percent))

            # CROPPING THE FACE OUT OF THE IMAGE
            # Clamp the crop window to the frame: a negative index would wrap
            # around in numpy slicing and silently return the wrong region.
            top, bottom = np.clip(box[[0, 2]], 0, frame_h)
            left, right = np.clip(box[[1, 3]], 0, frame_w)
            face = np.copy(frame[top:bottom, left:right])
            if percent <= MIN_FACE_PERCENT or face.size == 0:
                continue

            # Keep the first accepted crop around for inspection after the run.
            if grab == 0:
                img = face
            grab = grab + 1

            face = preprocess.prewhiten(face)
            embedding = recognition.recognize(face=face)

            # Take label and confidence from the same predict_proba call so the
            # two always agree (a separate predict() can pick a different class
            # than the argmax of predict_proba for SVC) and only one classifier
            # pass is needed per face.
            predictions = model.predict_proba(embedding)
            best_class_indices = np.argmax(predictions, axis=1)
            best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
            print(best_class_probabilities)
            predicted_label = model.classes_[best_class_indices[0]]

            corner_top_left = (int(box[1]), int(box[0]))
            corner_bottom_right = (int(box[3]), int(box[2]))
            frame = cv2.rectangle(frame, corner_top_left, corner_bottom_right, (255, 0, 0), 3)
            pts = pts.astype('int32')

            class_prob = str(predicted_label) + " : {:.2f}".format(best_class_probabilities[0])
            cv2.putText(frame, class_prob, corner_top_left,
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0),
                        thickness=2, lineType=2)
            #Uncomment this to have landmarks
#             for i in range(5):
#                 frame = cv2.circle(frame, (pts[i+5], pts[i]), 4, (0, 0, 255), 8)

        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        #Writing to the video output
        out.write(frame)

        frame_end_time = time.time()
        time_per_frame = frame_end_time - frame_start_time
        fps_frame = 1 / time_per_frame
        print('[INFO] total boxes:', len(bbox))
        print('[INFO] Processing Frame:', frame_count)
        print('[INFO] Processing Speed:', fps_frame, " FPS")
        print('[INFO] Time Per Frame:', time_per_frame)

    end = time.time()
    timet = end - start
    fps = frame_count / timet
    print("[INFO] NUMBER OF FRAMES:", frame_count)
    print("[INFO] Detection took {:.5} seconds".format(end - start))
    print("[INFO] Overall FPS: " + str(fps))
finally:
    # closing the writer and reader, even when the run is interrupted, so the
    # capture handle is freed and the output file is finalised.
    cap.release()
    out.release()

[INFO] percentage of bounding box in total image : 2.66
[0.98241956]
[INFO] percentage of bounding box in total image : 0.12
[INFO] total boxes: 2
[INFO] Processing Frame: 1
[INFO] Processing Speed: 0.38083525991983974  FPS
[INFO] Time Per Frame: 2.625807285308838
[INFO] percentage of bounding box in total image : 2.82
[0.96354355]
[INFO] percentage of bounding box in total image : 0.13
[INFO] total boxes: 2
[INFO] Processing Frame: 2
[INFO] Processing Speed: 11.765921695247126  FPS
[INFO] Time Per Frame: 0.0849912166595459
[INFO] percentage of bounding box in total image : 2.89
[0.97803214]
[INFO] percentage of bounding box in total image : 0.13
[INFO] total boxes: 2
[INFO] Processing Frame: 3
[INFO] Processing Speed: 12.438291979158326  FPS
[INFO] Time Per Frame: 0.08039689064025879
[INFO] percentage of bounding box in total image : 2.76
[0.98691626]
[INFO] total boxes: 1
[INFO] Processing Frame: 4
[INFO] Processing Speed: 14.18758456459382  FPS
[INFO] Time Per Frame: 0.0704841613769

KeyboardInterrupt: 