In [None]:
import os
import cv2
import time
import pickle
import numpy as np
import tensorflow as tfs
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from scipy import misc
from Detection import Detection
from Recognition import Recognition
from Smoothen_recog import Smoothen_recog
import preprocess
import imutils

In [1]:
# Locations used by this run: the input clip, the trained classifier pickle,
# and the directory / file name of the annotated output video.
Input_video_path = "../../Dataset/EximiusAI_Dataset/Original/Videos/20190312_153102.mp4"
classifier_pickle_file = "../util/classifier_sgd_casia.pkl"
Output_video_path = "../Outputs/Detection_Recognition_outputs/"
Output_video_name = "EximiusAI_12.avi"
# Full output path: the directory string (which already ends in "/") followed by the file name.
O_path = "".join((Output_video_path, Output_video_name))

In [None]:
# Frozen-graph (.pb) files handed to the detection and recognition wrappers.
Detection_model_path, Recognition_model_path = (
    "../Models/Detection_mtcnn.pb",
    "../Models/Recognition_facenet.pb",
)

In [None]:
# Build the detector and the recognizer once, before the frame loop.
# Per the original author's note, creating the instances up front avoids
# loading the graphs and sessions again for every frame.
# NOTE(review): the recognizer is a Smoothen_recog instance; the Recognition
# class imported at the top of the notebook is not used directly in the cells
# shown here -- presumably Smoothen_recog wraps it; confirm.
detection = Detection(Detection_model_path)
recognition = Smoothen_recog(Recognition_model_path , classifier_pickle_file)

In [None]:
# Run detection + recognition over the input video and write an annotated copy.
#
# For every frame: rotate it by 90 degrees, run the detector, and -- when exactly
# one face is found and it covers more than 1% of the frame -- crop the face,
# run the recognizer, and draw the box and predicted label on the frame.
# Every frame that is read is written to the output video.
cap = cv2.VideoCapture(Input_video_path)
# Number of frames successfully read and processed so far.
frame_count = 0
# Upper bound on the number of frames processed in one run.
max_frames = 10000
# Frame rate stored in the output file (fixed; not taken from the input).
output_fps = 30
# Wall-clock start, used for the overall FPS figure printed at the end.
start = time.time()
# Report an input that failed to open. Execution continues, but the loop below
# is guarded by cap.isOpened(), so no frames are processed in that case.
if not cap.isOpened():
    print("Error opening video stream or file")

# Frame size reported by the capture (named constants instead of the raw ids 3 and 4).
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_area = frame_width * frame_height

# Change orientation: frames are rotated by 90 degrees before being written,
# so the writer is created with width and height swapped.
frame_width, frame_height = frame_height, frame_width

out = cv2.VideoWriter(O_path, cv2.VideoWriter_fourcc('M','J','P','G'), output_fps, (frame_width, frame_height))

# try/finally guarantees the reader and writer are released even if
# detection or recognition raises part-way through the video.
try:
    while cap.isOpened() and frame_count < max_frames:
        ret, frame = cap.read()
        frame_start_time = time.time()
        if not ret:
            break
        # Count only frames that were actually read, so the totals printed at
        # the end are not off by one.
        frame_count += 1

        frame = imutils.rotate_bound(frame, 90)
        # Inferencing the detection model.
        bbox, scores, landmarks = detection.detect(frame)

        if len(bbox) != 1:
            # Zero or several faces: the recognizer is still called with the
            # face count and a placeholder input.
            # NOTE(review): presumably this keeps its smoothing state in step
            # with the frame sequence -- confirm against Smoothen_recog.
            recognition.recog(len(bbox), 0)
        else:
            # The box is indexed as (y1, x1, y2, x2): elements 0 and 2 slice
            # rows, elements 1 and 3 slice columns.
            box = bbox[0].astype('int32')
            frame_h, frame_w = frame.shape[:2]
            # Clamp to the frame. A negative start index would otherwise wrap
            # around when slicing and produce a wrong or empty crop.
            top = max(0, int(box[0]))
            left = max(0, int(box[1]))
            bottom = min(frame_h, int(box[2]))
            right = min(frame_w, int(box[3]))
            box_w = max(0, right - left)
            box_h = max(0, bottom - top)
            box_a = box_w * box_h
            percent = box_a * 100 / frame_area

            print('[INFO] percentage of bounding box in total image : {:.2f}'.format(percent))
            # Crop the face out of the frame (copied so later drawing on the
            # frame cannot alter it).
            face = np.copy(frame[top:bottom, left:right])
            # Skip faces covering 1% of the frame or less, and empty crops.
            if percent > 1.0 and face.size != 0:
                face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
                face = preprocess.prewhiten(face)
                prediction_id, best_class_probabilities = recognition.recog(1, face)
                # Box/label colour as (0, G, R): green grows with the
                # probability, red with its complement.
                prob_color_g = int(best_class_probabilities*255)
                prob_color_r = 255 - prob_color_g
                tup = (0, prob_color_g, prob_color_r)
                # Predictions below 0.5 probability are labelled "Unknown".
                if best_class_probabilities < 0.5:
                    prediction_id = ["Unknown"]
                label = str(prediction_id[0])
                print("Prediction ID:", label)
                frame = cv2.rectangle(frame, (left, top), (right, bottom), tup, 3)
                cv2.putText(frame, label, (left, top),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, tup,
                            thickness=2, lineType=2)
        out.write(frame)

        frame_end_time = time.time()
        time_per_frame = frame_end_time - frame_start_time
        # Guard against a zero interval on clocks with coarse resolution.
        fps_frame = 1/time_per_frame if time_per_frame > 0 else float('inf')
        print('[INFO] total boxes:', len(bbox))
        print('[INFO] Processing Frame:', frame_count)
        print('[INFO] Processing Speed:',fps_frame," FPS")
        print('[INFO] Time Per Frame:', time_per_frame)

    end = time.time()
finally:
    # Closing the reader and writer.
    cap.release()
    out.release()

timet = end - start
fps = frame_count/timet if timet > 0 else float('inf')
print("[INFO] NUMBER OF FRAMES:", frame_count)
print("[INFO] Detection took {:.5} seconds".format(end - start))
print("[INFO] Overall FPS: "+ str(fps))


In [None]:
# Releases the video writer again; the processing cell above already calls
# out.release() once it finishes.
# NOTE(review): presumably kept so the output file can be closed by hand after
# an interrupted run -- confirm, or remove this cell.
out.release()