In [1]:
from __future__ import print_function
import sys
import os
from argparse import ArgumentParser, SUPPRESS
import cv2,threading
import time,datetime
import logging as log
import numpy as np
from multiprocessing import Process,Pool
import shutil,pickle
import matplotlib.pyplot as plt
from openvino.inference_engine import IEPlugin,IENetwork
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import SGDClassifier,LogisticRegression

In [4]:
# Paths to the OpenVINO IR topology (.xml) files used by this notebook.
m_fd = '../model_downloader/intel/face-detection-retail-0004/FP16/face-detection-retail-0004.xml'
m_ag = '../model_downloader/intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml'
m_hp = '../model_downloader/intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001.xml'
m_em = '../model_downloader/intel/emotions-recognition-retail-0003/FP32/emotions-recognition-retail-0003.xml'
m_freid = '../model_downloader/intel/face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml'
# Video source. Set FACE_PIPELINE_CHANNEL in the environment to supply the stream URL
# without editing the notebook.
# NOTE(security): the fallback URL embeds a camera login. It is kept only so existing
# behaviour is unchanged; rotate that password and remove the literal before sharing.
channel = os.environ.get('FACE_PIPELINE_CHANNEL',
                         'rtsp://admin:admin@123@10.10.12.14:554/Streaming/Channels/401/')
cpu_extension = '../build_samples/intel64/Release/lib/libcpu_extension.so'
device = 'CPU'


In [5]:
def _read_ir(xml_path):
    """Read an IR network from ``xml_path`` and the ``.bin`` weights file with the same base name."""
    return IENetwork(model=xml_path, weights=os.path.splitext(xml_path)[0] + ".bin")


# Plugin initialization for the configured device; the extensions library is
# added only when a path is given and the device name contains 'CPU'.
log.info("Initializing plugin for {} device...".format(device))
plugin = IEPlugin(device=device)
if cpu_extension and 'CPU' in device:
    plugin.add_cpu_extension(cpu_extension)
# Read IR
log.info("Reading IR...")
m_fd_net = _read_ir(m_fd)
m_ag_net = _read_ir(m_ag)
m_hp_net = _read_ir(m_hp)
m_em_net = _read_ir(m_em)
m_freid_net = _read_ir(m_freid)



In [6]:
# Stop early when the CPU plugin reports that some layer of a loaded network is unsupported.
if device == "CPU":
    for net in (m_fd_net, m_ag_net, m_hp_net, m_em_net, m_freid_net):
        supported_layers = plugin.get_supported_layers(net)
        not_supported_layers = [name for name in net.layers.keys() if name not in supported_layers]
        if not not_supported_layers:
            # Every layer of this network is supported; move on to the next one.
            continue
        unsupported_msg = "Following layers are not supported by the plugin for specified device {}:\n {}"
        log.error(unsupported_msg.format(plugin.device, ', '.join(not_supported_layers)))
        log.error("Please try to specify cpu extensions library path in demo's command line parameters using -l "
                  "or --cpu_extension command line argument")
        sys.exit(1)

In [7]:
# Parallel lists, one entry per model, in the order:
# face detection, age/gender, head pose, emotions, face re-identification.
input_blob, out_blob, exec_net = [], [], []
for i, net in enumerate((m_fd_net, m_ag_net, m_hp_net, m_em_net, m_freid_net)):
    # Only the first input name and the first output name of each topology are recorded.
    first_input = next(iter(net.inputs))
    input_blob.append(first_input)
    first_output = next(iter(net.outputs))
    out_blob.append(first_output)
    log.info("Loading IR to the plugin...")
    loaded = plugin.load(network=net, num_requests=16)
    exec_net.append(loaded)
    # Show the input shape of the network (printed in n, h, c, w order).
    n, c, h, w = net.inputs[first_input].shape
    print(n, h, c, w)


1 300 3 300
1 62 3 62
1 60 3 60
1 64 3 64
1 128 3 128


In [8]:
def generate_fd(input_frame,n=1,c=3,w=300,h=300,thresh=.1):
    """
    Run the face-detection network (exec_net[0]) on a single frame.

    input_frame : image array; resized to (w, h), moved to channel-first layout
                  and reshaped to (n, c, h, w) before inference
    thresh      : a detection row is kept only when its column 2 is above this
                  value (presumably the detection confidence -- confirm
                  against the model documentation)

    Returns a 2-D array holding the kept rows, with every row that contains a
    negative value removed. Callers in this notebook scale columns 3: by the
    frame width/height to obtain pixel boxes.

    Raises RuntimeError when the inference request finishes with a non-zero
    status (previously this surfaced as an UnboundLocalError on ``res``).
    """
    op_frame = cv2.resize(input_frame,(w,h)).transpose((2, 0, 1)).reshape(n,c,h,w)
    # A single fixed request slot is used for detection.
    request_id = 1
    exec_net[0].start_async(request_id=request_id, inputs={input_blob[0]: op_frame})
    status = exec_net[0].requests[request_id].wait(-1)
    if status != 0:
        raise RuntimeError("Face detection inference failed with status {}".format(status))
    res = exec_net[0].requests[request_id].outputs[out_blob[0]]
    res_filt = res[np.where(res[:,:,:,2]>thresh)]
    # Drop rows that contain any negative entry.
    return res_filt[np.min(res_filt,axis=1)>=0]
    

In [9]:
def generate_ag(input_frame,bboxes,n=1,c=3,w=62,h=62):
    """
    Estimate age and gender for every face box using exec_net[1].

    input_frame : image array the boxes refer to
    bboxes      : iterable of boxes indexed as (x_min, y_min, x_max, y_max) in pixels
    n, c, w, h  : each crop is resized to (w, h) and reshaped to (n, c, h, w)

    Network outputs read here:
      'age_conv3' : age / 100
      'prob'      : [female, male]

    Returns a list of (age, gender) tuples, one per box and in the same order,
    where gender is 'Female' or 'Male'.

    Raises RuntimeError when any inference request finishes with a non-zero
    status, so results can never silently shift relative to ``bboxes``.
    """
    net = exec_net[1]
    blob_name = input_blob[1]
    faces = [cv2.resize(input_frame[b[1]:b[3],b[0]:b[2]],(w,h)).transpose((2, 0, 1)).reshape(n,c,h,w) for b in bboxes]
    # Only len(net.requests) request slots exist, so the faces are processed in
    # chunks of at most that size instead of using one request id per face.
    pool_size = len(net.requests)
    res = []
    for start in range(0, len(faces), pool_size):
        chunk = faces[start:start + pool_size]
        for request_id, face in enumerate(chunk):
            net.start_async(request_id=request_id, inputs={blob_name: face})
        for request_id in range(len(chunk)):
            request = net.requests[request_id]
            status = request.wait(-1)
            if status != 0:
                raise RuntimeError("Age/gender inference failed with status {}".format(status))
            # Copy the outputs: the same request slot is reused by the next chunk.
            res.append(dict((name, np.array(value)) for name, value in request.outputs.items()))
    age = [int(out['age_conv3'].flatten()[0]*100) for out in res]
    gender = ['Female' if out['prob'][0][0]>out['prob'][0][1] else 'Male' for out in res]
    return list(zip(age,gender))
    

In [10]:
def generate_hp(input_frame,bboxes,n=1,c=3,w=60,h=60):
    """
    Run the head-pose network (exec_net[2]) on every face box.

    input_frame : image array the boxes refer to
    bboxes      : iterable of boxes indexed as (x_min, y_min, x_max, y_max) in pixels
    n, c, w, h  : each crop is resized to (w, h) and reshaped to (n, c, h, w)

    Returns a list with one dict of network outputs per box, in box order.

    Raises RuntimeError when any inference request finishes with a non-zero
    status, so results can never silently shift relative to ``bboxes``.
    """
    net = exec_net[2]
    blob_name = input_blob[2]
    faces = [cv2.resize(input_frame[b[1]:b[3],b[0]:b[2]],(w,h)).transpose((2, 0, 1)).reshape(n,c,h,w) for b in bboxes]
    # Only len(net.requests) request slots exist, so the faces are processed in
    # chunks of at most that size instead of using one request id per face.
    pool_size = len(net.requests)
    res = []
    for start in range(0, len(faces), pool_size):
        chunk = faces[start:start + pool_size]
        for request_id, face in enumerate(chunk):
            net.start_async(request_id=request_id, inputs={blob_name: face})
        for request_id in range(len(chunk)):
            request = net.requests[request_id]
            status = request.wait(-1)
            if status != 0:
                raise RuntimeError("Head pose inference failed with status {}".format(status))
            # Copy the outputs: the same request slot is reused by the next chunk.
            res.append(dict((name, np.array(value)) for name, value in request.outputs.items()))
    return res
    

In [11]:
def generate_em(input_frame,bboxes,n=1,c=3,w=64,h=64):
    """
    Run the emotion-recognition network (exec_net[3]) on every face box.

    input_frame : image array the boxes refer to
    bboxes      : iterable of boxes indexed as (x_min, y_min, x_max, y_max) in pixels
    n, c, w, h  : each crop is resized to (w, h) and reshaped to (n, c, h, w)

    Emotion classes, in output order:
    'neutral', 'happy', 'sad', 'surprise', 'anger'

    Returns a list with one dict of network outputs per box, in box order.

    Raises RuntimeError when any inference request finishes with a non-zero
    status, so results can never silently shift relative to ``bboxes``.
    """
    net = exec_net[3]
    blob_name = input_blob[3]
    faces = [cv2.resize(input_frame[b[1]:b[3],b[0]:b[2]],(w,h)).transpose((2, 0, 1)).reshape(n,c,h,w) for b in bboxes]
    # Only len(net.requests) request slots exist, so the faces are processed in
    # chunks of at most that size instead of using one request id per face.
    pool_size = len(net.requests)
    res = []
    for start in range(0, len(faces), pool_size):
        chunk = faces[start:start + pool_size]
        for request_id, face in enumerate(chunk):
            net.start_async(request_id=request_id, inputs={blob_name: face})
        for request_id in range(len(chunk)):
            request = net.requests[request_id]
            status = request.wait(-1)
            if status != 0:
                raise RuntimeError("Emotion inference failed with status {}".format(status))
            # Copy the outputs: the same request slot is reused by the next chunk.
            res.append(dict((name, np.array(value)) for name, value in request.outputs.items()))
    return res

In [12]:
def generate_face_embedding(input_frame,bboxes,n=1,c=3,w=128,h=128):
    """
    Compute a re-identification embedding for every face box using exec_net[4].

    input_frame : image array the boxes refer to
    bboxes      : iterable of boxes indexed as (x_min, y_min, x_max, y_max) in pixels
    n, c, w, h  : each crop is resized to (w, h) and reshaped to (n, c, h, w)

    Returns an array with one flattened '658' network output per box, in box
    order. With no boxes the result is an empty array whose shape[0] is 0.

    Raises RuntimeError when any inference request finishes with a non-zero
    status, so embeddings can never silently shift relative to ``bboxes``.
    """
    net = exec_net[4]
    blob_name = input_blob[4]
    faces = [cv2.resize(input_frame[b[1]:b[3],b[0]:b[2]],(w,h)).transpose((2, 0, 1)).reshape(n,c,h,w) for b in bboxes]
    # Only len(net.requests) request slots exist, so the faces are processed in
    # chunks of at most that size instead of using one request id per face.
    pool_size = len(net.requests)
    embeddings = []
    for start in range(0, len(faces), pool_size):
        chunk = faces[start:start + pool_size]
        for request_id, face in enumerate(chunk):
            net.start_async(request_id=request_id, inputs={blob_name: face})
        for request_id in range(len(chunk)):
            request = net.requests[request_id]
            status = request.wait(-1)
            if status != 0:
                raise RuntimeError("Face embedding inference failed with status {}".format(status))
            # flatten() returns a copy, so reusing the request slot later is safe.
            embeddings.append(request.outputs['658'].flatten())
    return np.array(embeddings)

In [13]:
def get_all_file_path(source_dir,ext=('.jpg','.png','.jpeg')):
    """
    Recursively collect the absolute paths of all files under ``source_dir``
    whose name ends with one of the given extensions.

    source_dir : directory to walk; a missing directory yields an empty list
    ext        : one suffix string or an iterable of suffixes. Matching is
                 case-insensitive, so 'photo.JPG' matches '.jpg'.

    Returns the matching paths as a sorted list, so the result does not depend
    on the directory listing order of the file system.
    """
    if isinstance(ext, str):
        ext = (ext,)
    suffixes = tuple(e.lower() for e in ext)
    op = []
    for root, _dirs, files in os.walk(source_dir):
        abs_root = os.path.abspath(root)
        for file in files:
            if file.lower().endswith(suffixes):
                op.append(os.path.join(abs_root, file))
    return sorted(op)

In [14]:
def train_face_detection(train_data_location,thresh=.5):
    """
    Train the face-recognition classifier from a folder of labelled images.

    train_data_location : directory containing one sub-folder per person; the
                          folder name is the label and it holds that person's images
    thresh              : initial face-detection threshold tried on each image

    For every image the detection threshold is raised by .05 while more than
    one face is found and lowered by .05 while none is found, until exactly
    one face embedding is obtained. The search starts again from ``thresh``
    for each image and is limited to a fixed number of attempts, because the
    adjustment can otherwise flip between "too many" and "none" forever.
    Unreadable images and images without a single-face result are skipped
    with a warning; their labels are skipped too, so labels and embeddings
    stay aligned.

    The fitted label encoder and classifier are pickled into
    ``train_data_location`` as 'label_encoder.pickle' and 'classifier.pickle'.

    Returns (clf, L_enc).
    Raises ValueError when no usable training face is found.
    """
    max_attempts = 20
    step = .05
    imgs = get_all_file_path(train_data_location)
    labels = []
    embeddings = []
    for f in imgs:
        im = cv2.imread(f)
        if im is None:
            # cv2.imread returns None instead of raising for unreadable files.
            log.warning("Skipping unreadable image: {}".format(f))
            continue
        initial_h,initial_w = im.shape[:2]
        img_thresh = thresh
        embedding = None
        for _attempt in range(max_attempts):
            res_filt = generate_fd(im,thresh=img_thresh)
            bboxes = np.multiply([[initial_w,initial_h,initial_w,initial_h]],(res_filt[:,3:])).astype('int')
            candidate = generate_face_embedding(im,bboxes)
            if candidate.shape[0]>1:
                img_thresh = img_thresh+step
            elif candidate.shape[0]==0:
                img_thresh = img_thresh-step
            else:
                embedding = candidate
                break
        if embedding is None:
            log.warning("Skipping image without a single detectable face: {}".format(f))
            continue
        embeddings.append(embedding)
        labels.append(os.path.basename(os.path.dirname(f)))
    if not embeddings:
        raise ValueError("No usable training faces found under {}".format(train_data_location))
    embedding_array = np.concatenate(embeddings)
    L_enc = LabelEncoder()
    labels_enc = L_enc.fit_transform(labels)
    clf = LogisticRegression(n_jobs=-1,class_weight='balanced')
    clf.fit(embedding_array,labels_enc)
    # Context managers make sure both pickle files are flushed and closed.
    with open(os.path.join(train_data_location,'label_encoder.pickle'),'wb') as fh:
        pickle.dump(L_enc,fh)
    with open(os.path.join(train_data_location,'classifier.pickle'),'wb') as fh:
        pickle.dump(clf,fh)
    return clf, L_enc
    

In [15]:
def generate_plot(in_frame,clf,L_enc,thresh = .2,recog_thresh=.2):
    """
    Detect faces in a frame and draw box, name, age, gender and emotion on it.

    in_frame     : image array; it is annotated in place
    clf          : classifier for face recognition (must provide predict_proba)
    L_enc        : label encoder used to turn class indices back into names
    thresh       : threshold passed to the face detector
    recog_thresh : a face is named only when its best class probability is
                   above this value; otherwise it is labelled 'UNKNOWN'

    Returns ``in_frame`` (the same object that was passed in). The original
    code returned the notebook-global ``frame``, which only worked when the
    caller happened to use that variable name.
    """
    ### all detect and plot should be called sequentially
    ems = ['neutral', 'happy', 'sad', 'surprise', 'anger']
    # (box colour, text colour) per predicted gender.
    female_colours = ((193,182,255), (193,20,255))
    male_colours = ((255,0,0), (200,0,0))
    initial_h,initial_w = in_frame.shape[:2]
    res_filt = generate_fd(in_frame,thresh=thresh)
    bboxes = np.multiply([[initial_w,initial_h,initial_w,initial_h]],(res_filt[:,3:])).astype('int')
    if len(bboxes) == 0:
        return in_frame

    probs = clf.predict_proba(generate_face_embedding(in_frame,bboxes))
    op = probs.argmax(axis=1)
    names = [L_enc.inverse_transform([i])[0] if probs[idx][i]>recog_thresh else 'UNKNOWN' for idx,i in enumerate(op)]
    age_gender = generate_ag(in_frame,bboxes)
    em_list = [ems[np.argmax(i['prob_emotion'])] for i in generate_em(in_frame,bboxes)]
    for name,a_g,b,em in zip(names,age_gender,bboxes,em_list):
        box_colour, text_colour = female_colours if a_g[1]=='Female' else male_colours
        # Plain ints keep the OpenCV drawing calls independent of the numpy integer type.
        x_min, y_min, x_max, y_max = [int(v) for v in b]
        caption = '('+str(a_g[0]) + ','+a_g[1]+','+ em+')'
        cv2.rectangle(in_frame, (x_min, y_min), (x_max, y_max), box_colour, 2)
        cv2.putText(in_frame, name.upper(),
                    (x_min, y_min - 50),cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_colour, 1)
        cv2.putText(in_frame, caption,
                    (x_min-10, y_min - 20),cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_colour, 1)
    return in_frame
    
    
    

In [16]:
# Reuse the pickled classifier / label encoder when both exist, otherwise
# (or when Force_retraining is set) train them from the images in image_location.
Force_retraining = False
image_location = '../face_images/consolidated_data/'
classifier_path = os.path.join(image_location,'classifier.pickle')
label_encoder_path = os.path.join(image_location,'label_encoder.pickle')
if (not Force_retraining) and os.path.isfile(classifier_path) and os.path.isfile(label_encoder_path):
    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # that were written by train_face_detection on a trusted machine.
    with open(classifier_path,'rb') as fh:
        clf = pickle.load(fh)
    with open(label_encoder_path,'rb') as fh:
        L_enc = pickle.load(fh)
else:
    clf,L_enc = train_face_detection(image_location)

In [17]:
# Replaces the earlier `channel` value; the capture cell below passes it to cv2.VideoCapture.
# NOTE(review): 0 is presumably the OpenCV index of the default local camera -- confirm.
channel = 0

In [22]:
# Live loop: read frames from `channel`, annotate them with generate_plot,
# display them, and optionally write them to a video file. Press Esc to stop.
cv2.namedWindow("Detection Results",cv2.WINDOW_NORMAL)
fd_thresh = .4  # NOTE(review): unused -- generate_plot below is called with thresh=.3
write_video = False
# The writer is created lazily from the first frame (only when write_video is on),
# so it starts as None regardless of the flag and cleanup can test for it.
out = None

cap = cv2.VideoCapture(channel)
retry_connect = 10  # budget of failed reads before giving up
cur_request_id = 0
fps_fd = []
net_fps = []
try:
    while (cap.isOpened()):
        fps_fd = fps_fd[-100:]  # rolling window for the displayed average
        inf_start_fd = time.time()
        ret,frame  = cap.read()
        if not ret or frame is None:
            # A failed read must not reach generate_plot (frame is None).
            # Spend one retry, reopen the source if it was closed, and try again.
            retry_connect-=1
            if retry_connect<0:
                break
            if not cap.isOpened():
                cap = cv2.VideoCapture(channel)
            continue
        # Take the size from the frame itself instead of the capture properties.
        initial_h,initial_w = frame.shape[:2]
        frame = generate_plot(frame,clf,L_enc,thresh=.3,recog_thresh=.2)
        det_time_fd = time.time()- inf_start_fd

        fps_fd.append(1/det_time_fd)
        cv2.putText(frame, "Inference FPS Face detection: {:.3f} ".format(np.mean(fps_fd)), (10, int(initial_h - 50)), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                (10, 10, 200), 1)
        net_fps.append(np.mean(fps_fd))

        render_start = time.time()
        cv2.imshow("Detection Results", frame)
        render_time = time.time() - render_start
        key = cv2.waitKey(1)

        if write_video:
            if out is None:
                # NOTE(review): MJPG fourcc combined with an .mp4 container -- confirm
                # the local OpenCV build can write this combination.
                out = cv2.VideoWriter('../output_vids/'+datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_Face_Pipeline")+'_out.mp4',cv2.VideoWriter_fourcc('M','J','P','G'), 20, (frame.shape[1],frame.shape[0]))
            out.write(frame)

        if key == 27:  # Esc
            break
finally:
    # Always free the window, the capture device and the writer, including
    # when the cell is interrupted or an exception is raised inside the loop.
    cv2.destroyAllWindows()
    cap.release()
    if out is not None:
        out.release()
        
    

In [32]:
# Releases the capture object once more; presumably run by hand after the
# capture cell was interrupted, so the video source is not left open.
cap.release()