In [1]:
from __future__ import print_function
import sys
import os
from argparse import ArgumentParser, SUPPRESS
import cv2,threading
import time,datetime
import logging as log
from openvino.inference_engine import IENetwork, IEPlugin
import numpy as np
from multiprocessing import Process,Pool
import shutil,pickle
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import SGDClassifier,LogisticRegression

In [2]:
# Paths and runtime settings used by the cells below.
# Model IR (.xml); the matching .bin weights path is derived from this name when the network is read.
m_ret = '../model_files/Retinanet/resnet50_coco_best_v2.1.0.xml'
# Video source later opened with cv2.VideoCapture.
# NOTE(review): this URL embeds a username and password in plain text -- consider reading it
# from an environment variable instead. The value is reassigned to a local file in a later cell.
channel = 'rtsp://admin:admin@123@10.10.12.14:554/Streaming/Channels/401/'
# Shared library handed to plugin.add_cpu_extension when the device name contains 'CPU'.
cpu_extension = '../build_samples/intel64/Release/lib/libcpu_extension.so'
# Device name checked before loading the CPU extension and before validating layers.
device = 'CPU'


In [3]:
# Plugin initialization for the configured device; load the extensions library if one is specified.
# The `device` setting is used everywhere here (instead of a hard-coded 'CPU') so that the log
# message, the plugin and the extension check always agree with the configuration cell.
log.info("Initializing plugin for {} device...".format(device))
plugin = IEPlugin(device=device)
if cpu_extension and 'CPU' in device:
    plugin.add_cpu_extension(cpu_extension)
# Read IR: the weights file is expected next to the .xml with a .bin extension.
log.info("Reading IR...")
m_retinanet = IENetwork(model=m_ret, weights=os.path.splitext(m_ret)[0] + ".bin")




In [4]:
# Stop early when the CPU plugin cannot execute every layer of the network.
if device == "CPU":
    for net in [m_retinanet]:
        supported_layers = plugin.get_supported_layers(net)
        not_supported_layers = [name for name in net.layers.keys() if name not in supported_layers]
        if not not_supported_layers:
            # Every layer is supported; move on to the next network.
            continue
        unsupported_msg = "Following layers are not supported by the plugin for specified device {}:\n {}".format(
            plugin.device, ', '.join(not_supported_layers))
        log.error(unsupported_msg)
        log.error("Please try to specify cpu extensions library path in demo's command line parameters using -l "
                  "or --cpu_extension command line argument")
        sys.exit(1)

In [5]:
# Parallel lists, one entry per network: first input name, first output name, loaded network.
input_blob, out_blob, exec_net = [], [], []
for i, net in enumerate([m_retinanet]):
    # Only the first input and the first output of each network are recorded.
    input_blob.append(next(iter(net.inputs)))
    out_blob.append(next(iter(net.outputs)))
    log.info("Loading IR to the plugin...")
    exec_net.append(plugin.load(network=net, num_requests=2))
    # The input shape is unpacked as n, c, h, w; note that it is printed in n, h, c, w order.
    n, c, h, w = net.inputs[input_blob[i]].shape
    print(n, h, c, w)


1 1080 3 1920


In [6]:
def preprocess_image(x, mode='caffe'):
    """ Return a float32, normalised copy of an image; the input array is left untouched.
    Args
        x: np.array whose last axis holds the colour channels.
        mode: One of "caffe" or "tf".
            - caffe: subtracts 103.939, 116.779 and 123.68 from channels
                0, 1 and 2 of the last axis, without scaling.
            - tf: divides by 127.5 and subtracts 1, mapping 0..255 to -1..1.
            Any other value returns the float32 copy unchanged.
    Returns
        The preprocessed float32 array.
    """
    # adapted from "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py"
    # without the RGB -> BGR conversion, because the input is assumed to be BGR already

    # astype always yields a fresh float32 array, which keeps compatibility with opencv
    x = x.astype(np.float32)

    if mode == 'caffe':
        # per-channel offsets, applied to channel 0, 1, 2 in that order
        for channel, mean in enumerate((103.939, 116.779, 123.68)):
            x[..., channel] -= mean
    elif mode == 'tf':
        x /= 127.5
        x -= 1.

    return x
def compute_resize_scale(image_shape, min_side=800, max_side=1333):
    """ Compute an image scale such that the image size is constrained to min_side and max_side.
    Args
        image_shape: Sequence whose first two entries are (rows, cols). Any further
            entries (e.g. a channel count) are ignored, so both colour (rows, cols, 3)
            and grayscale (rows, cols) shapes are accepted.
        min_side: The image's min side will be equal to min_side after resizing.
        max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.
    Returns
        A resizing scale.
    """
    # only the spatial dimensions matter; slicing avoids failing on 2-D (grayscale) shapes
    rows, cols = image_shape[:2]

    smallest_side = min(rows, cols)
    largest_side = max(rows, cols)

    # rescale the image so the smallest side is min_side
    scale = min_side / smallest_side

    # check if the largest side is now greater than max_side, which can happen
    # when images have a large aspect ratio
    if largest_side * scale > max_side:
        scale = max_side / largest_side

    return scale


def resize_image(img, min_side=800, max_side=1333):
    """ Scale an image uniformly so that its sides respect min_side and max_side.
    Args
        img: Image array; its ``shape`` is passed to compute_resize_scale.
        min_side: Target length of the shorter side after resizing.
        max_side: Upper bound for the longer side; the scale is reduced when it would be exceeded.
    Returns
        A tuple (resized image, scale that was applied).
    """
    # the same factor is used on both axes, so the aspect ratio is kept
    factor = compute_resize_scale(img.shape, min_side=min_side, max_side=max_side)
    resized = cv2.resize(img, None, fx=factor, fy=factor)
    return resized, factor

In [13]:
def generate_detection(input_frame,n=1,c=3,w=1920,h=1080,thresh=.1):
    """ Run the first loaded network on one frame and return the filtered detections.
    Args
        input_frame: Image array accepted by cv2.resize; it is resized to (w, h) and
            rearranged from channel-last to an (n, c, h, w) tensor.
        n, c, w, h: Dimensions of the network input tensor.
        thresh: Rows are kept only when column 2 of the output is above this value
            (presumably the confidence score -- confirm against the model's output layout).
    Returns
        2-D array with one row per kept detection. Rows containing any negative
        value are dropped as well.
    Raises
        RuntimeError: If the inference request does not finish with status 0.
    """
    op_frame = cv2.resize(input_frame, (w, h)).transpose((2, 0, 1)).reshape(n, c, h, w)
    ### we can add multiple requests and just enumerate request ids
    request_id = 1
    exec_net[0].start_async(request_id=request_id, inputs={input_blob[0]: op_frame})
    # wait(-1) blocks until the request is done; anything but 0 means no usable output.
    status = exec_net[0].requests[request_id].wait(-1)
    if status != 0:
        # Previously this path fell through to an unbound `res` and an obscure UnboundLocalError.
        raise RuntimeError("Inference request {} finished with status {}".format(request_id, status))
    res = exec_net[0].requests[request_id].outputs[out_blob[0]]
    # a boolean mask over the first three axes selects whole rows of the last axis
    res_filt = res[res[:, :, :, 2] > thresh]
    res_filt = res_filt[np.min(res_filt, axis=1) >= 0]
    return res_filt
    

In [8]:
# COCO class names keyed by consecutive integer ids starting at 0 (ten names per row).
labels_to_names = dict(enumerate([
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',  # 0-9
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',  # 10-19
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',  # 20-29
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',  # 30-39
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',  # 40-49
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',  # 50-59
    'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',  # 60-69
    'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',  # 70-79
]))


In [9]:
# Drawing colours loaded from a pickle file; generate_plot reads them via .loc[...]['colors'].
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
colors_labels = pd.read_pickle('retinanet_coco_labels_colors.pickle')

In [10]:
def generate_plot(in_frame,thresh=.2):
    """ Detect objects in a frame and draw a coloured box and class name for each one.
    Args
        in_frame: Image array to analyse; boxes and text are drawn onto it with
            cv2.rectangle / cv2.putText.
        thresh: Detection threshold forwarded to generate_detection.
    Returns
        The annotated frame.
    """
    initial_h, initial_w = in_frame.shape[:2]
    # Use the argument, not a global `frame`, so the function works on whatever it is given.
    res_filt = generate_detection(in_frame, thresh=thresh)
    # Columns 3.. are multiplied by (w, h, w, h) to turn them into pixel corner coordinates.
    bboxes = np.multiply([[initial_w, initial_h, initial_w, initial_h]], (res_filt[:, 3:])).astype('int')
    labels = res_filt[:, 1].astype(int).flatten()
    for box, label in zip(bboxes, labels):
        # The same offset is used for the colour and the name so they always describe the
        # same detection. NOTE(review): assumes the network's class ids are 1-based while
        # colors_labels / labels_to_names are 0-based -- confirm against the model output.
        class_id = int(label) - 1
        x_min, y_min, x_max, y_max = (int(v) for v in box)
        in_frame = cv2.rectangle(in_frame, (x_min, y_min), (x_max, y_max),
                                 colors_labels.loc[class_id]['colors'], 2)
        cv2.putText(in_frame, labels_to_names[class_id], (x_min - 15, y_min - 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
    return in_frame

In [11]:
# Replaces the RTSP source assigned in the configuration cell with a local video file;
# this is the value cv2.VideoCapture receives in the processing loop below.
channel = '../dataset/general/Candolim Beach Goa - Candolim Beach Tour & Travel Guide-_W-NaHPzwiM.mp4'

In [14]:
# Main loop: read frames from `channel`, run detection, overlay the running FPS and display
# the result until the stream ends or ESC is pressed.
cv2.namedWindow("Detection Results", cv2.WINDOW_NORMAL)
fd_thresh = .1
write_video = False
# The writer is created lazily on the first frame; None means nothing to release.
out = None
cap = cv2.VideoCapture(channel)
retry_connect = 10
cur_request_id = 0
fps_fd = []
net_fps = []
try:
    while cap.isOpened():
        # keep a sliding window of the last 100 measurements for the displayed average
        fps_fd = fps_fd[-100:]
        initial_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        initial_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        inf_start_fd = time.time()
        # read 10 frames and keep only the last one
        for i in range(10):
            ret, frame = cap.read()
        if not cap.isOpened():
            # the capture dropped: try to reopen it, giving up once the retries are used
            cap = cv2.VideoCapture(channel)
            retry_connect -= 1
            if retry_connect < 0:
                break
            continue
        if not ret or frame is None:
            # no frame delivered (e.g. end of a video file): stop instead of crashing downstream
            break
        frame = generate_plot(frame, thresh=.2)
        det_time_fd = time.time() - inf_start_fd

        fps_fd.append(1 / det_time_fd)
        cv2.putText(frame, "Inference FPS  detection: {:.3f} ".format(np.mean(fps_fd)), (10, int(initial_h - 50)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
        net_fps.append(np.mean(fps_fd))

        if write_video:
            if out is None:
                out = cv2.VideoWriter('../output_vids/'+datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_")+os.path.basename(channel)+'_out.mp4',cv2.VideoWriter_fourcc('M','J','P','G'), 20, (frame.shape[1],frame.shape[0]))
            out.write(frame)
        cv2.imshow("Detection Results", frame)

        # mask to the low byte so ESC is recognised even if extra bits are set in the key code
        key = cv2.waitKey(1) & 0xFF
        if key == 27:
            break
finally:
    # always release the writer, window and capture, even when a frame raised an error
    if out is not None:
        out.release()
    cv2.destroyAllWindows()
    cap.release()
    
        
    