# Real-time Multi-object detection and tracking

# Imports

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
import copy
import time
import datetime
from multiprocessing import Queue, Pool
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from tensorflow.core.framework import graph_pb2

# Abort early when the installed TensorFlow is older than 1.9.0.
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [2]:
from utils import label_map_util
from utils import visualization_utils as vis_util
from session_worker import SessionWorker
from webcam_stream import WebcamVideoStream
from FPS_helper import FPS

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



# Model preparation 

## Variables

In [3]:
# Directory name of the model to load
# (alternative: 'ssd_mobilenet_v1_coco_2017_11_17').
MODEL_NAME = 'ssd_mobilenet_v11'

# Path to the frozen detection graph. This is the actual model that is used
# for the object detection.
PATH_TO_FROZEN_GRAPH = '{}/frozen_inference_graph.pb'.format(MODEL_NAME)

# Label map holding the strings used to put the correct label on each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

# Number of classes: passed as max_num_classes when reading the label map and
# used as the last dimension of the score placeholder.
NUM_CLASSES = 90

In [4]:
# Session config: allow separate GPU/CPU addressing (soft placement) and
# let GPU memory allocation grow on demand instead of reserving it up front.
_tf_config = tf.ConfigProto(allow_soft_placement=True)
_tf_config.gpu_options.allow_growth=True

## Load a (frozen) TensorFlow model into memory

In [10]:
def load_frozenmodel():
    """Load the frozen detection graph and split it into a GPU and a CPU part.

    The graph read from ``PATH_TO_FROZEN_GRAPH`` is cut at the two nodes
    ``Postprocessor/convert_scores`` and ``Postprocessor/ExpandDims_1``:

    * every node those two depend on (the two nodes included) is imported
      under ``/gpu:0``;
    * all remaining nodes are imported under ``/cpu:0``, preceded by two
      placeholder node definitions that carry the same names as the cut
      nodes, so the CPU part can be fed with the GPU part's results.

    Reads the module-level names ``ssd_shape``, ``NUM_CLASSES`` and
    ``PATH_TO_FROZEN_GRAPH``; ``ssd_shape`` must be defined before the call.
    The split points are hardcoded for ssd_mobilenet only.

    Returns:
        tuple: ``(detection_graph, score, expand)`` where ``detection_graph``
        is the re-assembled ``tf.Graph`` and ``score`` / ``expand`` are the
        placeholders that were created in the temporary input graph.

    Raises:
        AssertionError: If one of the two split nodes is missing in the graph.
    """
    print('Loading frozen model into memory...')

    def _node_name(n):
        # Strip the leading "^" of a control input, otherwise the ":<index>"
        # output suffix, so that only the bare node name remains.
        if n.startswith("^"):
            return n[1:]
        return n.split(":")[0]

    # Build the two placeholders in a throw-away graph; only their node
    # definitions are needed for the CPU part below.
    input_graph = tf.Graph()
    with tf.Session(graph=input_graph):
        # NOTE(review): 7326 / 1917 are presumably the anchor-box counts for
        # a 600 / 300 pixel input -- confirm against the model config.
        shape = 7326 if ssd_shape == 600 else 1917

        score = tf.placeholder(tf.float32, shape=(None, shape, NUM_CLASSES), name="Postprocessor/convert_scores")
        expand = tf.placeholder(tf.float32, shape=(None, shape, 1, 4), name="Postprocessor/ExpandDims_1")

        for node in input_graph.as_graph_def().node:
            if node.name == "Postprocessor/convert_scores":
                score_def = node
            if node.name == "Postprocessor/ExpandDims_1":
                expand_def = node

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        # Keep the file open only for as long as it takes to parse it.
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())

        dest_nodes = ['Postprocessor/convert_scores', 'Postprocessor/ExpandDims_1']

        # Index the graph: node by name, input names per node, original order.
        edges = {}
        name_to_node_map = {}
        node_seq = {}
        for seq, node in enumerate(od_graph_def.node):
            n = _node_name(node.name)
            name_to_node_map[n] = node
            edges[n] = [_node_name(x) for x in node.input]
            node_seq[n] = seq
        for d in dest_nodes:
            assert d in name_to_node_map, "%s is not in graph" % d

        # Collect everything the split nodes depend on. The visiting order
        # does not matter because the result is a set that is sorted below.
        nodes_to_keep = set()
        pending = list(dest_nodes)
        while pending:
            n = pending.pop()
            if n in nodes_to_keep:
                continue
            nodes_to_keep.add(n)
            pending.extend(edges[n])

        # Restore the original node order for both halves; membership is
        # tested against the set to keep this linear in the number of nodes.
        nodes_to_keep_list = sorted(nodes_to_keep, key=lambda n: node_seq[n])
        nodes_to_remove_list = sorted(
            (n for n in node_seq if n not in nodes_to_keep),
            key=lambda n: node_seq[n])

        keep = graph_pb2.GraphDef()
        keep.node.extend(
            [copy.deepcopy(name_to_node_map[n]) for n in nodes_to_keep_list])

        # The CPU half starts with the two placeholders standing in for the
        # tensors produced by the GPU half.
        remove = graph_pb2.GraphDef()
        remove.node.extend([score_def, expand_def])
        remove.node.extend(
            [copy.deepcopy(name_to_node_map[n]) for n in nodes_to_remove_list])

        with tf.device('/gpu:0'):
            tf.import_graph_def(keep, name='')
        with tf.device('/cpu:0'):
            tf.import_graph_def(remove, name='')

        print("Done!")

        return detection_graph, score, expand

# load_frozenmodel() reads ssd_shape as a global, so it has to be set first.
ssd_shape = 300
graph, score, expand = load_frozenmodel()

Loading frozen model into memory...
Done!


## Loading label map

In [6]:
# Build the lookup that maps a detected class id to its category entry.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Helper functions

In [7]:
def conv_detect2track(box, width, height):
    """Convert a normalized detection box into an absolute tracker box.

    Args:
        box: ``(ymin, xmin, ymax, xmax)`` given as fractions of the frame size.
        width: Frame width the x values are scaled by.
        height: Frame height the y values are scaled by.

    Returns:
        list: ``[x, y, box_width, box_height]`` in absolute units.
    """
    top, left, bottom, right = box
    top_abs, bottom_abs = top * height, bottom * height
    left_abs, right_abs = left * width, right * width
    return [left_abs, top_abs, right_abs - left_abs, bottom_abs - top_abs]

def conv_track2detect(box, width, height):
    """Convert an absolute tracker box into a normalized detection box.

    Args:
        box: ``(x, y, box_width, box_height)`` in absolute units.
        width: Frame width the x values are divided by.
        height: Frame height the y values are divided by.

    Returns:
        numpy.ndarray: ``[ymin, xmin, ymax, xmax]`` as fractions of the
        frame size.
    """
    # Reciprocals are taken once and multiplied in, as fractions of the frame.
    inv_width = 1. / width
    inv_height = 1. / height
    left, top, box_w, box_h = box
    return np.array([
        top * inv_height,
        left * inv_width,
        (top + box_h) * inv_height,
        (left + box_w) * inv_width,
    ])

# Visualization functions

In [8]:
def vis_image(image, boxes, classes, scores, category_index, fps, visualize=False, det_interval=5, det_th=0.5, max_frames=500):
    """Show or print the current detections and report whether to keep going.

    With ``visualize`` set, boxes and labels are drawn onto ``image`` (plus
    the local fps value when ``fps`` is given) and the frame is shown in an
    OpenCV window. Without it, and only when ``fps`` is given, detections
    scoring above ``det_th`` are printed on every ``det_interval``-th frame.

    Args:
        image: Frame to draw on; only used when ``visualize`` is true.
        boxes: Detection boxes, iterated in lockstep with scores and classes.
        classes: Class ids used as keys into ``category_index``.
        scores: Detection scores.
        category_index: Mapping from class id to a dict with a ``'name'`` key.
        fps: Frame counter object (``fps_local()``, ``_glob_numFrames``) or a
            falsy value to disable fps output and the frame limit.
        visualize: Display the annotated frame instead of printing.
        det_interval: Print detections every this many frames.
        det_th: Minimum score for a detection to be printed.
        max_frames: Frame count at which the non-visual mode stops.

    Returns:
        bool: ``False`` when 'q' was pressed in the window or ``max_frames``
        was reached in non-visual mode, ``True`` otherwise.
    """
    if visualize:
        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            boxes,
            classes,
            scores,
            category_index,
            use_normalized_coordinates=True,
            line_thickness=3,
            min_score_thresh=.5)

        if fps:
            fps_label = "fps: {}".format(fps.fps_local())
            cv2.putText(image, fps_label, (10,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
        cv2.imshow('object_detection', image)

        # Exit option: stop as soon as 'q' is pressed in the window.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            return False
        return True

    if not fps:
        # Nothing to print and no frame limit without a frame counter.
        return True

    for det_box, det_score, det_class in zip(boxes, scores, classes):
        if fps._glob_numFrames % det_interval != 0:
            continue
        if not det_score > det_th:
            continue
        label = category_index[det_class]['name']
        print("label: {}\nscore: {}\nbox: {}".format(label, det_score, det_box))

    # Exit after max frames if no visualization
    if fps._glob_numFrames >= max_frames:
        return False
    return True

# Detection

In [12]:
# Runtime parameters of the detection loop
video_input = 0                  # source handed to WebcamVideoStream
visualize = True                 # show annotated frames instead of printing
vis_text = True
max_frames = 500                 # frame limit when running without visualization
width = 600                      # requested stream width
height = 600                     # requested stream height
fps_interval = 5                 # interval handed to the FPS helper
det_th = 0.5                     # minimum score for a printed detection
det_interval = max_frames // 10  # print detections every n-th frame


def detection(detection_graph, category_index, score, expand):
    """Run the detection loop on the video stream, alternating with KCF tracking.

    The split model is driven by two ``SessionWorker`` instances: the "GPU"
    worker produces the two intermediate tensors, the "CPU" worker turns them
    into boxes, scores, classes and the detection count. Whenever a detection
    reports at most five objects, the loop switches to tracking for 20 frames:
    one KCF tracker per non-zero box is initialised on the detection frame and
    updated on every new frame, before detection takes over again.

    Reads the module-level settings ``_tf_config``, ``video_input``,
    ``width``, ``height``, ``fps_interval``, ``visualize``, ``det_interval``,
    ``det_th`` and ``max_frames``.

    Args:
        detection_graph: Graph holding both model halves, as returned by
            ``load_frozenmodel``.
        category_index: Mapping from class id to category entry, handed on to
            ``vis_image``.
        score: Unused; the score input tensor is looked up by name in
            ``detection_graph``. Kept for interface compatibility.
        expand: Unused; the box input tensor is looked up by name in
            ``detection_graph``. Kept for interface compatibility.

    Returns:
        None. The loop ends when the stream becomes inactive or ``vis_image``
        returns ``False``; workers, fps counter and stream are stopped
        afterwards, also when an exception is raised.
    """
    import sys
    sys.path.append(os.getcwd()+'/KCFpy')
    import kcftracker

    tracker_counter = 0
    track = False
    first_track = False
    trackers = []  # (row index into boxes, tracker) pairs

    print("Building the Graph..")

    # Pre-declared so the cleanup below can tell what was actually started.
    gpu_worker = cpu_worker = fps = vs = None
    try:
        with detection_graph.as_default():
            with tf.Session(graph=detection_graph,config=_tf_config):
                # Define Input and Output tensors
                image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
                detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name('num_detections:0')

                # Outputs of the GPU half and the matching inputs of the CPU
                # half (the latter are looked up under their "_1" names).
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')

                # Threading
                gpu_worker = SessionWorker("GPU",detection_graph,_tf_config)
                cpu_worker = SessionWorker("CPU",detection_graph,_tf_config)
                gpu_opts = [score_out, expand_out]
                cpu_opts = [detection_boxes, detection_scores, detection_classes, num_detections]

                # Start Video Stream and FPS calculation
                fps = FPS(fps_interval).start()
                vs = WebcamVideoStream(video_input,width,height).start()

                print("Starting the Detection..")

                while vs.isActive():
                    if not track:
                        # split model in separate gpu and cpu session threads
                        if gpu_worker.is_sess_empty():
                            # read video frame and add the batch dimension
                            image = vs.read()
                            image_expanded = np.expand_dims(image, axis=0)
                            gpu_feeds = {image_tensor: image_expanded}
                            # The frame always travels along with the request:
                            # besides visualization it is needed to initialise
                            # the trackers, also when visualize is off.
                            gpu_extras = image
                            gpu_worker.put_sess_queue(gpu_opts,gpu_feeds,gpu_extras)

                        g = gpu_worker.get_result_queue()
                        if g is not None:
                            # gpu thread has a result: hand it to the cpu
                            # thread if that one is free, else drop it.
                            score_val, expand_val, image = g["results"][0],g["results"][1],g["extras"]
                            if cpu_worker.is_sess_empty():
                                cpu_feeds = {score_in: score_val, expand_in: expand_val}
                                cpu_extras = image
                                cpu_worker.put_sess_queue(cpu_opts,cpu_feeds,cpu_extras)

                        c = cpu_worker.get_result_queue()
                        if c is None:
                            # cpu thread has no result yet: wait briefly and
                            # retry without counting a frame for the fps.
                            time.sleep(0.005)
                            continue

                        boxes, scores, classes, num = c["results"][0],c["results"][1],c["results"][2],c["results"][3]
                        image = c["extras"]

                        boxes, classes, scores = np.squeeze(boxes),np.squeeze(classes).astype(np.int32),np.squeeze(scores)
                        vis = vis_image(image, boxes, classes, scores, category_index, fps,
                                        visualize, det_interval, det_th, max_frames)
                        if not vis:
                            break

                        # Activate Tracker
                        if num <= 5: # Max number of objects to track
                            # NOTE(review): with visualize on, vis_image has
                            # presumably already drawn onto this frame --
                            # confirm whether the tracker should get a clean copy.
                            tracker_frame = image
                            track = True
                            first_track = True

                    # Tracking
                    else:
                        frame = vs.read()

                        if first_track:
                            # One tracker per non-zero box; the row index is
                            # kept so updates land in the row they came from.
                            trackers = []
                            tracker_boxes = boxes
                            for row in np.flatnonzero(~np.all(boxes == 0, axis=1)):
                                box_tracker = kcftracker.KCFTracker(False, True, False)
                                box_tracker.init(conv_detect2track(boxes[row],vs.real_width, vs.real_height), tracker_frame)
                                trackers.append((row, box_tracker))
                            first_track = False

                        for row, box_tracker in trackers:
                            tracker_box = box_tracker.update(frame)
                            tracker_boxes[row,:] = conv_track2detect(tracker_box, vs.real_width, vs.real_height)

                        # tracker_boxes aliases boxes, so the updated
                        # positions are shown on the frame they were tracked in.
                        vis = vis_image(frame, boxes, classes, scores, category_index, fps,
                                        visualize, det_interval, det_th, max_frames)
                        if not vis:
                            break

                        tracker_counter += 1
                        if tracker_counter >= 20: #Number of tracked frames between detections
                            track = False
                            tracker_counter = 0

                    fps.update()
    finally:
        # Stop whatever was started, also when the loop ended with an error.
        for resource in (gpu_worker, cpu_worker, fps, vs):
            if resource is not None:
                resource.stop()


# Start the detection/tracking loop with the graph and placeholders loaded above.
detection(graph, category_index, score, expand)


Building the Graph..
Start video stream with shape: 640,480
(Press 'q' to Exit)
Starting the Detection..
> FPS: 0.1
> FPS: 61.5
> FPS: 52.9
> FPS: 53.8
> FPS: 50.5
> [INFO] elapsed time (total): 30.44
> [INFO] approx. FPS: 41.07
