# Real-time Multi-object detection and tracking

# Imports

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
import copy
import time
import datetime
from multiprocessing import Queue, Pool
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from tensorflow.core.framework import graph_pb2

# Abort early when the installed TensorFlow release is older than 1.9.0.
if StrictVersion('1.9.0') > StrictVersion(tf.__version__):
    raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [2]:
from utils import label_map_util

from utils import visualization_utils as vis_util
#from imutils.video import WebcamVideoStream
from imutils.video import FPS

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/nvidia/.local/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/nvidia/.local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/nvidia/.local/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 499, in start
    self.io_loop.start()
  File "/home/nvidia/.local/lib/python2.7/site-packages/tornado/ioloop.py", line 1073, in start

# Model preparation 

## Variables

In [3]:
# Name of the model directory to load. The commented-out line keeps the
# previously used model name for reference.
#MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_NAME = 'ssd_mobilenet_v11'
# Path to the frozen detection graph, i.e. the actual model used for object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

# Label map file used to attach the correct label string to each detected box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

# Maximum number of label-map categories to load; also used as the last
# dimension of the score placeholder built in load_frozenmodel().
NUM_CLASSES = 90

In [4]:
# Session config shared by every session in this notebook:
#   * allow_soft_placement lets TensorFlow fall back to another device when an
#     op cannot be placed on the requested one (separate GPU/CPU addressing);
#   * allow_growth makes the GPU allocator grow on demand instead of
#     reserving the memory up front.
_tf_config = tf.ConfigProto(
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(allow_growth=True),
)

## Load a (frozen) TensorFlow model into memory.

In [14]:
def load_frozenmodel():
    """Load the frozen graph and split it into a GPU part and a CPU part.

    The graph stored at ``PATH_TO_FROZEN_GRAPH`` is cut at the two nodes
    ``Postprocessor/convert_scores`` and ``Postprocessor/ExpandDims_1``:

    * these two nodes plus everything they transitively depend on are
      imported under ``/gpu:0``;
    * all remaining nodes are imported under ``/cpu:0`` together with two
      placeholder nodes that carry the same names as the cut nodes.

    The node names and placeholder shapes are hardcoded for ssd_mobilenet.

    Reads the module-level names ``ssd_shape``, ``NUM_CLASSES`` and
    ``PATH_TO_FROZEN_GRAPH``; ``ssd_shape`` has to be defined before this
    function is called.

    Returns:
        tuple: ``(detection_graph, score, expand)`` where ``detection_graph``
        holds both imported parts and ``score`` / ``expand`` are the two
        placeholder tensors created in the temporary input graph.

    Raises:
        AssertionError: if one of the two cut nodes is missing from the
            frozen graph.
    """
    print('Loading frozen model into memory...')

    def _node_name(n):
        # "^name" marks a control dependency, "name:k" selects an output;
        # both are reduced to the plain node name.
        if n.startswith("^"):
            return n[1:]
        return n.split(":")[0]

    # Build the two placeholders in a scratch graph. Making the graph the
    # default is all that is needed to create them, so no tf.Session (which
    # would be constructed with default options instead of _tf_config) is
    # opened here.
    input_graph = tf.Graph()
    with input_graph.as_default():
        # Second placeholder dimension: 7326 for ssd_shape == 600, else 1917.
        if ssd_shape == 600:
            shape = 7326
        else:
            shape = 1917

        score = tf.placeholder(tf.float32, shape=(None, shape, NUM_CLASSES), name="Postprocessor/convert_scores")
        expand = tf.placeholder(tf.float32, shape=(None, shape, 1, 4), name="Postprocessor/ExpandDims_1")

        # Fetch the NodeDefs of the freshly created placeholders.
        for node in input_graph.as_graph_def().node:
            if node.name == "Postprocessor/convert_scores":
                score_def = node
            if node.name == "Postprocessor/ExpandDims_1":
                expand_def = node

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())

        dest_nodes = ['Postprocessor/convert_scores', 'Postprocessor/ExpandDims_1']

        # Index the graph: node by name, input names per node, original position.
        edges = {}
        name_to_node_map = {}
        node_seq = {}
        for seq, node in enumerate(od_graph_def.node):
            n = _node_name(node.name)
            name_to_node_map[n] = node
            edges[n] = [_node_name(x) for x in node.input]
            node_seq[n] = seq
        for d in dest_nodes:
            assert d in name_to_node_map, "%s is not in graph" % d

        # Collect every node reachable from the cut nodes by following inputs.
        # The visiting order does not matter because the result is a set.
        nodes_to_keep = set()
        next_to_visit = dest_nodes[:]
        while next_to_visit:
            n = next_to_visit.pop()
            if n in nodes_to_keep:
                continue
            nodes_to_keep.add(n)
            next_to_visit.extend(edges[n])

        # Partition all nodes, preserving their order in the frozen graph.
        # Membership is tested against the set, not a list, to stay linear.
        ordered_names = sorted(node_seq, key=lambda name: node_seq[name])
        nodes_to_keep_list = [name for name in ordered_names if name in nodes_to_keep]
        nodes_to_remove_list = [name for name in ordered_names if name not in nodes_to_keep]

        keep = graph_pb2.GraphDef()
        for n in nodes_to_keep_list:
            keep.node.extend([copy.deepcopy(name_to_node_map[n])])

        # The CPU part starts with the two placeholders standing in for the
        # cut nodes, followed by all nodes that are not part of the GPU part.
        remove = graph_pb2.GraphDef()
        remove.node.extend([score_def])
        remove.node.extend([expand_def])
        for n in nodes_to_remove_list:
            remove.node.extend([copy.deepcopy(name_to_node_map[n])])

        with tf.device('/gpu:0'):
            tf.import_graph_def(keep, name='')
        with tf.device('/cpu:0'):
            tf.import_graph_def(remove, name='')

    return detection_graph, score, expand

# Read as a global by load_frozenmodel(): 600 selects a placeholder dimension
# of 7326, any other value selects 1917.
ssd_shape = 300
# Build the split graph; `score` and `expand` are the placeholders that
# load_frozenmodel() created for the two cut points.
graph, score, expand = load_frozenmodel()

Loading frozen model into memory...


## Loading label map

In [6]:
# Read the label map file, turn it into at most NUM_CLASSES categories (using
# display names) and index those categories for lookup during detection.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Helper functions

In [7]:
class FPS2:
    """Frame counter that reports a global FPS and a per-interval ("local") FPS.

    Call ``start()`` once, ``update()`` once per processed frame and
    ``stop()`` at the end. Whenever more than ``interval`` seconds have passed
    since the current local window began, ``update()`` prints the local FPS
    and opens a new window.
    """

    def __init__(self, interval):
        """Create a counter whose local window lasts ``interval`` seconds."""
        self._glob_start = None
        self._glob_end = None
        self._glob_numFrames = 0
        self._local_start = None
        self._local_numFrames = 0
        self._interval = interval
        self.curr_local_elapsed = None
        # Becomes True after the first update(); until then fps_local() is 0.0.
        self.first = False

    def start(self):
        """Start the global and the local timer; returns ``self`` for chaining."""
        self._glob_start = datetime.datetime.now()
        self._local_start = self._glob_start
        return self

    def stop(self):
        """Freeze the global timer."""
        self._glob_end = datetime.datetime.now()

    def update(self):
        """Count one frame and print the local FPS once the interval has elapsed."""
        self.first = True
        curr_time = datetime.datetime.now()
        self.curr_local_elapsed = (curr_time - self._local_start).total_seconds()
        self._glob_numFrames += 1
        self._local_numFrames += 1
        if self.curr_local_elapsed > self._interval:
            print("> FPS: {}".format(self.fps_local()))
            # Open a new local window starting at this frame.
            self._local_numFrames = 0
            self._local_start = curr_time

    def elapsed(self):
        """Return the seconds between start() and stop().

        When stop() has not been called yet, the time elapsed so far is
        returned instead of failing on the missing end timestamp.
        """
        end = self._glob_end
        if end is None:
            end = datetime.datetime.now()
        return (end - self._glob_start).total_seconds()

    def fps(self):
        """Return the average FPS over the whole run (0.0 if no time has elapsed)."""
        seconds = self.elapsed()
        if seconds <= 0:
            # Start and end timestamps can coincide on coarse clocks.
            return 0.0
        return self._glob_numFrames / seconds

    def fps_local(self):
        """Return the FPS of the current local window, rounded to one decimal.

        Returns 0.0 before the first update() and when the measured window
        length is not positive.
        """
        if not self.first or self.curr_local_elapsed <= 0:
            return 0.0
        return round(self._local_numFrames / self.curr_local_elapsed, 1)

# Detection

In [13]:
from session_worker import SessionWorker
from webcam_stream import WebcamVideoStream
import time

# Parameters read as globals by detection().
# Source handed to WebcamVideoStream.
video_input         = 0
# Show annotated frames in an OpenCV window; when False, detections are printed instead.
visualize           = True
# Overlay the current local FPS on the displayed frame.
vis_text            = True
# Frame budget; only enforced when visualize is False.
max_frames          = 200
# Frame size handed to WebcamVideoStream.
width               = 600
height              = 600
# Seconds between FPS printouts (interval passed to FPS2).
fps_interval        = 5
# When not visualizing, detections are printed every det_interval-th frame.
det_interval        = max_frames//10
# Minimum score a detection needs in order to be printed.
det_th              = 0.5


def detection(detection_graph, category_index, score, expand):
    """Run the split GPU/CPU detection loop on frames from the video stream.

    Each frame is first sent to the GPU worker, which evaluates the two cut
    tensors ``Postprocessor/convert_scores`` and ``Postprocessor/ExpandDims_1``.
    Their values are then fed to the CPU worker, which evaluates boxes, scores,
    classes and the detection count. Depending on the module-level
    ``visualize`` flag the result is either drawn into an OpenCV window
    (quit with 'q') or printed every ``det_interval`` frames until
    ``max_frames`` frames have been processed.

    Reads the module-level settings ``_tf_config``, ``video_input``,
    ``visualize``, ``vis_text``, ``max_frames``, ``width``, ``height``,
    ``fps_interval``, ``det_interval`` and ``det_th``.

    Args:
        detection_graph: graph containing both imported model parts.
        category_index: mapping from class id to a dict with a ``'name'``
            entry; also passed on to the visualization helper.
        score: kept for backward compatibility; the value is never read.
        expand: kept for backward compatibility; the value is never read.
    """
    print("Building the Graph..")

    # Everything that has to be shut down is tracked here so the ``finally``
    # clause can release exactly what was started, even if the loop raises.
    gpu_worker = None
    cpu_worker = None
    fps = None
    video_stream = None
    cur_frames = 0
    try:
        with detection_graph.as_default():
            with tf.Session(graph=detection_graph, config=_tf_config):
                # Define input and output tensors
                image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
                detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name('num_detections:0')

                # Outputs of the GPU part and the matching placeholder inputs
                # ("_1" suffix) of the CPU part.
                score_out = detection_graph.get_tensor_by_name('Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name('Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name('Postprocessor/ExpandDims_1_1:0')

                # Threading: one worker per model part
                gpu_worker = SessionWorker("GPU", detection_graph, _tf_config)
                cpu_worker = SessionWorker("CPU", detection_graph, _tf_config)
                gpu_opts = [score_out, expand_out]
                cpu_opts = [detection_boxes, detection_scores, detection_classes, num_detections]

                # Start video stream and FPS calculation
                fps = FPS2(fps_interval).start()
                video_stream = WebcamVideoStream(video_input, width, height).start()
                print("(Press 'q' to Exit)")
                print("Starting the Detection..")

                while video_stream.isActive():

                    # Split model in separate GPU and CPU session threads
                    if gpu_worker.is_sess_empty():
                        # Read a video frame and add the batch dimension.
                        # NOTE(review): no BGR->RGB conversion happens here;
                        # confirm WebcamVideoStream.read() returns frames in
                        # the channel order the model expects.
                        image = video_stream.read()
                        image_expanded = np.expand_dims(image, axis=0)
                        gpu_feeds = {image_tensor: image_expanded}
                        # The frame only travels along when it will be drawn on.
                        gpu_extras = image if visualize else None
                        gpu_worker.put_sess_queue(gpu_opts, gpu_feeds, gpu_extras)

                    g = gpu_worker.get_result_queue()
                    if g is not None:
                        # GPU part finished a frame: pass it on to the CPU part.
                        score_val, expand_val, image = g["results"][0], g["results"][1], g["extras"]
                        if cpu_worker.is_sess_empty():
                            # The CPU worker accepts a new request; otherwise
                            # this GPU result is dropped.
                            cpu_feeds = {score_in: score_val, expand_in: expand_val}
                            cpu_worker.put_sess_queue(cpu_opts, cpu_feeds, image)

                    c = cpu_worker.get_result_queue()
                    if c is None:
                        # No finished detection yet: wait briefly, no FPS update.
                        time.sleep(0.005)
                        continue

                    results = c["results"]
                    boxes, scores, classes = results[0], results[1], results[2]
                    image = c["extras"]

                    if visualize:
                        # Visualization of the results of a detection.
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            image,
                            np.squeeze(boxes),
                            np.squeeze(classes).astype(np.int32),
                            np.squeeze(scores),
                            category_index,
                            use_normalized_coordinates=True,
                            line_thickness=8)
                        if vis_text:
                            cv2.putText(image, "fps: {}".format(fps.fps_local()), (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.75, (77, 255, 9), 2)
                        cv2.imshow('object_detection', image)
                        # Exit option
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
                    else:
                        cur_frames += 1
                        # Print the detections above the threshold on every
                        # det_interval-th frame.
                        if cur_frames % det_interval == 0:
                            for box, det_score, _class in zip(np.squeeze(boxes), np.squeeze(scores), np.squeeze(classes)):
                                if det_score > det_th:
                                    # Cast to int so the lookup matches the
                                    # integer class ids used when visualizing.
                                    label = category_index[int(_class)]['name']
                                    print("> label: {}\nscore: {}\nbox: {}".format(label, det_score, box))
                        # Exit after max frames if no visualization
                        if cur_frames >= max_frames:
                            break
                    fps.update()
    finally:
        # Release threads, camera and windows on normal exit and on errors.
        if gpu_worker is not None:
            gpu_worker.stop()
        if cpu_worker is not None:
            cpu_worker.stop()
        if fps is not None:
            fps.stop()
        if video_stream is not None:
            video_stream.stop()
        cv2.destroyAllWindows()

    print('> [INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('> [INFO] approx. FPS: {:.2f}'.format(fps.fps()))


# Start the detection loop. `score` and `expand` are passed only because the
# signature requires them; detection() does not read the values passed in.
detection(graph, category_index, score, expand)


Building the Graph..
> Start video stream with shape: 640,480
(Press 'q' to Exit)
Starting the Detection..
> FPS: 0.0
> FPS: 18.4
> FPS: 17.2
> FPS: 18.0
> FPS: 17.7
> FPS: 17.6
> FPS: 17.9
> [INFO] elapsed time (total): 55.87
> [INFO] approx. FPS: 10.42
