# Real-time Multi-object detection and tracking

# Imports

In [10]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2
import time
from multiprocessing import Queue, Pool
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# Fail fast when the installed TensorFlow is older than the minimum release
# this notebook supports.
if StrictVersion('1.9.0') > StrictVersion(tf.__version__):
    raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [14]:
from utils import label_map_util

from utils import visualization_utils as vis_util
#from imutils.video import WebcamVideoStream
from imutils.video import FPS

# Model preparation 

## Variables

In [16]:
# Name of the pre-trained detection model; it is also the directory that
# holds the model's files.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'

# Frozen inference graph of that model, i.e. the network that is actually
# executed for object detection.
PATH_TO_FROZEN_GRAPH = '{}/frozen_inference_graph.pb'.format(MODEL_NAME)

# Label map file used to attach the correct label to each detected box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

## Load a (frozen) TensorFlow model into memory.

In [17]:
# Build a dedicated graph and fill it with the frozen model stored on disk.
detection_graph = tf.Graph()

# Step 1: read the serialized GraphDef protobuf.
with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()

# Step 2: decode it.
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)

# Step 3: import the nodes into detection_graph; name='' keeps the original
# tensor names (no prefix is added).
with detection_graph.as_default():
    tf.import_graph_def(od_graph_def, name='')

## Loading label map

In [7]:
#category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

# Upper bound on the number of classes taken from the label map.
NUM_CLASSES = 90

# Parse the label map file ...
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)

# ... turn it into a list of categories, preferring the display names ...
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    use_display_name=True,
    max_num_classes=NUM_CLASSES)

# ... and build the index the detection code uses to look up a category
# (e.g. its 'name') from a class id.
category_index = label_map_util.create_category_index(categories)

## Helper functions

In [18]:
from threading import Thread
class WebcamVideoStream:
    """Read frames from a video source on a background thread.

    The reader thread keeps overwriting ``self.frame`` with the newest frame,
    so ``read()`` never blocks on the camera; it simply returns whatever was
    captured last (the same frame may therefore be returned more than once).

    Attributes set by this class:
        stream:  the underlying ``cv2.VideoCapture``.
        grabbed: success flag returned by the most recent ``stream.read()``.
        frame:   image returned by the most recent ``stream.read()``.
        stopped: set to True by ``stop()`` to ask the reader thread to finish.
    """

    def __init__(self, src, width, height):
        """Open the source and read a first frame.

        Args:
            src: source passed to ``cv2.VideoCapture`` (e.g. a camera index).
            width: frame width requested from the capture device.
            height: frame height requested from the capture device.

        The requested size is only passed on to the device; this class does
        not check whether the device actually applied it.
        """
        # initialize the video camera stream and read the first frame
        # from the stream
        self.stream = cv2.VideoCapture(src)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        (self.grabbed, self.frame) = self.stream.read()

        # initialize the variable used to indicate if the thread should
        # be stopped
        self.stopped = False

    def start(self):
        """Start the background reader thread and return ``self``."""
        # The thread is a daemon so that a crash or early exit of the main
        # program (before stop() is called) cannot keep the interpreter alive
        # forever in the endless read loop.
        self._thread = Thread(target=self.update, args=())
        self._thread.daemon = True
        self._thread.start()
        return self

    def update(self):
        """Reader loop: grab frames until ``stop()`` is called, then release."""
        # keep looping infinitely until the thread is stopped
        while True:
            # if the thread indicator variable is set, release the capture
            # device and end the thread
            if self.stopped:
                self.stream.release()
                return

            # otherwise, read the next frame from the stream
            (self.grabbed, self.frame) = self.stream.read()

    def read(self):
        """Return the frame most recently read."""
        return self.frame

    def stop(self):
        """Ask the reader thread to stop; it releases the stream on its own."""
        self.stopped = True

In [9]:
def conv_detect2track(box, width, height):
    """Convert a normalized detector box into an absolute tracker box.

    Args:
        box: (ymin, xmin, ymax, xmax), each a fraction of the frame size.
        width: frame width used to scale the x coordinates.
        height: frame height used to scale the y coordinates.

    Returns:
        The list [x, y, box_width, box_height] in absolute coordinates, where
        (x, y) is the scaled (xmin, ymin) corner.
    """
    top, left, bottom, right = box

    # Scale every edge first, then derive the extent from the scaled edges.
    top_abs, bottom_abs = top * height, bottom * height
    left_abs, right_abs = left * width, right * width

    return [left_abs, top_abs, right_abs - left_abs, bottom_abs - top_abs]

def conv_track2detect(box, width, height):
    """Convert an absolute tracker box into a normalized detector box.

    Args:
        box: (x, y, box_width, box_height) in absolute coordinates.
        width: frame width used to normalize the x coordinates.
        height: frame height used to normalize the y coordinates.

    Returns:
        A numpy array [ymin, xmin, ymax, xmax] expressed as fractions of the
        frame size.
    """
    # Reciprocals of the frame size: multiplying by them normalizes a value.
    x_scale = 1. / width
    y_scale = 1. / height

    left, top, box_w, box_h = box
    right = left + box_w
    bottom = top + box_h

    return np.array([top * y_scale, left * x_scale, bottom * y_scale, right * x_scale])

In [10]:
def worker(input_q, output_q):
    """Annotate frames taken from input_q and put them on output_q, forever.

    The loop alternates between two modes:

    * detection: run the frozen TensorFlow detector on one frame and draw
      the detections on it;
    * tracking: entered when the detector reported at most 5 objects. The
      non-empty detection boxes are then followed by KCF trackers (one
      tracker per box) for the next 20 frames, after which detection runs
      again.

    Args:
        input_q: queue delivering the frames (image arrays) to process.
        output_q: queue receiving the same frames with boxes and labels
            drawn on them.

    The function never returns normally; it is meant to run inside a worker
    process that is terminated from the outside.
    """
    from KCFpy import kcftracker

    max_tracked_objects = 5        # Max number of objects to track
    frames_per_tracking_run = 20   # Number of tracked frames between detections
    print_interval = 10            # Print one detection every N frames

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # The graph never changes, so its input/output tensors are looked up once
    # instead of on every frame.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    fps = FPS().start()
    frame_counter = 0
    tracker_counter = 0
    track = False
    first_track = False
    printer = False  # Using this boolean to print only few frame's details as text

    # State shared between the detection step and the tracking steps after it.
    boxes = classes = scores = None
    tracker_frame = None
    trackers = []
    tracked_rows = []
    frame_width = frame_height = 0

    try:
        while True:
            frame = input_q.get()
            frame_counter += 1
            if frame_counter % print_interval == 0:
                printer = True

            if not track:
                # Detection. The model expects a batch: shape [1, None, None, 3].
                image_np_expanded = np.expand_dims(frame, axis=0)
                (boxes, scores, classes, num) = sess.run(
                    [detection_boxes, detection_scores, detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                # Drop the batch dimension again.
                boxes = np.squeeze(boxes)
                classes = np.squeeze(classes).astype(np.int32)
                scores = np.squeeze(scores)

                # Visualization (draws on `frame`, which is what gets
                # forwarded below).
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    boxes,
                    classes,
                    scores,
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=3,
                    min_score_thresh=.5)

                # Print the first detection of this frame, then wait for the
                # next print interval.
                if printer:
                    for box, score, _class in zip(boxes, scores, classes):
                        label = category_index[_class]['name']
                        print("label: {}\nscore: {}\nbox: {}".format(label, score, box))
                        printer = False
                        break

                # Activate Tracker
                if num <= max_tracked_objects:
                    # NOTE(review): this frame already carries the drawn
                    # overlay, so the trackers are initialised on annotated
                    # pixels -- consider keeping an un-annotated copy.
                    tracker_frame = frame
                    track = True
                    first_track = True

            else:
                # Tracking
                if first_track:
                    # Pixel coordinates must refer to the real frame size,
                    # which is not necessarily the size requested from the
                    # camera.
                    frame_height, frame_width = tracker_frame.shape[:2]

                    # Remember which rows of `boxes` are real (non-zero)
                    # boxes so the tracker results are written back to the
                    # right rows.
                    tracked_rows = np.flatnonzero(~np.all(boxes == 0, axis=1))

                    # One independent tracker per box: re-initialising a
                    # single shared tracker would make every entry follow the
                    # last box only.
                    trackers = []
                    for row in tracked_rows:
                        box_tracker = kcftracker.KCFTracker(False, True, False)
                        box_tracker.init(
                            conv_detect2track(boxes[row], frame_width, frame_height),
                            tracker_frame)
                        trackers.append(box_tracker)
                    first_track = False

                # Move every tracked box to where its tracker finds the object.
                for row, box_tracker in zip(tracked_rows, trackers):
                    tracker_box = box_tracker.update(frame)
                    boxes[row, :] = conv_track2detect(tracker_box, frame_width, frame_height)

                # Classes and scores are those of the last detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    boxes,
                    classes,
                    scores,
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=3,
                    min_score_thresh=.5)

                tracker_counter += 1
                if tracker_counter >= frames_per_tracking_run:
                    track = False
                    tracker_counter = 0

            output_q.put(frame)
            fps.update()
    finally:
        # Reached only when the loop is left through an exception.
        fps.stop()
        sess.close()

# Detection

In [12]:
input_q = Queue(maxsize=5)
output_q = Queue(maxsize=5)
# worker() is handed to the pool as its "initializer", so each of the two
# processes enters the endless processing loop as soon as it starts.
pool = Pool(2, worker, (input_q, output_q))


print('[INFO] sampling THREADED frames from webcam...')
vs = WebcamVideoStream(src=0,width = 480,height=360).start()

fourcc = cv2.VideoWriter_fourcc(*"MJPG")

# The writer is opened on the first processed frame: VideoWriter must be
# created with exactly the size of the frames written to it, and the camera
# is not guaranteed to deliver the size that was requested.
out = None

fps = FPS().start()
f_count = 0

try:
    if vs.read() is None:
        # Without this check the workers would be fed None frames.
        raise RuntimeError('Could not read a frame from the webcam (src=0).')

    while True:
        frame = vs.read()
        # A failed read yields None; skip it instead of sending it to the
        # workers.
        if frame is not None:
            input_q.put(frame)

        # Nothing to show yet while the workers are still filling the queue.
        if not output_q.empty():
            output_rgb = output_q.get()
            if out is None:
                frame_height, frame_width = output_rgb.shape[:2]
                out = cv2.VideoWriter('output.avi', fourcc, 5.0,
                                      (frame_width, frame_height), True)
            out.write(output_rgb)
            cv2.imshow('Video', output_rgb)
            f_count += 1
            if f_count >= 50: #For testing.. limiting the number of frames
                break

        fps.update()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
finally:
    # Always shut down the workers, the camera thread, the video file and the
    # window, even when the loop above fails.
    pool.terminate()
    vs.stop()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

[INFO] sampling THREADED frames from webcam...
label: person
score: 0.530849039555
box: [0.40009513 0.21527338 0.9906151  0.8359817 ]
label: person
score: 0.530849039555
box: [0.40009513 0.21527338 0.9906151  0.8359817 ]
label: person
score: 0.799234867096
box: [0.31424657 0.1791535  0.99457395 0.87318844]
label: person
score: 0.799234867096
box: [0.31424657 0.1791535  0.99457395 0.87318844]
[INFO] elapsed time (total): 8.45
[INFO] approx. FPS: 6.63
