# Person Counter: Prediction
This notebook contains code for prediction using pre-trained models. It stores the output in a pickle file

In [8]:
DATASET = "MOT16"
MODEL_ID = 1

# Imports

In [29]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import xml.etree.ElementTree as ET
import copy

from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt # Commented because of warning that matplot lib is already loaded
from PIL import Image
import pickle
import time
import csv

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if tf.__version__ < '1.4.0':
  raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')


## Env setup

In [10]:
# This is needed to display the images.
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [11]:
sys.path.append("../obj_det/")
from utils import label_map_util
#from utils import visualization_utils as vis_util
from utils import object_detection_evaluation as od_eval

# Model preparation 

## Variables

We use models from the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)

In [14]:
# Models used in paper
ALL_MODEL = ['ssd_mobilenet_v1_coco_2017_11_17' #0
    ,'ssd_inception_v2_coco_2017_11_17' #1
    ,'rfcn_resnet101_coco_2018_01_28' #2
    ,'faster_rcnn_resnet101_coco_2018_01_28' #3
    ,'faster_rcnn_inception_v2_coco_2018_01_28' #4
]

MODEL_NAME = ALL_MODEL[MODEL_ID]

# Path to frozen detection graph. This is the actual model that is used for the object detection.
OD_DIR = '../obj_det'

PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
PATH_TO_CKPT = os.path.join(OD_DIR, PATH_TO_CKPT)
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
PATH_TO_LABELS = os.path.join(OD_DIR, PATH_TO_LABELS)
NUM_CLASSES = 90

## Load a (frozen) Tensorflow model into memory.

In [15]:
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

## Loading label map
Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

In [16]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

In [30]:
category_index[1]

{'id': 1, 'name': u'person'}

## Helper code

In [18]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [19]:
# Convert from normalized coordinates to original image coordinates
def denormalise_box(box, image_size):
    box[:,0] = box[:,0] * image_size[1]
    box[:,1] = box[:,1] * image_size[0]
    box[:,2] = box[:,2] * image_size[1]
    box[:,3] = box[:,3] * image_size[0]
    return box

## Directory Structure
```
MOT16
/train
  /MOT16-02
    /seqinfo.ini
    /img1
    /gt
        gt.txt

PersonCounter
 /Output
   /ModelA
        prediction                 // Pickle file of groundtruth and prediction
        /Image                     // Folder of images with GT and predicted BB
        evaluate                   // Results of evalute

```

In [40]:
PATH_TO_DATABASE = '../MOT16/train/MOT16-10'        
PATH_TO_IMAGES_DIR = PATH_TO_DATABASE + 'img1/'
PATH_TO_ANNOTATIONS_DIR = PATH_TO_DATABASE + '/gt/gt.txt'

PATH_TO_OUTPUT_DIR = 'Output/'
PATH_TO_PREDICTION_DIR = os.path.join(PATH_TO_OUTPUT_DIR, MODEL_NAME) # Output/Model
PREDICTION_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction")

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# Store results to Output/Model directory
if not os.path.exists(PATH_TO_PREDICTION_DIR):
    os.makedirs(PATH_TO_PREDICTION_DIR)
    os.makedirs(os.path.join(PATH_TO_PREDICTION_DIR,"Image"))

# Visualize BB

In [44]:
def visualize_image(image_np, prediction=None, groundtruth=None):
    if output_dict != None:
        # Plot the prediction
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          prediction['detection_boxes'],
          prediction['detection_classes'],
          prediction['detection_scores'],
          category_index,
          instance_masks=prediction.get('detection_masks'),
          use_normalized_coordinates=False,
          min_score_thresh=0.30,
          line_thickness=8)
    
    if groundtruth != None:
        # Plot the ground truth
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          groundtruth['groundtruth_boxes'],
          groundtruth['groundtruth_classes'],
          None,
          category_index,
          instance_masks=groundtruth.get('detection_masks'),
          use_normalized_coordinates=False)   

In [43]:
# file name with extension
def drawBB(image_path, prediction):
    # Visualization of the results of a detection.
    original_image_path = os.path.join(PATH_TO_IMAGES_DIR, image_path)   
    marked_image_path = os.path.join(PATH_TO_PREDICTION_DIR, 'Image')
    marked_image_path = os.path.join(marked_image_path, image_path) 

    image = Image.open(original_image_path)
    image_np = load_image_into_numpy_array(image)
    # Update the image with predicted and grounttruth BB
    visualize_image(image_np, prediction=prediction)
    im = Image.fromarray(image_np)
    IMAGE_FILE = os.path.join(marked_image_path)
    #print "Save file in" + IMAGE_FILE
    im.save(IMAGE_FILE)

# Detection

In [22]:
def run_inference_for_single_image(image, graph):
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
        
      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})
 
      # all outputs are float32 numpy arrays, so convert types as appropriate
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
      # Need to include only the ones with confidence > threshold
  return output_dict

#### Extract groundtruth from CSV file

In [23]:
def extract_gt_for_single_image(image_id):
    annotation_path = os.path.join(PATH_TO_TEST_ANNOTATIONS_DIR, '{}.xml'.format(image_id))
    
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    groundtruth_dict = {}
    gt_bbs = []
    cat_id = []
    cat_label = []
    
    for object_node in root.iterfind('object'):
        # Extract boundary box from XML files
        for bb in object_node.iterfind('bndbox'):
            gt_bb = []
            for val in ['ymin', 'xmin', 'ymax', 'xmax']:
                gt_bb.append(float(bb.find(val).text))
            gt_bbs.append(gt_bb)

        # Extract ground truth category
        child = object_node.find('name')
        cat_label.append(child.text)
        # Hardcoded as of now :/
        if child.text == 'cow':
            cat_id.append(21)
        elif child.text == 'dog':
            cat_id.append(18)
        # print(child.text)

    # Extract size
    groundtruth_dict['size'] = [int(root.find('size/width').text), int(root.find('size/height').text)]
    # Dog is category 18 / Todo update
    groundtruth_dict['num_detections'] = len(gt_bbs)
    groundtruth_dict['original_boxes'] = gt_bbs
    groundtruth_dict['groundtruth_boxes'] = np.array(gt_bbs, dtype="float32")
    groundtruth_dict['groundtruth_classes'] = np.array(cat_id) # np.full([len(gt_bbs)], 18)
    groundtruth_dict['groundtruth_class_labels'] = np.array(cat_label) # np.full([len(gt_bbs)], 'dog')
    
    return groundtruth_dict

In [38]:
# CSV text-file containing one object instance per line. Each line must contain 10 values: 
# <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z> 
# All frame numbers, target IDs and bounding boxes are 1-based

def extract_gt():
    gt = 0
    with open(PATH_TO_ANNOTATIONS_DIR, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            print ', '.join(row)
            gt = row
            
            groundtruth = {}
            gt_bb = []
            gt_bb.append( float(row[0]) ) # ymin
            gt_bb.append( float(row[1]) ) # xmin
            gt_bb.append( float(row[2]) ) # ymax
            gt_bb.append( float(row[3]) ) # xmax
            
            gt_bbs = []
            gt_bbs.append(gt_bb)
            groundtruth['groundtruth_boxes'] = np.array(gt_bbs dtype="float32")
            groundtruth['groundtruth_classes'] =

            return prediction
    return gt

# Forward Pass
Evaluate the prediction using Tensorflow evaluation function

In [25]:
ev_data = {}

def forward_pass(gt):
    for image_path in os.listdir(PATH_TO_IMAGES_DIR):
        # File name without extension
        image_id = os.path.splitext(os.path.basename(image_path))[0]
        try:
            # Ground Truth
            #groundtruth_dict = extract_gt_for_single_image(image_id)
            groundtruth_dict = gt[image_id]
            image = Image.open(image_path)
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)

            # Actual detection.
            output_dict = run_inference_for_single_image(image_np, detection_graph)

            # The predicition gives BB in normalized coordinated
            # Convert to original image cordinates from normalized coordinates (for evaluation and vizualization)
            output_dict['detection_boxes'] = denormalise_box(output_dict['detection_boxes'], groundtruth_dict['size'])

            ev_data[image_id] = (groundtruth_dict, output_dict)
            print(image_id,"Ground Truth", groundtruth_dict['num_detections'], "Detected" ,output_dict['num_detections'])

        except Exception as e:
            print(image_id, 'Error', e)
            continue

# Main

In [26]:
# Run the forward pass and store result in pickle file
def run():
    start_time = time.clock()
    print "Forward pass for ", PATH_TO_TEST_IMAGES_DIR
    gt = extract_gt()
    forward_pass(gt)

    # Save model prediction
    with open(PREDICTION_PKL_FILE,'wb') as fd:
        pickle.dump(ev_data, fd)
        print("\n", time.clock() - start_time, "seconds")

In [27]:
# Evaluate the prediction
def evaluate():
    print(PREDICTION_PKL_FILE)
    with open(PREDICTION_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)
        for image_path in os.listdir(PATH_TO_TEST_IMAGES_DIR):
            # File name without extension
            image_id = os.path.splitext(os.path.basename(image_path))[0]
            prediction = ev_data[image_id]
            # Plot the BB on image
            drawBB(image_path, prediction)
    print "Done"

In [39]:
def main():
    #run()
    #evaluate()
    prediction = extract_gt()
    drawBB(image_path, prediction)
main()

1, 1, 1368, 394, 74, 226, 1, 1, 1
