# Person Counter: Prediction
This notebook contains code for prediction using pre-trained models. It stores the output in a pickle file

In [1]:
DATASET = "MOT16"
VIDEO_SEQ = 10 # Range 01 to 14
MODEL_ID = 1

# Imports

In [2]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import xml.etree.ElementTree as ET
import copy

from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt # Commented because of warning that matplot lib is already loaded
from PIL import Image
import pickle
import time
import csv

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if tf.__version__ < '1.4.0':
  raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')


  from ._conv import register_converters as _register_converters


## Env setup

In [3]:
# This is needed to display the images.
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [4]:
sys.path.append("../obj_det/")
from utils import label_map_util
from utils import visualization_utils as vis_util
from utils import per_image_evaluation as img_eval_util

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loo

# Model preparation 

## Variables

We use models from the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)

In [5]:
# Models used in paper
ALL_MODEL = ['ssd_mobilenet_v1_coco_2017_11_17' #0
    ,'ssd_inception_v2_coco_2017_11_17' #1
    ,'rfcn_resnet101_coco_2018_01_28' #2
    ,'faster_rcnn_resnet101_coco_2018_01_28' #3
    ,'faster_rcnn_inception_v2_coco_2018_01_28' #4
]

MODEL_NAME = ALL_MODEL[MODEL_ID]

# Path to frozen detection graph. This is the actual model that is used for the object detection.
OD_DIR = '../obj_det'

PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
PATH_TO_CKPT = os.path.join(OD_DIR, PATH_TO_CKPT)
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
PATH_TO_LABELS = os.path.join(OD_DIR, PATH_TO_LABELS)
NUM_CLASSES = 90

## Loading label map
Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

In [6]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

In [7]:
category_index[1]

{'id': 1, 'name': u'person'}

## Helper code

In [8]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [9]:
# Convert from normalized coordinates to original image coordinates
def denormalise_box(box, image_size):
    box[:,0] = box[:,0] * image_size[1]
    box[:,1] = box[:,1] * image_size[0]
    box[:,2] = box[:,2] * image_size[1]
    box[:,3] = box[:,3] * image_size[0]
    return box

## Directory Structure
```
MOT16
/train
  /MOT16-02
    /seqinfo.ini
    /img1
    /gt
        gt.txt

PersonCounter
 /Output
   /ModelA
        prediction                 // Pickle file of groundtruth and prediction
        /Image                     // Folder of images with GT and predicted BB
        evaluate                   // Results of evalute

```

In [71]:
if VIDEO_SEQ in [2,4,5,9,10,11,13]:
    VIDEO_SEQ = str(VIDEO_SEQ).zfill(2)
    PATH_TO_DATABASE = '../MOT16/train/MOT16-' + VIDEO_SEQ + '/'
else:
    VIDEO_SEQ = str(VIDEO_SEQ).zfill(2)
    PATH_TO_DATABASE = '../MOT16/test/MOT16-' + VIDEO_SEQ + '/'

PATH_TO_IMAGES_DIR = PATH_TO_DATABASE + 'img1/'
PATH_TO_ANNOTATIONS_DIR = PATH_TO_DATABASE + '/gt/gt.txt'

PATH_TO_OUTPUT_DIR = 'Output/'
PATH_TO_PREDICTION_DIR = os.path.join(PATH_TO_OUTPUT_DIR, MODEL_NAME + '_MOT16_' + VIDEO_SEQ) # Output/Model_MOT16_01
PREDICTION_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction")
FILTERED_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction_filtered") # Filtered to only person class

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# Store results to Output/Model directory
if not os.path.exists(PATH_TO_PREDICTION_DIR):
    os.makedirs(PATH_TO_PREDICTION_DIR)
    os.makedirs(os.path.join(PATH_TO_PREDICTION_DIR,"Image"))

# Visualize BB

In [11]:
# image_np : np array of the image
# prediction : prediction dictionary for the image
# groundtruth : groundtruth dictionary for the image
def visualize_image(image_np, prediction=None, groundtruth=None):
    if prediction != None:
        # Plot the prediction
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          prediction['detection_boxes'],
          prediction['detection_classes'],
          prediction['detection_scores'],
          category_index,
          instance_masks=prediction.get('detection_masks'),
          use_normalized_coordinates=False,
          min_score_thresh=0.30,
          line_thickness=8)
    
    if groundtruth != None:
        # Plot the ground truth
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          groundtruth['groundtruth_boxes'],
          groundtruth['groundtruth_classes'],
          None,
          category_index,
          instance_masks=groundtruth.get('detection_masks'),
          use_normalized_coordinates=False)   

In [12]:
# image_path : file name with extension E.g: 00010.jpg
# prediction : prediction dictionary of the image
def drawBB(image_path, prediction):
    # Visualization of the results of a detection.
    original_image_path = os.path.join(PATH_TO_IMAGES_DIR, image_path)   
    marked_image_path = os.path.join(PATH_TO_PREDICTION_DIR, 'Image')
    marked_image_path = os.path.join(marked_image_path, image_path) 

    image = Image.open(original_image_path)
    image_np = load_image_into_numpy_array(image)
    # Update the image with predicted and groundtruth BB
    visualize_image(image_np, groundtruth=prediction)
    # visualize_image(image_np, prediction=prediction)    
    im = Image.fromarray(image_np)
    IMAGE_FILE = os.path.join(marked_image_path)
    #print "Save file in" + IMAGE_FILE
    im.save(IMAGE_FILE)

## Extract groundtruth from CSV file

In [13]:
def extract_gt_for_single_image(image_id):
    annotation_path = os.path.join(PATH_TO_TEST_ANNOTATIONS_DIR, '{}.xml'.format(image_id))
    
    tree = ET.parse(annotation_path)
    root = tree.getroot()

    groundtruth_dict = {}
    gt_bbs = []
    cat_id = []
    cat_label = []
    
    for object_node in root.iterfind('object'):
        # Extract boundary box from XML files
        for bb in object_node.iterfind('bndbox'):
            gt_bb = []
            for val in ['ymin', 'xmin', 'ymax', 'xmax']:
                gt_bb.append(float(bb.find(val).text))
            gt_bbs.append(gt_bb)

        # Extract ground truth category
        child = object_node.find('name')
        cat_label.append(child.text)
        # Hardcoded as of now :/
        if child.text == 'cow':
            cat_id.append(21)
        elif child.text == 'dog':
            cat_id.append(18)
        # print(child.text)

    # Extract size
    groundtruth_dict['size'] = [int(root.find('size/width').text), int(root.find('size/height').text)]
    # Dog is category 18 / Todo update
    groundtruth_dict['num_detections'] = len(gt_bbs)
    groundtruth_dict['original_boxes'] = gt_bbs
    groundtruth_dict['groundtruth_boxes'] = np.array(gt_bbs, dtype="float32")
    groundtruth_dict['groundtruth_classes'] = np.array(cat_id) # np.full([len(gt_bbs)], 18)
    groundtruth_dict['groundtruth_class_labels'] = np.array(cat_label) # np.full([len(gt_bbs)], 'dog')
    
    return groundtruth_dict

In [14]:
# CSV text-file containing one object instance per line. Each line must contain 10 values: 
# <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z> 
# All frame numbers, target IDs and bounding boxes are 1-based
# Frame number is image number without leading 0
# Person ID is <id>

def extract_gt():
    gt = 0
    with open(PATH_TO_ANNOTATIONS_DIR, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            print ', '.join(row)
            groundtruth = {}
            gt_bbs = []
            cat_label = []
            
            # For each BB in each image
            gt_bb = []
            gt_bb.append( float(row[3]) + float(row[5])  ) # ymin # bb_top - bb_height
            gt_bb.append( float(row[2]) ) # xmin # bb_left
            gt_bb.append( float(row[3]) ) # ymax # bb_top
            gt_bb.append( float(row[2]) + float(row[4]) ) # xmax # bb_left + bb_width
            
            gt_bbs.append(gt_bb)
            cat_label.append(str(category_index[1]['name']))
            print cat_label
            groundtruth['filename'] = row[1].zfill(6) + '.jpg'
            groundtruth['groundtruth_boxes'] = np.array(gt_bbs, dtype="float32")
            groundtruth['groundtruth_classes'] = np.array(cat_label)

            return groundtruth
    return gt

# Matching algo
Data association via IoU

In [15]:
"""
Given two frames, match the detection to groundtruth.
In decreasing sorted order, assign the GT if IoU > IoU threshold
Filter detection score < score threshold

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
      detected_scores: A 1-d numpy array of length N representing classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates

    Returns:
      scores: A numpy array representing the detection scores, sorted and filtered.
      max_overlap_gt_ids: A numpy array indicating the detection's corresponding groundtruth box
      tp_fp_labels: a boolean numpy array indicating whether a detection is a true positive.
      is_gt_box_detected: Indicates if a ground truth box is detected 
"""
def twoFrameMatching(pie, detected_boxes, detected_scores, groundtruth_boxes):
    # Default value false
    num_groundtruth_boxes = np.shape(groundtruth_boxes)[0]
    groundtruth_is_group_of_list = np.zeros(num_groundtruth_boxes, dtype=bool)

    # Compute IoU for every detection and groundtruth pair
    # Detection with score < score threshold are ignored. Rest are sorted.
    (iou, _, scores, num_detected_boxes) = pie._get_overlaps_and_scores_box_mode(
               detected_boxes=detected_boxes,
               detected_scores=detected_scores,
               groundtruth_boxes=groundtruth_boxes,
               groundtruth_is_group_of_list=groundtruth_is_group_of_list)
    
    # If no GT value then all detection are false positive
    if groundtruth_boxes.size == 0:
        #return scores, np.zeros(num_detected_boxes, dtype=bool)
        print "All FP"
    
    # Is ith detection a True positive
    tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool)

    # Tp-fp evaluation for non-group of boxes (if any).
    if iou.shape[1] > 0:
        # For each detection, the best matched GT 
        max_overlap_gt_ids = np.argmax(iou, axis=1)
        is_gt_box_detected = np.zeros(iou.shape[1], dtype=bool)
        for i in range(num_detected_boxes):
            gt_id = max_overlap_gt_ids[i]
            if iou[i, gt_id] >= pie.matching_iou_threshold and not is_gt_box_detected[gt_id]:
                tp_fp_labels[i] = True
                is_gt_box_detected[gt_id] = True

    # Detection matched to
    for i in range(num_detected_boxes):
        print "Dt", scores[i], "GT", max_overlap_gt_ids[i], "", tp_fp_labels[i]
    for i in range(num_groundtruth_boxes):
        print "GT", i, bool(is_gt_box_detected[i])

    return scores, max_overlap_gt_ids, tp_fp_labels, is_gt_box_detected 

In [16]:
num_groundtruth_classes = 1
matching_iou_threshold = 0.5
nms_iou_threshold = 1.0
nms_max_output_boxes = 10000

# Per image evaluation
pie = img_eval_util.PerImageEvaluation(
    num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
    nms_max_output_boxes)

# Dataset
detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
detected_scores = np.array([0.6, 0.8, 0.5], dtype=float) # Original

groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]], dtype=float)

#twoFrameMatching(pie, detected_boxes, detected_scores, groundtruth_boxes)

In [69]:
with open(PREDICTION_PKL_FILE,'rb') as fd:
    ev_data = pickle.load(fd)
    groundtruth = ev_data['000001']
    print [i for i in groundtruth.keys()]
    # Keep only person
    print np.unique(groundtruth['detection_classes'])
    idx = groundtruth['detection_classes'][groundtruth['detection_classes'] == 1]
    groundtruth['num_detections'] = np.count_nonzero(idx)
    groundtruth['detection_boxes'] = groundtruth['detection_boxes'][idx]
    groundtruth['detection_scores'] = groundtruth['detection_boxes'][idx]
    groundtruth['detection_classes'] = groundtruth['detection_classes'][idx]

# Add 

['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes']
[ 1  3  6  7  8 10 15 27 28 31 33]
[1]


In [18]:
# from object_detection.utils import np_box_list
# from object_detection.utils import np_box_list_ops

# detected_boxlist = np_box_list.BoxList(detected_boxes)
# print detected_boxlist
# detected_boxlist.add_field('scores', detected_scores)

# detected_boxlist = np_box_list_ops.non_max_suppression(detected_boxlist, pie.nms_max_output_boxes, pie.nms_iou_threshold)

# print detected_scores
# scores = detected_boxlist.get_field('scores')
# print scores

# Main

In [75]:
# Filter prediction to only person class
def filter_prediction():
    print(PREDICTION_PKL_FILE)
    with open(PREDICTION_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)
        # Need to sequentially analyse
        for i in range(1,655):
            # File name without extension
            image_id = str(i).zfill(6)
            prediction = ev_data[image_id]
            # Person class = 1 in COCO dataset
            idx = prediction['detection_classes'][prediction['detection_classes'] == 1]
            prediction['num_detections'] = np.count_nonzero(idx)
            prediction['detection_boxes'] = prediction['detection_boxes'][idx]
            prediction['detection_scores'] = prediction['detection_boxes'][idx]
            prediction['detection_classes'] = prediction['detection_classes'][idx]
            print "Image", image_id, "dt", groundtruth['num_detections']
        # Store in pickle
        with open(FILTERED_PKL_FILE,'wb') as fd2:
            pickle.dump(ev_data, fd2)

In [76]:
ev_data={}
# Evaluate the prediction
def evaluate():
    print(PREDICTION_PKL_FILE)
    with open(PREDICTION_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)
        # Need to sequentially analyse
#         for i in range(1,655):
#             # File name without extension
#             image_id = str(i).zfill(6)
#             prediction = ev_data[image_id]
            # Plot the BB on image
            # drawBB(image_path, prediction)
    return ev_data

In [77]:
def main():
    filter_prediction()
    #ev_data = evaluate()
    print "Done"
main()

Output/ssd_inception_v2_coco_2017_11_17_MOT16_10/prediction
Image 000001 dt 61
Image 000002 dt 61
Image 000003 dt 61
Image 000004 dt 61
Image 000005 dt 61
Image 000006 dt 61
Image 000007 dt 61
Image 000008 dt 61
Image 000009 dt 61
Image 000010 dt 61
Image 000011 dt 61
Image 000012 dt 61
Image 000013 dt 61
Image 000014 dt 61
Image 000015 dt 61
Image 000016 dt 61
Image 000017 dt 61
Image 000018 dt 61
Image 000019 dt 61
Image 000020 dt 61
Image 000021 dt 61
Image 000022 dt 61
Image 000023 dt 61
Image 000024 dt 61
Image 000025 dt 61
Image 000026 dt 61
Image 000027 dt 61
Image 000028 dt 61
Image 000029 dt 61
Image 000030 dt 61
Image 000031 dt 61
Image 000032 dt 61
Image 000033 dt 61
Image 000034 dt 61
Image 000035 dt 61
Image 000036 dt 61
Image 000037 dt 61
Image 000038 dt 61
Image 000039 dt 61
Image 000040 dt 61
Image 000041 dt 61
Image 000042 dt 61
Image 000043 dt 61
Image 000044 dt 61
Image 000045 dt 61
Image 000046 dt 61
Image 000047 dt 61
Image 000048 dt 61
Image 000049 dt 61
Image 000

Image 000628 dt 61
Image 000629 dt 61
Image 000630 dt 61
Image 000631 dt 61
Image 000632 dt 61
Image 000633 dt 61
Image 000634 dt 61
Image 000635 dt 61
Image 000636 dt 61
Image 000637 dt 61
Image 000638 dt 61
Image 000639 dt 61
Image 000640 dt 61
Image 000641 dt 61
Image 000642 dt 61
Image 000643 dt 61
Image 000644 dt 61
Image 000645 dt 61
Image 000646 dt 61
Image 000647 dt 61
Image 000648 dt 61
Image 000649 dt 61
Image 000650 dt 61
Image 000651 dt 61
Image 000652 dt 61
Image 000653 dt 61
Image 000654 dt 61
Done
