# Person Counter: Prediction
This notebook contains code for prediction using pre-trained models. It stores the output in a pickle file

In [1]:
DATASET = "MOT16"
VIDEO_SEQ_ID = 10 # Range 01 to 14
MODEL_ID = 1

# Imports

In [2]:
import numpy as np
import os
import sys
import tensorflow as tf
import copy

from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt # Commented because of warning that matplot lib is already loaded
from PIL import Image
import pickle
import csv

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

  from ._conv import register_converters as _register_converters


## Env setup

In [3]:
# This is needed to display the images.
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [4]:
#from pc_utils import pc_PerImageEvaluation
import pc_utils
sys.path.append("../obj_det/")
from utils import label_map_util
from utils import visualization_utils as vis_util

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/chrystle/anaconda2/envs/od27/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loo

# Model preparation 

## Variables

We use models from the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)

In [5]:
# Models used in paper
ALL_MODEL = ['ssd_mobilenet_v1_coco_2017_11_17' #0
    ,'ssd_inception_v2_coco_2017_11_17' #1
    ,'rfcn_resnet101_coco_2018_01_28' #2
    ,'faster_rcnn_resnet101_coco_2018_01_28' #3
    ,'faster_rcnn_inception_v2_coco_2018_01_28' #4
]

MODEL_NAME = ALL_MODEL[MODEL_ID]

# Path to frozen detection graph. This is the actual model that is used for the object detection.
OD_DIR = '../obj_det'

PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
PATH_TO_CKPT = os.path.join(OD_DIR, PATH_TO_CKPT)
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
PATH_TO_LABELS = os.path.join(OD_DIR, PATH_TO_LABELS)
NUM_CLASSES = 90

## Loading label map
Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

In [6]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
pc_label = {} # For label marking

In [7]:
category_index[1]

{'id': 1, 'name': u'person'}

## Helper code

In [8]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [9]:
# Convert from normalized coordinates to original image coordinates
def denormalise_box(box, image_size):
    box[:,0] = box[:,0] * image_size[1]
    box[:,1] = box[:,1] * image_size[0]
    box[:,2] = box[:,2] * image_size[1]
    box[:,3] = box[:,3] * image_size[0]
    return box

## Directory Structure
```
MOT16
/train
  /MOT16-02
    /seqinfo.ini
    /img1
    /gt
        gt.txt

PersonCounter
 /Output
   /ModelA
        prediction                 // Pickle file of groundtruth and prediction
        /Image                     // Folder of images with GT and predicted BB
        evaluate                   // Results of evalute

```

In [10]:
def getImageCount():
    # Read seqinfo.ini to get seqLength=654
    with open(PATH_TO_DATABASE + 'seqinfo.ini','rb') as fd:
        meta = fd.read()
        idx = meta.find('seqLength')
        meta[idx+10:idx+13]
        return int( meta[idx+10:idx+13] )

In [11]:
if VIDEO_SEQ_ID in [2,4,5,9,10,11,13]:
    VIDEO_SEQ = str(VIDEO_SEQ_ID).zfill(2)
    PATH_TO_DATABASE = '../MOT16/train/MOT16-' + VIDEO_SEQ + '/'
else:
    VIDEO_SEQ = str(VIDEO_SEQ_ID).zfill(2)
    PATH_TO_DATABASE = '../MOT16/test/MOT16-' + VIDEO_SEQ + '/'

IMAGE_COUNT = getImageCount()

#print IMAGE_COUNT
PATH_TO_IMAGES_DIR = PATH_TO_DATABASE + 'img1/'
PATH_TO_ANNOTATIONS_DIR = PATH_TO_DATABASE + '/gt/gt.txt'

PATH_TO_OUTPUT_DIR = 'Output/'
PATH_TO_PREDICTION_DIR = os.path.join(PATH_TO_OUTPUT_DIR, MODEL_NAME + '_MOT16_' + VIDEO_SEQ) # Output/Model_MOT16_01
PREDICTION_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction")
FILTERED_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction_filtered") # Filtered to only person class
RESULT_CSV_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "dt.csv") # Per frame per object id
SUMMARY_CSV_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "summary.csv") # Frame wise summary

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# Store results to Output/Model directory
if not os.path.exists(PATH_TO_PREDICTION_DIR):
    os.makedirs(PATH_TO_PREDICTION_DIR)
    os.makedirs(os.path.join(PATH_TO_PREDICTION_DIR,"Image"))

# Visualize BB

In [12]:
# image_np : np array of the image
# prediction : prediction dictionary for the image
# groundtruth : groundtruth dictionary for the image
def visualize_image(image_np, prediction=None, groundtruth=None, isIDMode=False):
    if isIDMode and prediction != None:
        # Plot the prediction
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          prediction['detection_boxes'],
          prediction['person_id'], #prediction['detection_classes'],
          prediction['detection_scores'],
          pc_label, #category_index,
          instance_masks=prediction.get('detection_masks'),
          use_normalized_coordinates=False,
          min_score_thresh=0.30,
          line_thickness=8)
        return
        
    if prediction != None:
        # Plot the prediction
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          prediction['detection_boxes'],
          prediction['detection_classes'],
          prediction['detection_scores'],
          category_index,
          instance_masks=prediction.get('detection_masks'),
          use_normalized_coordinates=False,
          min_score_thresh=0.30,
          line_thickness=8)
    
    if groundtruth != None:
        # Plot the ground truth
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          groundtruth['groundtruth_boxes'],
          groundtruth['groundtruth_classes'],
          None,
          category_index,
          instance_masks=groundtruth.get('detection_masks'),
          use_normalized_coordinates=False)   

In [13]:
# image_path : file name with extension E.g: 00010.jpg
# prediction : prediction dictionary of the image
def drawBB(image_path, prediction, isIDMode=False):
    # Visualization of the results of a detection.
    original_image_path = os.path.join(PATH_TO_IMAGES_DIR, image_path)   
    marked_image_path = os.path.join(PATH_TO_PREDICTION_DIR, 'Image')
    marked_image_path = os.path.join(marked_image_path, image_path) 

    image = Image.open(original_image_path)
    image_np = load_image_into_numpy_array(image)
    # Update the image with predicted and groundtruth BB
    #visualize_image(image_np, groundtruth=prediction)
    visualize_image(image_np, prediction=prediction, isIDMode=isIDMode)    
    im = Image.fromarray(image_np)
    IMAGE_FILE = os.path.join(marked_image_path)
    #print "Save file in" + IMAGE_FILE
    im.save(IMAGE_FILE)

## Extract groundtruth from CSV file

In [15]:
# CSV text-file containing one object instance per line. Each line must contain 10 values: 
# <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z> 
# All frame numbers, target IDs and bounding boxes are 1-based
# Frame number is image number without leading 0
# Person ID is <id>

def extract_gt():
    gt = 0
    with open(PATH_TO_ANNOTATIONS_DIR, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            print ', '.join(row)
            groundtruth = {}
            gt_bbs = []
            cat_label = []
            
            # For each BB in each image
            gt_bb = []
            gt_bb.append( float(row[3]) + float(row[5])  ) # ymin # bb_top - bb_height
            gt_bb.append( float(row[2]) ) # xmin # bb_left
            gt_bb.append( float(row[3]) ) # ymax # bb_top
            gt_bb.append( float(row[2]) + float(row[4]) ) # xmax # bb_left + bb_width
            
            gt_bbs.append(gt_bb)
            cat_label.append(str(category_index[1]['name']))
            print cat_label
            groundtruth['filename'] = row[1].zfill(6) + '.jpg'
            groundtruth['groundtruth_boxes'] = np.array(gt_bbs, dtype="float32")
            groundtruth['groundtruth_classes'] = np.array(cat_label)

            return groundtruth
    return gt

In [16]:
# Unroll the prediction BB as multiple row
#<frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
def appendMatching(image_id, prediction):
    # Result per frame per object
    for i in range(prediction['num_detections']):
        row = []
        # ID
        row.append(prediction['person_id'][i])
        # bb_left
        row.append(prediction['detection_boxes'][i][1])
        # bb_top
        row.append(prediction['detection_boxes'][i][2])
        # bb_width
        row.append(prediction['detection_boxes'][i][3] - row[2])
        # bb_height
        row.append(row[3] - prediction['detection_boxes'][i][0] )
        row = ','.join(map(str,row))

        # Frame number
        row = image_id + ',' + row + '\n'
        with open(RESULT_CSV_FILE,'a') as fd:
            fd.write(row)
    
    # Summary per frame
    # <frame_id> <total_objects> <entry object> <exited object> <same object>
    # Total object is this frame
    # cnt of objects entered / detected first time wrt prev frame
    # cnt of objects who left the frame wrt prev
    
    row = []
    # total
    row.append( prediction['num_detections'] )
    # entry_cnt per frame
    row.append( prediction['num_detections_entry'] )
    # exit_cnt per frame
    row.append( prediction['num_detections_exit'] )
    # same
    row.append( row[0] - row[1])
    
    row = ','.join(map(str, row))
    row = image_id + ',' + row + '\n'
    with open(SUMMARY_CSV_FILE,'a') as fd:
        fd.write(row)

# Matching algo
Data association via IoU

In [17]:
PERSON_COUNTER = 0

In [18]:
# Match objects between two frames and tag id to new objects
def twoFrameMatching(pie, dt, gt):
    dt, exit_cnt = pc_utils.matchOnIoU(pie, dt, gt)
    entry_cnt = 0
    # Assign id to unassigned detections
    for i in range(dt['num_detections']):
        if dt['person_id'][i] == -1:
            global PERSON_COUNTER
            PERSON_COUNTER += 1
            global pc_label
            pc_label[PERSON_COUNTER] = { 'id': PERSON_COUNTER, 'name' : 'PC'+ str(PERSON_COUNTER) }
            dt['person_id'][i] = PERSON_COUNTER
            entry_cnt = entry_cnt + 1
    dt['num_detections_entry'] = entry_cnt
    dt['num_detections_exit'] = exit_cnt
    
    return dt

In [19]:
def makeGT(image_id, gt, initMode=False):
    if initMode:
        # Initialize the person counter
        global PERSON_COUNTER
        PERSON_COUNTER = 0
        gt['person_id'] = np.array([i+1 for i in range(gt['num_detections'])])
        # fill the pc label map
        for i in range(gt['num_detections']):
            global pc_label
            pc_label[i] = { 'id': i, 'name' : 'PC'+ str(i)}

    appendMatching(image_id, gt)
    # Save first image
    image_path = image_id + '.jpg'
    drawBB(image_path, gt, isIDMode=True)
    
    return gt

In [20]:
# Assign ID to person detected 
def assignID():
    if os.path.exists(RESULT_CSV_FILE):
        os.remove(RESULT_CSV_FILE)
    if os.path.exists(SUMMARY_CSV_FILE):
        os.remove(SUMMARY_CSV_FILE)

    with open(FILTERED_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)

    # Init per image evaluation
    num_groundtruth_classes = 1
    matching_iou_threshold = 0.5
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000

    pie = pc_utils.pc_PerImageEvaluation(num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,nms_max_output_boxes)

    # First frame
    frame_id = 1
    image_id = str(frame_id).zfill(6)
    gt = ev_data[image_id] # treated as gt
    gt['num_detections_entry'] = gt['num_detections']
    gt['num_detections_exit'] = 0
    gt = makeGT(image_id, gt, initMode=True)
   
    # Next frame
    image_id = str(frame_id+1).zfill(6)
    dt = ev_data[image_id] # treated as dt

    for frame_id in range(1,IMAGE_COUNT-1): # Upto last but one
        # Returns gt for next
        gt = twoFrameMatching(pie, dt, gt)
        gt = makeGT(image_id, gt, initMode=False)
        
        # Prepare next loop
        frame_id = frame_id + 1
        image_id = str(frame_id+1).zfill(6)
        dt = ev_data[image_id]


# Main

In [21]:
# Filter prediction to only person class
def filter_prediction():
    print(PREDICTION_PKL_FILE)
    with open(PREDICTION_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)
        # Need to sequentially analyse
        for i in range(1,IMAGE_COUNT): # 655
            # File name without extension
            image_id = str(i).zfill(6)
            prediction = ev_data[image_id]
            # Person class = 1 in COCO dataset
            idx = prediction['detection_classes'] == 1
            prediction['num_detections'] = np.count_nonzero(idx)
            prediction['detection_boxes'] = prediction['detection_boxes'][idx, :]
            prediction['detection_scores'] = prediction['detection_scores'][idx]
            prediction['detection_classes'] = prediction['detection_classes'][idx]
            print "Image", image_id, "dt", prediction['num_detections']
        # Store in pickle
        with open(FILTERED_PKL_FILE,'wb') as fd2:
            pickle.dump(ev_data, fd2)

In [22]:
def main():
    #filter_prediction()
    assignID()
    print "Total person", PERSON_COUNTER
    print "Done"
main()

Total person 7705
Done
