# Person Counter: Prediction
This notebook contains code for prediction using pre-trained models. It stores the output in a pickle file.

In [None]:
# Name of the dataset being processed.
# NOTE(review): not read anywhere else in this notebook -- the paths further down spell out 'MOT16' literally.
DATASET = "MOT16"
# Number of the video sequence to process; it is zero-padded to two digits when the paths are built.
VIDEO_SEQ = 10 # Range 01 to 14
# Index into ALL_MODEL that selects which pre-trained model's results are used.
MODEL_ID = 1

# Imports

In [None]:
import numpy as np
import os
import sys
import tensorflow as tf
import copy

from collections import defaultdict
from io import StringIO
#from matplotlib import pyplot as plt # Commented because of warning that matplot lib is already loaded
from PIL import Image
import pickle
import csv

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

## Env setup

In [None]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): the pyplot import in the imports cell is commented out, so this magic may no longer be required -- confirm.
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [None]:
#from pc_utils import pc_PerImageEvaluation
import pc_utils
sys.path.append("../obj_det/")
from utils import label_map_util
from utils import visualization_utils as vis_util

# Model preparation 

## Variables

We use models from the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md)

In [None]:
# Models used in paper
# Models used in the paper; MODEL_ID is an index into this list.
ALL_MODEL = [
    'ssd_mobilenet_v1_coco_2017_11_17',          # 0
    'ssd_inception_v2_coco_2017_11_17',          # 1
    'rfcn_resnet101_coco_2018_01_28',            # 2
    'faster_rcnn_resnet101_coco_2018_01_28',     # 3
    'faster_rcnn_inception_v2_coco_2018_01_28'   # 4
]
MODEL_NAME = ALL_MODEL[MODEL_ID]

# Folder that contains both the model directories and the label-map data.
OD_DIR = '../obj_det'

# Frozen detection graph of the selected model, i.e. the model actually used for detection.
PATH_TO_CKPT = os.path.join(OD_DIR, MODEL_NAME + '/frozen_inference_graph.pb')

# Label map holding the strings used to label each box.
PATH_TO_LABELS = os.path.join(OD_DIR, 'data', 'mscoco_label_map.pbtxt')

# Passed as max_num_classes when the label map is converted to categories.
NUM_CLASSES = 90

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [None]:
# Build the class-id -> category lookup from the label map file.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Person-id -> label entries used when boxes are drawn in ID mode; filled in later.
pc_label = dict()

In [None]:
# Display the label-map entry for class id 1 (the id this notebook treats as the person class) as a quick check.
category_index[1]

## Helper code

In [None]:
def load_image_into_numpy_array(image):
    """Copy the pixels of a 3-channel image into a (height, width, 3) uint8 array.

    image: object exposing `size` as (width, height) and `getdata()` returning
           one 3-value pixel per position, row after row (e.g. an RGB PIL image).
    """
    width, height = image.size
    pixels = np.array(image.getdata())
    pixels = pixels.reshape((height, width, 3))
    return pixels.astype(np.uint8)

In [None]:
# Convert from normalized coordinates to original image coordinates
def denormalise_box(box, image_size):
    """Scale normalised box coordinates to image coordinates, in place.

    box:        2-D array with one box per row. Columns 0 and 2 are multiplied
                by image_size[1], columns 1 and 3 by image_size[0].
    image_size: two-element sequence; presumably (width, height) with boxes laid
                out as [ymin, xmin, ymax, xmax] -- confirm against the caller.

    Returns the same array object that was passed in.
    """
    # (column of box, index into image_size) pairs.
    for column, axis in ((0, 1), (1, 0), (2, 1), (3, 0)):
        box[:, column] = box[:, column] * image_size[axis]
    return box

## Directory Structure
```
MOT16
/train
  /MOT16-02
    /seqinfo.ini
    /img1
    /gt
        gt.txt

PersonCounter
 /Output
   /ModelA
        prediction                 // Pickle file of groundtruth and prediction
        /Image                     // Folder of images with GT and predicted BB
        evaluate                   // Results of evalute

```

In [None]:
# Sequence numbers that are stored under train/; every other sequence is looked up under test/.
_TRAIN_SEQUENCES = (2, 4, 5, 9, 10, 11, 13)

# Go through int() first: after one run of this cell VIDEO_SEQ is already the
# zero-padded string, which would never match the integers above, so a re-run
# would silently switch to the test/ folder.
_seq_number = int(VIDEO_SEQ)
VIDEO_SEQ = str(_seq_number).zfill(2)
_split = 'train' if _seq_number in _TRAIN_SEQUENCES else 'test'
PATH_TO_DATABASE = '../MOT16/' + _split + '/MOT16-' + VIDEO_SEQ + '/'

PATH_TO_IMAGES_DIR = PATH_TO_DATABASE + 'img1/'
# PATH_TO_DATABASE already ends with '/', so no leading slash is added here.
PATH_TO_ANNOTATIONS_DIR = PATH_TO_DATABASE + 'gt/gt.txt'

PATH_TO_OUTPUT_DIR = 'Output/'
PATH_TO_PREDICTION_DIR = os.path.join(PATH_TO_OUTPUT_DIR, MODEL_NAME + '_MOT16_' + VIDEO_SEQ) # Output/Model_MOT16_01
PREDICTION_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction")
FILTERED_PKL_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "prediction_filtered") # Filtered to only person class
RESULT_CSV_FILE = os.path.join(PATH_TO_PREDICTION_DIR, "dt.csv") # Final result

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

# Store results in the Output/Model directory. Each folder is checked on its own
# so that Image/ is still created when the prediction folder already exists.
for _directory in (PATH_TO_PREDICTION_DIR, os.path.join(PATH_TO_PREDICTION_DIR, "Image")):
    if not os.path.isdir(_directory):
        os.makedirs(_directory)

# Visualize BB

In [None]:
# image_np : np array of the image
# prediction : prediction dictionary for the image
# groundtruth : groundtruth dictionary for the image
def visualize_image(image_np, prediction=None, groundtruth=None, isIDMode=False):
    """Draw predicted and/or ground-truth boxes on an image array.

    image_np:    np array of the image; it is handed to the drawing utility,
                 and drawBB saves this same array afterwards.
    prediction:  prediction dictionary for the image ('detection_boxes',
                 'detection_scores', 'detection_classes', plus 'person_id' when
                 isIDMode is set), or None to draw no prediction.
    groundtruth: groundtruth dictionary for the image ('groundtruth_boxes',
                 'groundtruth_classes'), or None to draw no ground truth.
    isIDMode:    when True, predictions are labelled with their person id
                 (looked up in pc_label) instead of their class (looked up in
                 category_index). In this mode only the prediction is drawn;
                 groundtruth is ignored whenever a prediction is given.
    """
    # Identity test rather than `!= None`, which would compare element-wise
    # if an array-like were ever passed.
    if prediction is not None:
        if isIDMode:
            labels, label_index = prediction['person_id'], pc_label
        else:
            labels, label_index = prediction['detection_classes'], category_index

        # Plot the prediction
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          prediction['detection_boxes'],
          labels,
          prediction['detection_scores'],
          label_index,
          instance_masks=prediction.get('detection_masks'),
          use_normalized_coordinates=False,
          min_score_thresh=0.30,
          line_thickness=8)

        if isIDMode:
            return

    if groundtruth is not None:
        # Plot the ground truth
        vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          groundtruth['groundtruth_boxes'],
          groundtruth['groundtruth_classes'],
          None,
          category_index,
          instance_masks=groundtruth.get('detection_masks'),
          use_normalized_coordinates=False)

In [None]:
# image_path : file name with extension E.g: 00010.jpg
# prediction : prediction dictionary of the image
def drawBB(image_path, prediction, isIDMode=False):
    """Draw the prediction's boxes on one frame and save the marked image.

    image_path: file name with extension, e.g. 00010.jpg. The frame is read
                from PATH_TO_IMAGES_DIR and written under
                PATH_TO_PREDICTION_DIR/Image with the same name.
    prediction: prediction dictionary of the image.
    isIDMode:   forwarded unchanged to visualize_image.
    """
    source_file = os.path.join(PATH_TO_IMAGES_DIR, image_path)
    target_file = os.path.join(PATH_TO_PREDICTION_DIR, 'Image', image_path)

    frame = load_image_into_numpy_array(Image.open(source_file))
    # Only the prediction is drawn here; no ground truth is passed.
    visualize_image(frame, prediction=prediction, isIDMode=isIDMode)
    Image.fromarray(frame).save(target_file)

## Extract groundtruth from CSV file

In [None]:
def extract_gt_for_single_image(image_id, annotations_dir=None):
    """Read the ground truth of one image from its XML annotation file.

    The file '<image_id>.xml' is expected to hold 'object' nodes (each with a
    'name' and one or more 'bndbox' children carrying ymin/xmin/ymax/xmax) and
    a 'size' node with 'width' and 'height'.

    image_id:        file name of the annotation without the '.xml' extension.
    annotations_dir: folder that contains the XML files. When omitted, the
                     module-level PATH_TO_TEST_ANNOTATIONS_DIR is used.
                     NOTE(review): this notebook never defines that name, so
                     pass annotations_dir explicitly.

    Returns a dictionary with the keys 'size' ([width, height]),
    'num_detections', 'original_boxes' (list of [ymin, xmin, ymax, xmax]),
    'groundtruth_boxes' (float32 array), 'groundtruth_classes' and
    'groundtruth_class_labels'.
    """
    # Imported here because the notebook's import cell does not provide ElementTree.
    import xml.etree.ElementTree as ET

    if annotations_dir is None:
        annotations_dir = PATH_TO_TEST_ANNOTATIONS_DIR
    annotation_path = os.path.join(annotations_dir, '{}.xml'.format(image_id))

    root = ET.parse(annotation_path).getroot()

    # Hardcoded label -> class id table, as in the original code.
    label_to_id = {'cow': 21, 'dog': 18}

    gt_bbs = []
    cat_id = []
    cat_label = []

    for object_node in root.iterfind('object'):
        # Extract boundary box from XML files
        for bb in object_node.iterfind('bndbox'):
            gt_bbs.append([float(bb.find(key).text) for key in ('ymin', 'xmin', 'ymax', 'xmax')])

        # Extract ground truth category
        label = object_node.find('name').text
        cat_label.append(label)
        # NOTE(review): a label missing from the table gets no class id, which
        # leaves 'groundtruth_classes' shorter than the box list -- behaviour kept as is.
        if label in label_to_id:
            cat_id.append(label_to_id[label])

    groundtruth_dict = {}
    # Extract size
    groundtruth_dict['size'] = [int(root.find('size/width').text), int(root.find('size/height').text)]
    groundtruth_dict['num_detections'] = len(gt_bbs)
    groundtruth_dict['original_boxes'] = gt_bbs
    groundtruth_dict['groundtruth_boxes'] = np.array(gt_bbs, dtype="float32")
    groundtruth_dict['groundtruth_classes'] = np.array(cat_id)
    groundtruth_dict['groundtruth_class_labels'] = np.array(cat_label)

    return groundtruth_dict

In [None]:
# CSV text-file containing one object instance per line. Each line must contain 10 values: 
# <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z> 
# All frame numbers, target IDs and bounding boxes are 1-based
# Frame number is image number without leading 0
# Person ID is <id>

def extract_gt():
    """Read the first annotation row of the ground-truth file PATH_TO_ANNOTATIONS_DIR.

    Each row of the file holds
    <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>.

    Returns a dictionary with
      'filename'                 -- image file name of the row's frame
                                    (frame number zero-padded to 6 digits + '.jpg')
      'groundtruth_boxes'        -- float32 array with one [ymin, xmin, ymax, xmax] row
      'groundtruth_classes'      -- array holding class id 1 (the entry read from category_index)
      'groundtruth_class_labels' -- array holding that entry's name
    or 0 when the file contains no rows.

    NOTE(review): only the first non-empty row is used, exactly as before this
    fix; aggregating every row per frame would change the return shape and is
    left for a follow-up.
    """
    person_class_id = 1

    # The csv module wants a binary file on Python 2 and a text file on Python 3.
    mode = 'rb' if sys.version_info[0] < 3 else 'r'
    with open(PATH_TO_ANNOTATIONS_DIR, mode) as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                continue  # skip blank lines

            left, top, width, height = [float(value) for value in row[2:6]]
            # bb_top is the upper edge in image coordinates, so it is ymin and
            # ymax lies bb_height further down.
            gt_bb = [top, left, top + height, left + width]

            groundtruth = {}
            # The frame number (column 0), not the person id (column 1), names the image.
            groundtruth['filename'] = row[0].strip().zfill(6) + '.jpg'
            groundtruth['groundtruth_boxes'] = np.array([gt_bb], dtype="float32")
            # Ids go under 'groundtruth_classes' and names under
            # 'groundtruth_class_labels', matching extract_gt_for_single_image.
            groundtruth['groundtruth_classes'] = np.array([person_class_id])
            groundtruth['groundtruth_class_labels'] = np.array([str(category_index[person_class_id]['name'])])
            return groundtruth
    return 0

In [None]:
# Unroll the prediction BB as multiple row
#<frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>
def appendMatching(image_id, prediction):
    """Unroll the boxes of one prediction into result rows, one per detection.

    Every row has the form
    <frame>,<id>,<bb_left>,<bb_top>,<bb_width>,<bb_height>,0,0,0,0
    where the four trailing zeros stand in for <conf>, <x>, <y>, <z>.

    image_id:   frame identifier; converted with int(), so a zero-padded
                string such as '000010' becomes frame 10.
    prediction: dictionary with 'num_detections', 'person_id' (one id per
                detection) and 'detection_boxes' (one [ymin, xmin, ymax, xmax]
                row per detection).

    Each row is printed; the list of rows is also returned.
    """
    frame = int(image_id)
    rows = []
    for i in range(int(prediction['num_detections'])):
        ymin, xmin, ymax, xmax = prediction['detection_boxes'][i][:4]
        fields = [
            frame,                         # Frame number
            prediction['person_id'][i],    # ID of this detection
            xmin,                          # bb_left
            ymin,                          # bb_top: the upper edge is the smaller y value
            xmax - xmin,                   # bb_width
            ymax - ymin,                   # bb_height
        ]
        # conf, x, y, z
        row = ','.join(str(value) for value in fields) + ',0,0,0,0'
        #with open(RESULT_CSV_FILE,'a') as fd:
        #    fd.write(row)
        print(row)
        rows.append(row)
    return rows

# Matching algo
Data association via IoU

In [None]:
# Running counter used to issue person ids: twoFrameMatching increments it for
# every detection it has to give a new id, and assignID resets it.
PERSON_COUNTER = 0

In [None]:
# Match objects between two frames and tag id to new objects
def twoFrameMatching(pie, dt, gt):
    """Match the detections of a frame against the previous frame and tag new ones.

    pie: per-image evaluation object handed through to pc_utils.matchOnIoU.
    dt:  detections of the current frame.
    gt:  detections of the previous frame, treated as ground truth.

    After pc_utils.matchOnIoU has run, every detection whose 'person_id' is
    still -1 receives the next value of PERSON_COUNTER, and a matching
    'PC<n>' entry is added to pc_label. Returns the updated detections.
    """
    global PERSON_COUNTER

    dt = pc_utils.matchOnIoU(pie, dt, gt)

    for idx in range(dt['num_detections']):
        if dt['person_id'][idx] != -1:
            continue  # already carries an id
        PERSON_COUNTER += 1
        pc_label[PERSON_COUNTER] = { 'id': PERSON_COUNTER, 'name' : 'PC'+ str(PERSON_COUNTER) }
        dt['person_id'][idx] = PERSON_COUNTER
    return dt

In [None]:
# num_groundtruth_classes = 1
# matching_iou_threshold = 0.5
# nms_iou_threshold = 1.0
# nms_max_output_boxes = 10000

# # Per image evaluation
# pie = pc_utils.pc_PerImageEvaluation(num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,nms_max_output_boxes)

# # Dataset
# detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
# detected_scores = np.array([0.6, 0.8, 0.5], dtype=float) # Original
# dt = {}
# dt['detection_boxes'] = detected_boxes
# dt['detection_scores'] = detected_scores
# dt['detection_classes'] = np.array([1,0,1])

# groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]], dtype=float)
# gt = {}
# gt['detection_boxes'] = groundtruth_boxes
# gt['person_id'] = np.array([1,2,3])
# PERSON_COUNTER = 3

# #pc_utils.matchOnIoU(pie, dt, gt)
# #twoFrameMatching(pie, dt, gt)

In [None]:
# Assign ID to person detected 
def assignID(last_frame=5):
    """Assign a person id to every detection in frames 1..last_frame.

    The person-only predictions are loaded from FILTERED_PKL_FILE. Detections
    of the first frame are numbered 1..n; each following frame is matched
    against its predecessor with twoFrameMatching, which reuses ids of matched
    detections and issues new ones for the rest.

    last_frame: number of the last frame to process (default 5, the range the
                original loop covered).

    Side effects: resets and advances the global PERSON_COUNTER and adds
    'PC<n>' entries to pc_label.

    Returns the loaded prediction dictionary, whose processed frames now
    carry a 'person_id' entry.
    """
    global PERSON_COUNTER

    # NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
    with open(FILTERED_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)

    # Init per image evaluation
    num_groundtruth_classes = 1
    matching_iou_threshold = 0.5
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000

    pie = pc_utils.pc_PerImageEvaluation(num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,nms_max_output_boxes)

    # First frame: every detection is a new person.
    gt = ev_data[str(1).zfill(6)] # treated as gt
    first_count = int(gt['num_detections'])
    gt['person_id'] = np.arange(1, first_count + 1)
    # Labels are keyed by the person id itself (1..n) so they line up with 'person_id'.
    for person_id in range(1, first_count + 1):
        pc_label[person_id] = { 'id': person_id, 'name' : 'PC'+ str(person_id) }
    # Continue numbering after the ids just issued; starting again from 0
    # would hand out ids that collide with those of the first frame.
    PERSON_COUNTER = first_count

    # A dedicated frame variable keeps the frame number from being clobbered
    # by the detection loops above.
    for frame in range(2, last_frame + 1):
        image_id = str(frame).zfill(6)
        dt = ev_data[image_id] # treated as dt
        # The tagged detections serve as gt for the next frame.
        gt = twoFrameMatching(pie, dt, gt)
        ev_data[image_id] = gt

    return ev_data



In [None]:
# from object_detection.utils import np_box_list
# from object_detection.utils import np_box_list_ops

# detected_boxlist = np_box_list.BoxList(detected_boxes)
# print detected_boxlist
# detected_boxlist.add_field('scores', detected_scores)

# print detected_boxlist.get()
# print detected_scores
# detected_boxlist = np_box_list_ops.non_max_suppression(detected_boxlist, pie.nms_max_output_boxes, pie.nms_iou_threshold)

# print detected_boxlist.get()
# print detected_boxlist.get_field('scores')

# After sort
# print "After sort"
# boxlist = np_box_list_ops.sort_by_field(detected_boxlist, 'scores')
# print boxlist.get()
# scores = boxlist.get_field('scores')
# print scores


# Main

In [None]:
# Filter prediction to only person class
def filter_prediction():
    """Reduce the stored predictions to the person class and save the result.

    Loads the prediction dictionary from PREDICTION_PKL_FILE, keeps for every
    frame only the detections whose class is 1 (the person class in the COCO
    dataset), and writes the dictionary to FILTERED_PKL_FILE.

    Frames are processed in order, starting at '000001' and continuing for as
    long as the next zero-padded frame id is present, so the function works
    for sequences of any length instead of one fixed frame count.
    """
    print(PREDICTION_PKL_FILE)
    # NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
    with open(PREDICTION_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)

    # Need to sequentially analyse
    frame = 1
    image_id = str(frame).zfill(6) # File name without extension
    while image_id in ev_data:
        prediction = ev_data[image_id]
        # Person class = 1 in COCO dataset
        idx = prediction['detection_classes'] == 1
        prediction['num_detections'] = np.count_nonzero(idx)
        prediction['detection_boxes'] = prediction['detection_boxes'][idx, :]
        prediction['detection_scores'] = prediction['detection_scores'][idx]
        prediction['detection_classes'] = prediction['detection_classes'][idx]
        # Single-argument print keeps the output identical on Python 2 and 3.
        print("Image {} dt {}".format(image_id, prediction['num_detections']))

        frame += 1
        image_id = str(frame).zfill(6)

    # Store in pickle
    with open(FILTERED_PKL_FILE,'wb') as fd2:
        pickle.dump(ev_data, fd2)

In [None]:
# Evaluate the prediction
def evaluate(first_frame=1, last_frame=4):
    """Draw id-labelled person boxes for a range of frames.

    Loads the person-only predictions from FILTERED_PKL_FILE, gives the
    detections of every frame the dummy person ids 1..n, registers matching
    'PC<n>' labels in pc_label and saves the marked image through drawBB.

    first_frame, last_frame: inclusive frame range to process; the defaults
    cover the frames 1 to 4 that the original loop handled.
    """
    print(FILTERED_PKL_FILE)
    # NOTE: pickle.load can execute arbitrary code -- only load files written by this notebook.
    with open(FILTERED_PKL_FILE,'rb') as fd:
        ev_data = pickle.load(fd)

    # Need to sequentially analyse
    for frame in range(first_frame, last_frame + 1):
        # File name without extension
        image_id = str(frame).zfill(6)
        prediction = ev_data[image_id]

        # Give dummy person id. Labels are keyed 1..n, the same values stored
        # in 'person_id', so every drawn box finds its label.
        count = int(prediction['num_detections'])
        for person_id in range(1, count + 1):
            pc_label[person_id] = { 'id': person_id, 'name' : 'PC'+ str(person_id) }
        prediction['person_id'] = np.arange(1, count + 1)

        # Plot the BB on image
        image_path = image_id + '.jpg'
        drawBB(image_path, prediction, isIDMode=True)

In [None]:
def main():
    """Entry point of the notebook: runs evaluate() and prints 'Done'."""
    # The filtering step is disabled here; presumably it is only needed when
    # the person-only pickle has to be regenerated -- confirm before enabling.
    #filter_prediction()
    #ev_data = evaluate()
    evaluate()
    # Function-call form works on Python 2 and 3 and matches the other print(...) calls in this notebook.
    print("Done")
main()