# Evaluate trained model

In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
from matplotlib import pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import xml.etree.ElementTree as ET

# Import utilities
from utils import label_map_util
from utils import visualization_utils as vis_util



%matplotlib inline

## Configuration: adjust the paths, model name, etc. to your needs

In [2]:
# Base directory for every path below: the parent of the current working
# directory (intended to become the git repository base).
CWD_PATH = os.path.join(os.getcwd(), os.pardir)

# Directory under CWD_PATH holding the exported model
# (the result from Step 6 Export Inference Graph).
MODEL_NAME = 'inference_graph'

# Passed as max_num_classes when the label map is converted to categories.
NUM_CLASSES = 1

# Image to evaluate: directory under CWD_PATH and file name
# (the file name is meant to become a command-line argument).
IMAGE_PATH = 'images'
IMAGE_NAME = '01-20190414173244-01.jpg'

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

In [3]:
# Image that will be run through the detector.
PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_PATH, IMAGE_NAME)

# Label map file handed to load_label_map().
LABEL_MAP = os.path.join(CWD_PATH, 'data', 'object-detection.pbtxt')

# Frozen inference graph inside the exported model directory.
PATH_TO_MODEL = os.path.join(
    CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

### Helper Functions

In [4]:
def load_label_map(path_to_labels, num_classes):
    """Load a label map file and build the category structures from it.

    Args:
        path_to_labels: path of the label map file, passed to
            ``label_map_util.load_labelmap``.
        num_classes: forwarded as ``max_num_classes`` when the label map is
            converted to categories.

    Returns:
        A ``(categories, category_index)`` tuple: the value returned by
        ``label_map_util.convert_label_map_to_categories`` and the index
        built from it by ``label_map_util.create_category_index``.
    """
    parsed_map = label_map_util.load_labelmap(path_to_labels)
    category_list = label_map_util.convert_label_map_to_categories(
        parsed_map,
        max_num_classes=num_classes,
        use_display_name=True,
    )
    return category_list, label_map_util.create_category_index(category_list)

# from https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
def load_image_into_numpy_array(image):
  """Convert a PIL image into a ``(height, width, 3)`` uint8 numpy array.

  Images that are not already in ``RGB`` mode (grayscale, palette, RGBA, ...)
  are converted to RGB first; without that step the fixed 3-channel reshape
  below raises for them.

  Args:
    image: PIL image (any object exposing ``mode``, ``size``, ``getdata()``
      and ``convert()``).

  Returns:
    numpy array of dtype uint8 with shape ``(height, width, 3)``.
  """
  if image.mode != 'RGB':
    image = image.convert('RGB')
  # PIL reports size as (width, height); the array is laid out rows-first.
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)

# from https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
def run_inference_for_single_image(image, graph):
  """Run the detection graph on one image and return its detections.

  Args:
    image: numpy array fed to the graph's ``image_tensor:0`` input. The mask
      branch reads the image height and width from ``image.shape[1]`` and
      ``image.shape[2]``, so the array is expected to carry a leading batch
      axis.
    graph: ``tf.Graph`` into which the detection model has been imported.

  Returns:
    dict with the batch dimension stripped from every entry:
      ``num_detections`` (int), ``detection_classes`` (int64 array),
      ``detection_boxes``, ``detection_scores`` and, only when the graph
      exposes masks, ``detection_masks``.
  """
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # `utils_ops` is not imported anywhere else in this notebook, so the
        # mask branch used to fail with NameError. Import it here, from the
        # same `utils` package the notebook's other helpers come from, so
        # box-only models never need the module.
        from utils import ops as utils_ops
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[1], image.shape[2])
        # Binarize the reframed masks at 0.5.
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: image})

      # all outputs are float32 numpy arrays, so convert types as appropriate
      # and keep only the first (single) batch entry of each output.
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.int64)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict


In [5]:
# Build the category list and the category index from the configured label map;
# category_index is what the visualization call below uses for box labels.
categories, category_index = load_label_map(LABEL_MAP, NUM_CLASSES)

## Load (frozen) Model into Memory

In [6]:
# Create a dedicated graph and import the serialized frozen model into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    # Read the frozen graph file as raw bytes and parse it into the GraphDef.
    with tf.gfile.GFile(PATH_TO_MODEL, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a name prefix, so tensors can be
        # looked up by names such as 'image_tensor:0'.
        tf.import_graph_def(od_graph_def, name='')

    # NOTE(review): within the visible notebook this session is only referenced
    # by commented-out text; run_inference_for_single_image() opens its own
    # tf.Session, and this one is never closed — confirm whether it is needed.
    sess = tf.Session(graph=detection_graph)
   

In [8]:
# Open the image to evaluate.
image =  Image.open(PATH_TO_IMAGE)
#imshow(np.asarray(image))
# Convert the image to a (height, width, 3) uint8 array, then prepend a batch
# axis so the array handed to the model has shape (1, height, width, 3).
image_np = load_image_into_numpy_array(image)
image_np_expanded = np.expand_dims(image_np, axis=0)

## detect!
#(boxes, scores, classes, num) = sess.run([detection_boxes, detection_scores, detection_classes, num_detections],
#    feed_dict={image_tensor: image_expanded})

In [9]:
# Run the detector on the batched image; output_dict holds the detection
# boxes, classes, scores and count for this single image.
output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)

In [11]:
# Overlay the detections (boxes, class labels, scores and, when the model
# produced them, instance masks) on image_np. Box coordinates in output_dict
# are normalized, hence use_normalized_coordinates=True.
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index,
    instance_masks=output_dict.get('detection_masks'),
    use_normalized_coordinates=True,
    line_thickness=4,
)

# The results are expected to be drawn on image_np at this point; show it at
# the configured figure size and print which file it came from.
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
print(IMAGE_NAME)


01-20190414173244-01.jpg


<Figure size 864x576 with 0 Axes>

In [None]:
# Pull the per-detection boxes and scores out of the inference result for the
# filtering step below.
boxes = output_dict['detection_boxes']
scores = output_dict['detection_scores']


In [None]:
def get_boxes_to_use(detection_boxes, detection_scores, min_score_threshold=0.8):
    """Select the detection boxes whose score exceeds a threshold.

    Args:
        detection_boxes: array of boxes; ``shape[0]`` is the number of boxes
            and indexing with ``[i]`` yields box ``i``.
        detection_scores: per-box scores indexed like ``detection_boxes``, or
            ``None`` to keep every box.
        min_score_threshold: a box is kept only when its score is strictly
            greater than this value.

    Returns:
        list of the kept boxes, in their original order.
    """
    box_count = detection_boxes.shape[0]
    if detection_scores is None:
        # Without scores there is nothing to filter on: keep everything.
        return [detection_boxes[idx] for idx in range(box_count)]
    return [
        detection_boxes[idx]
        for idx in range(box_count)
        if detection_scores[idx] > min_score_threshold
    ]
    

In [None]:
# Keep the detections that score above the default threshold and display the
# first of them.
# NOTE(review): boxes_to_use[0] raises IndexError when no detection passes the
# threshold.
boxes_to_use = get_boxes_to_use(boxes, scores)
boxes_to_use[0]

In [None]:
def get_normalized_coordinates(box, image_size):
    '''
    Scale a box to the given image size and return integer coordinates.

    box is unpacked as (ymin, xmin, ymax, xmax); the x values are multiplied
    by image_size[0] (width) and the y values by image_size[1] (height), and
    each product is truncated with int().

    Returns a dict with keys 'xmin', 'xmax', 'ymin', 'ymax'
    (left, right, top, bottom).
    '''
    rel_top, rel_left, rel_bottom, rel_right = box
    width, height = image_size[0], image_size[1]
    return {
        'xmin': int(rel_left * width),
        'xmax': int(rel_right * width),
        'ymin': int(rel_top * height),
        'ymax': int(rel_bottom * height),
    }
    

In [None]:
# Scale the first kept box to pixel coordinates of the loaded image
# (PIL's image.size is (width, height)).
box = get_normalized_coordinates(boxes_to_use[0], image.size)

In [None]:
# Display the pixel-coordinate box computed above.
box

In [None]:
def _add_text_child(parent, tag, text):
    """Append a ``tag`` element with the given text to ``parent`` and return it."""
    child = ET.SubElement(parent, tag)
    child.text = text
    return child


def box_to_labelimg_xml(filename, image_size, boxes, label='dog', path=None):
    """Build a labelImg-style (Pascal VOC) annotation tree for one image.

    Args:
        filename: image file name written to ``<filename>``.
        image_size: ``(width, height)`` written to ``<size>``; depth is
            always written as 3.
        boxes: iterable of dicts with keys ``'xmin'``, ``'xmax'``, ``'ymin'``
            and ``'ymax'``; each one becomes an ``<object>`` element.
        label: class name written to every object's ``<name>``.
        path: optional location of the image file. When given, it is written
            to a ``<path>`` element right after ``<filename>``; when ``None``
            (the default) no ``<path>`` element is emitted.

    Returns:
        ``xml.etree.ElementTree.ElementTree`` rooted at ``<annotation>``.
    """
    root = ET.Element('annotation')
    _add_text_child(root, 'folder', 'images')
    _add_text_child(root, 'filename', filename)
    if path is not None:
        # str() so that path-like objects serialize as well as plain strings.
        _add_text_child(root, 'path', str(path))

    source = ET.SubElement(root, 'source')
    _add_text_child(source, 'database', 'Unknown')

    size = ET.SubElement(root, 'size')
    _add_text_child(size, 'width', str(image_size[0]))
    _add_text_child(size, 'height', str(image_size[1]))
    _add_text_child(size, 'depth', '3')

    _add_text_child(root, 'segmented', '0')

    # Each box becomes one <object> with fixed pose/truncated/difficult values.
    for box in boxes:
        obj = ET.SubElement(root, 'object')
        _add_text_child(obj, 'name', label)
        _add_text_child(obj, 'pose', 'Unspecified')
        _add_text_child(obj, 'truncated', '0')
        _add_text_child(obj, 'difficult', '0')
        bndbox = ET.SubElement(obj, 'bndbox')
        for key in ('xmin', 'xmax', 'ymin', 'ymax'):
            _add_text_child(bndbox, key, str(box[key]))

    return ET.ElementTree(root)
    

In [None]:
# Example: build a labelImg annotation containing the same pixel box twice and
# print the resulting XML.
# NOTE(review): this rebinds `boxes`, which an earlier cell set to
# output_dict['detection_boxes']; re-running get_boxes_to_use(boxes, scores)
# after this cell will receive this list instead of the detection array.
boxes = []
boxes.append(box)
boxes.append(box)
# NOTE(review): the file name and the (640,480) size are hard-coded here, while
# `box` was scaled to image.size — confirm they match before saving real labels.
xml = box_to_labelimg_xml('file.jpg', (640,480), boxes)
ET.dump(xml)
# To get the XML as a string instead of printing it: ET.tostring(xml.getroot())