# Object Detection Examples

&nbsp;

<img align="left" src="https://media-exp1.licdn.com/dms/image/C4E1BAQFay3CGU2VmRg/company-background_10000/0/1621019049425?e=2159024400&v=beta&t=dmPCWeJlWvkzmM019V4_oKMluIPkQPX52i0zdgP-x2M" alt="nn" style="width: 800px;"/>

&nbsp;

&nbsp;

&nbsp;

&nbsp;

&nbsp;

<img align="left" src="https://media-exp1.licdn.com/dms/image/C4E0BAQHJY2WpOb492w/company-logo_200_200/0/1620816916063?e=2159024400&v=beta&t=U-VvNpjzV2DLp4EBIeqI8ZIWYUekPeJOQyxfXaJBMnU" alt="nn" style="width: 25px;"/>

Please visit us at https://onspecta.com

&nbsp;

## COCO Dataset Overview
<img align="left" src="https://cocodataset.org/images/coco-logo.png" alt="nn" style="width: 200px;"/>

&nbsp;

&nbsp;

These examples use a subset of the COCO object detection validation set from 2014.
COCO is a large-scale object detection, segmentation, and captioning dataset.

More info can be found here: https://cocodataset.org

&nbsp;

In [None]:
import os
import cv2
import time
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt

from utils.coco import COCODataset
import utils.post_processing as pp
import utils.benchmark as bench_utils

# number of images per batch; passed to COCODataset and iterated over in post-processing
BATCH_SIZE = 1

## YOLO v4 Tiny in fp32 precision using the TF2 API

DLS offers a significant speed-up in standard fp32 inference scenarios.
This example shows the performance of the YOLO v4 Tiny model in fp32 precision.
The original YOLO v4 paper can be found here: https://arxiv.org/pdf/2004.10934.pdf

In [None]:
# location of the YOLO v4 Tiny model in fp32 precision
path_to_model = "yolo_v4_tiny/yolo_v4_tiny_tf_fp32"

# shape requested from the dataset when fetching the model input
input_shape = (416, 416)

# detection score threshold
# NOTE(review): not referenced by the YOLO cells of this notebook - confirm it is needed here
threshold = 0.3

In [None]:
# first let's load the model

# load the model with the serving tag; the tag constant is taken from the
# tf.saved_model namespace, so no additional import is required
saved_model_loaded = tf.saved_model.load(path_to_model, tags=[tf.saved_model.SERVING])

# the default serving signature is the callable used for inference in the cells below
yolo = saved_model_loaded.signatures['serving_default']

In [None]:
# ! DLS_NUM_THREADS should be set prior to launching jupyter notebook !

# setting the configuration: the intra-op thread count comes from the benchmark
# utilities, while inter-op parallelism is limited to a single thread
# NOTE(review): this cell runs after the model-loading cell; TensorFlow may refuse to change
# threading settings once its runtime is initialized - confirm the settings take effect
num_intra_op_threads = bench_utils.get_intra_op_parallelism_threads()
tf.config.threading.set_intra_op_parallelism_threads(num_intra_op_threads)
tf.config.threading.set_inter_op_parallelism_threads(1)

In [None]:
# set up the COCO dataset helper, requesting the RGB color model and YOLO pre-processing
coco = COCODataset(
    pre_processing="YOLO",
    color_model="RGB",
    images_filename_base="COCO_val2014_000000000000",
    batch_size=BATCH_SIZE,
)

# keep the image as read from disk by OpenCV (no pre-processing) for visualizing results
# NOTE(review): path_to_latest_image is read before any get_input_array() call in this
# section - confirm it is already populated at this point
path_to_image = str(coco.path_to_latest_image)
img = cv2.imread(path_to_image)

In [None]:
# running the model with DLS enabled

tf.DLS.force_enable()

# warm-up run, excluded from the measurement
_ = yolo(tf.constant(coco.get_input_array(input_shape)))

# the input tensor is prepared before the timer starts, so that only the model call
# is measured (the SSD cells likewise build their feed_dict ahead of the timed run)
input_tensor = tf.constant(coco.get_input_array(input_shape))

# actual run; perf_counter is a monotonic, high-resolution clock, which makes it
# a better fit for measuring short intervals than the wall-clock time.time()
start = time.perf_counter()
output = yolo(input_tensor)
finish = time.perf_counter()

latency_ms = (finish - start) * 1000
print("\nYOLO v4 Tiny FP32 latency with DLS: {:.0f} ms\n".format(latency_ms))

## SSD Inception v2 in fp16 precision using the TF1 API

This example shows the performance of the SSD Inception v2 model converted to fp16 precision.
Models in fp16 precision are expected to offer accuracy on par with their fp32 counterparts and up to 2x inference speed-up
on compatible hardware when run with DLS. You can read more on the SSD architecture here: https://arxiv.org/pdf/1512.02325.pdf

In [None]:
# location of the serialized SSD Inception v2 graph (.pb) in fp16 precision
path_to_model = "ssd_inception_v2/ssd_inception_v2_tf_fp16.pb"

# shape requested from the dataset when fetching the model input
input_shape = (300, 300)

# detections scoring below this value are skipped during post-processing
threshold = 0.3

# names of the graph tensors fetched on every run
output_names = [
    "detection_classes:0",
    "detection_boxes:0",
    "detection_scores:0",
    "num_detections:0",
]

In [None]:
# first let's load the model

# read the serialized graph definition from disk
with tf.compat.v1.gfile.GFile(path_to_model, 'rb') as pb_file:
    serialized_graph = pb_file.read()

# parse the bytes into a GraphDef message
graph_def = tf.compat.v1.GraphDef()
graph_def.ParseFromString(serialized_graph)

# import the definition into a fresh graph; the empty name keeps the original tensor names
graph = tf.compat.v1.Graph()
with graph.as_default():
    tf.compat.v1.import_graph_def(graph_def, name="")

In [None]:
# ! DLS_NUM_THREADS should be set prior to launching jupyter notebook !

# creating TF config: soft placement allowed, a single inter-op thread and
# the intra-op thread count provided by the benchmark utilities
config = tf.compat.v1.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=bench_utils.get_intra_op_parallelism_threads(),
    inter_op_parallelism_threads=1,
)

In [None]:
# preparing input and output dictionaries

# map every requested output name to the corresponding tensor of the graph
output_dict = {}
for tensor_name in output_names:
    output_dict[tensor_name] = graph.get_tensor_by_name(tensor_name)

# initialization of COCO dataset (BGR color model)
coco = COCODataset(
    images_filename_base="COCO_val2014_000000000000",
    color_model="BGR",
    batch_size=BATCH_SIZE,
)

# feed the array returned by the dataset for the requested shape to the graph's image tensor
image_tensor = graph.get_tensor_by_name("image_tensor:0")
feed_dict = {image_tensor: coco.get_input_array(target_shape=input_shape)}

# for the purpose of visualizing results let's load the image without pre-processing
path_to_image = str(coco.path_to_latest_image)
img = cv2.imread(path_to_image)

In [None]:
# running the model with DLS enabled

tf.DLS.force_enable()

with tf.compat.v1.Session(config=config, graph=graph) as sess:
    # warm-up run, excluded from the measurement
    _ = sess.run(output_dict, feed_dict)

    # actual run; perf_counter is a monotonic, high-resolution clock, which makes it
    # a better fit for measuring short intervals than the wall-clock time.time()
    start = time.perf_counter()
    output_dls = sess.run(output_dict, feed_dict)
    finish = time.perf_counter()

latency_ms = (finish - start) * 1000
print("\nSSD Inception v2 FP16 latency with DLS: {:.0f} ms\n".format(latency_ms))

In [None]:
# running the model with DLS disabled

tf.DLS.force_disable()

with tf.compat.v1.Session(config=config, graph=graph) as sess:
    # warm-up run, excluded from the measurement
    _ = sess.run(output_dict, feed_dict)

    # actual run; perf_counter is a monotonic, high-resolution clock, which makes it
    # a better fit for measuring short intervals than the wall-clock time.time()
    start = time.perf_counter()
    output_no_dls = sess.run(output_dict, feed_dict)
    finish = time.perf_counter()

latency_ms = (finish - start) * 1000
print("\nSSD Inception v2 FP16 latency without DLS: {:.0f} ms\n".format(latency_ms))

In [None]:
# visualizing output

# post-processing
def post_process(image, output):
    """Draw the detections that pass the score threshold onto a copy of `image`.

    Relies on the notebook-level `BATCH_SIZE`, `threshold`, `input_shape` and `coco`.

    Args:
        image: image to draw on; it is copied first, so the caller's array is left untouched.
        output: dictionary of model outputs keyed by "num_detections:0", "detection_scores:0",
            "detection_boxes:0" and "detection_classes:0", each indexed by batch item.

    Returns:
        The image returned by `pp.draw_bbox` after every accepted detection has been drawn
        (the unmodified copy when no detection is accepted).
    """
    # work on a copy, so that boxes drawn for one set of results cannot show up in the
    # visualization of another set produced from the same source image
    image = image.copy()

    for i in range(BATCH_SIZE):
        scores = output["detection_scores:0"][i]
        boxes = output["detection_boxes:0"][i]
        classes = output["detection_classes:0"][i]

        for d in range(int(output["num_detections:0"][i])):

            # if the score of the detected object does not exceed the set threshold we skip it
            if scores[d] < threshold:
                continue

            # first let's switch order of bbox boundaries from [top left bottom right] to [left top right bottom]
            # NOTE(review): all four coordinates are scaled by input_shape[0], which matches
            # both axes only when the input shape is square - confirm for non-square inputs
            converted_bbox = coco.convert_bbox_to_coco_order(
                boxes[d] * input_shape[0],
                1, 0, 3, 2,
                absolute=False
            )

            # then rescale back to original image ratio
            converted_bbox = coco.rescale_bbox(i, converted_bbox)

            # we can now draw bbox on the (copied) original input image
            image = pp.draw_bbox(image, converted_bbox, int(classes[d]))

    return image

# show the post-processed images: one figure per set of results, followed by its caption
for detections, caption in (
    (output_dls, "Output with DLS enabled\n"),
    (output_no_dls, "Output with DLS disabled\n"),
):
    annotated = post_process(img, detections)
    # the image is converted from BGR to RGB before being passed to matplotlib
    plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    plt.show()
    print(caption)