# Object detection Examples

&nbsp;

<div style="text-align: left;">
    <img src="../utils/1ampere_logo_®_primary_stacked_rgb.png" alt="Image not found" width="50%"/>    
</div>

<br>

Ampere AI software stack is the software acceleration layer of Ampere Cloud Native Processors specifically dedicated to accelerating AI workloads running on Ampere Processors. Ampere Optimized AI Frameworks include PyTorch, TensorFlow, and ONNXRuntime. This drop-in library seamlessly supports all AI applications developed in the most popular AI frameworks. It works  right out-of-the-box without API changes or any additional coding. Additionally, the Ampere AI software engineering team provides the publicly accessbile Ampere Model Library (AML) for testing and benchmarking the performance of Ampere Cloud Native Processors for some of the most common AI inference workloads.

Please visit us at https://amperecomputing.com


## COCO Dataset Overview
<div style="text-align: left;">
    <img src="https://cocodataset.org/images/coco-logo.png" alt="nn" style="width: 200px;"/>
</div>

<br>

These examples are using subset of COCO object detection validation set from year 2014.
COCO is a large-scale object detection dataset that has been instrumental in advancing computer vision and deep learning research.

More info can be found here: https://cocodataset.org/

&nbsp;

In [1]:
import os
import time
import torch
import torchvision
import subprocess
import numpy as np
from matplotlib import pyplot as plt

#DELETE BELOW
import sys
sys.path.append('..')

from utils.cv.coco import COCODataset
import utils.benchmark as bench_utils
import utils.misc as utils
import utils.post_processing as pp
import sys
import cv2

LAT_BATCH_SIZE = 1

  from .autonotebook import tqdm as notebook_tqdm


## Latency with SSD VGG16 in fp32 precision

AIO offers a significant speed-up in standard fp32 inference scenarios. AIO exposes API to control behavior of the optimizer. This example shows the performance of SSD VGG16 model in fp32 precision.
Original ResNet paper can be found here: https://arxiv.org/pdf/1512.03385.pdf

In [2]:
# ! AIO_NUM_THREADS should be set prior to launching jupyter notebook !
num_threads = os.getenv('AIO_NUM_THREADS')
images_path = os.getenv('COCO_IMG_PATH')
anno_path = os.getenv('COCO_ANNO_PATH')

if num_threads is None:
    print("AIO_NUM_THREADS not defined")
    exit()
else:
    print("AIO_NUM_THREADS = {}".format(num_threads))

# Set Pytorch intra thread count, which should match AIO_NUM_THREADS
torch.set_num_threads(int(num_threads))

# Load model and apply Torchscript for inference deployment
model = torchvision.models.detection.ssd300_vgg16(pretrained=True)
model.eval()
model_script = torch.jit.script(model)
frozen_script = torch.jit.freeze(model_script)

# ImageNet dataset initialization
input_shape = (300, 300)
dataset_aio = COCODataset(LAT_BATCH_SIZE, "BGR", "COCO_val2014_000000000000", images_path, anno_path,
                          pre_processing="PyTorch_objdet", sort_ascending=True, order="NCHW")
dataset_non_aio = COCODataset(LAT_BATCH_SIZE, "BGR", "COCO_val2014_000000000000", images_path, anno_path,
                          pre_processing="PyTorch_objdet", sort_ascending=True, order="NCHW")


input_array = dataset_aio.get_input_array(input_shape)
num_of_runs = dataset_aio.available_instances
if num_of_runs <= 2:
    print("The first two runs are warm up. Please provide more than two input images")
    exit()

torch._C._aio_force_enable()
count = 0
total_time = 0
for _ in range(num_of_runs):
    with torch.no_grad():
        start = time.time()
        output_tensor = frozen_script(input_array)
        end = time.time()
        output_aio = output_tensor[1]
        for i in range(LAT_BATCH_SIZE):
            for d in range(output_aio[i]['boxes'].shape[0]):
                dataset_aio.submit_bbox_prediction(
                    i,
                    dataset_aio.convert_bbox_to_coco_order(output_aio[i]['boxes'][d].tolist()),
                    output_aio[i]['scores'][d].item(),
                    output_aio[i]['labels'][d].item()
                )
        count = count + 1
        if count > 2:
            total_time += end - start
latency_aio = total_time / (count - 2) 

torch._C._aio_force_disable()
input_array = dataset_non_aio.get_input_array(input_shape)
num_of_runs = dataset_non_aio.available_instances
count = 0
total_time = 0
for _ in range(num_of_runs):
    with torch.no_grad():
        start = time.time()
        output_tensor = frozen_script(input_array)
        end = time.time()
        output_non_aio = output_tensor[1]
        for i in range(LAT_BATCH_SIZE):
            for d in range(output_non_aio[i]['boxes'].shape[0]):
                dataset_non_aio.submit_bbox_prediction(
                    i,
                    dataset_non_aio.convert_bbox_to_coco_order(output_non_aio[i]['boxes'][d].tolist()),
                    output_non_aio[i]['scores'][d].item(),
                    output_non_aio[i]['labels'][d].item()
                )
        count = count + 1
        if count > 2:
            total_time += end - start
latency_non_aio = total_time / (count - 2)
        

AIO_NUM_THREADS not defined


TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'

In [None]:
# visualizing output
# for the purpose of visualizing results let's load the image without pre-processing
img = cv2.imread(str(dataset_aio.path_to_latest_image))

def post_process(image, output):
    for i in range(LAT_BATCH_SIZE):
                for d in range(output_aio[i]['boxes'].shape[0]):
                    
                    if output_aio[i]["scores"][d] < 0.3:
                        continue
                        
                    converted_bbox = dataset_aio.convert_bbox_to_coco_order(
                        output_aio[i]['boxes'][d].tolist(),
                        absolute=False
                    )
                
                    converted_bbox = dataset_aio.rescale_bbox(i, converted_bbox)
                
                    image = pp.draw_bbox(img, converted_bbox, int(output_aio[i]["labels"][d].tolist()))

    return image
    
# show the image
image = cv2.cvtColor(post_process(img, output_aio), cv2.COLOR_BGR2RGB)
plt.imshow(image)
plt.show()
print("SSD VGG16 FP32 output with AIO enabled\n")
print("Latency = {:.0f} ms".format(latency_aio * 1000))
dataset_aio.summarize_accuracy()

image = cv2.cvtColor(post_process(img, output_non_aio), cv2.COLOR_BGR2RGB)
plt.imshow(image)
plt.show()
print("SSD VGG16 FP32 output with AIO disabled\n")
print("Latency = {:.0f} ms".format(latency_non_aio * 1000))
dataset_non_aio.summarize_accuracy()