# Object Detection using Tensorflow


## Import Required Libraries



In [None]:
# importing tensorflow library
import tensorflow as tf

In [None]:
# Print the installed TensorFlow version.
print(tf.__version__)

# Report the GPU device name as seen by tf.test.gpu_device_name().
print("The following GPU devices are available: %s" % tf.test.gpu_device_name())

In [None]:
# Show the physical devices TensorFlow reports via tf.config.
tf.config.list_physical_devices()

In [None]:
# For running inference on the TF-Hub module.
import tensorflow_hub as hub

# For displaying the image.
import matplotlib.pyplot as plt

# For drawing onto the image.
import numpy as np
from PIL import Image, ImageColor, ImageDraw, ImageFont, ImageOps

# For measuring the inference time.
import time


### Function for showing image

In [None]:
def show_image(image):
    """Display ``image`` with matplotlib on a new 20x15 figure, grid disabled.

    Args:
        image: Anything ``plt.imshow`` accepts (the callers in this file pass
            PIL images and NumPy arrays).
    """
    # The figure handle is not needed afterwards; pyplot keeps it as the
    # current figure for the calls below.
    plt.figure(figsize=(20, 15))
    plt.grid(False)
    plt.imshow(image)

### Function for loading and resizing the image

In [None]:
def load_image_resize(file_path, new_width=256, new_height=256, display=False):
    """Open an image, fit it to the requested size for preview, and return its path.

    The resized copy exists only in memory: it is optionally displayed and then
    discarded. Nothing is written back to disk, so code that later reads the
    returned path still gets the image at its original resolution.

    Args:
        file_path: Path of the image file to open.
        new_width: Target width in pixels of the fitted preview.
        new_height: Target height in pixels of the fitted preview.
        display: When true, show the fitted preview with ``show_image``.

    Returns:
        ``file_path``, unchanged.
    """
    # Use a context manager so the underlying file handle is closed promptly.
    with Image.open(file_path) as source_image:
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # resampling filter and is available in both old and new releases.
        pil_image = ImageOps.fit(source_image, (new_width, new_height), Image.LANCZOS)

    if display:
        show_image(pil_image)
    return file_path

### Function to create bounding box over object 

In [None]:
def _text_size(font, text):
    """Return ``(width, height)`` of ``text`` rendered with ``font``.

    ``font.getsize`` was removed in Pillow 10, so prefer ``font.getbbox`` when
    the font object provides it and fall back to ``getsize`` otherwise.
    """
    if hasattr(font, "getbbox"):
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def bounding_box_over_object(image, y_min, x_min, y_max, x_max, color, font, thickness=7, display_str_list=()):
    """Draw a bounding box and its label strings onto ``image`` in place.

    Args:
        image: PIL image to draw on; it is modified in place.
        y_min, x_min, y_max, x_max: Box corners as fractions of the image
            height/width (they are multiplied by the image size below).
        color: Colour used for the box outline and the label background.
        font: PIL font used to measure and render the label text.
        thickness: Line width of the box outline in pixels.
        display_str_list: Label strings to stack next to the box.

    Returns:
        None.
    """
    draw = ImageDraw.Draw(image)  # drawing context bound to the image
    im_width, im_height = image.size

    # Scale the fractional coordinates to pixel coordinates.
    (left, right, top, bottom) = (x_min * im_width, x_max * im_width, y_min * im_height, y_max * im_height)

    # Closed polyline: start and end at the top-left corner.
    draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=thickness, fill=color)

    # If the total height of the display strings added to the top of the
    # bounding box exceeds the top of the image, stack the strings below the
    # top edge of the box instead of above it.
    display_str_heights = [_text_size(font, label)[1] for label in display_str_list]

    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = top + total_display_str_height

    # Reverse list and print from bottom to top.
    for display_str in display_str_list[::-1]:
        text_width, text_height = _text_size(font, display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle([(left, text_bottom - text_height - 2 * margin), (left + text_width, text_bottom)], fill=color)

        draw.text((left + margin, text_bottom - text_height - margin), display_str, fill="black", font=font)
        # Move up by the full height of the label just drawn (text plus both
        # margins) so the next label sits directly above it without overlap.
        text_bottom -= text_height + 2 * margin


### Function to draw boxes if score is >= Min Score

In [None]:

def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.12):
    """Overlay labeled boxes on an image with formatted scores and label names.

    Only the first ``max_boxes`` detections are considered, and of those only
    the ones whose score is at least ``min_score`` are drawn.

    Args:
        image: NumPy image array; the drawn result is copied back into it.
        boxes: Array whose rows are ``(y_min, x_min, y_max, x_max)``.
        class_names: Sequence of byte strings, one per box, decoded as ASCII
            for the label text.
        scores: Sequence of detection scores, one per box.
        max_boxes: Maximum number of leading detections to consider.
        min_score: Minimum score a detection needs in order to be drawn.

    Returns:
        The same ``image`` array, with boxes drawn into it when any qualified.
    """
    colors = list(ImageColor.colormap.values())  # palette of named PIL colours

    # 'arial.ttf' is not installed on every system; fall back to PIL's
    # built-in font instead of failing with OSError.
    try:
        font = ImageFont.truetype('arial.ttf', 15)
    except OSError:
        font = ImageFont.load_default()

    # Convert to a PIL image once (lazily, on the first qualifying box) rather
    # than round-tripping through PIL for every single box.
    image_pil = None
    for i in range(min(boxes.shape[0], max_boxes)):
        if scores[i] < min_score:
            continue
        if image_pil is None:
            image_pil = Image.fromarray(np.uint8(image)).convert("RGB")
        y_min, x_min, y_max, x_max = tuple(boxes[i])
        display_str = "{}: {}%".format(class_names[i].decode("ascii"), int(100 * scores[i]))
        color = colors[hash(class_names[i]) % len(colors)]
        bounding_box_over_object(image_pil, y_min, x_min, y_max, x_max, color, font, thickness=7, display_str_list=[display_str])

    if image_pil is not None:
        # Write the annotated pixels back into the caller's array in place.
        np.copyto(image, np.array(image_pil))
    return image

### Apply module

> TensorFlow modules provide various APIs to run detection directly.
<br>
> Here, the module's dataset is Open Images v4, and the module is saved locally to detect objects.

Pick an object detection module and apply it to the images.
<br>
Various TensorFlow object detection APIs are available.
<br>
Module used:

>* **ssd+mobilenet V2**: small and fast.


In [None]:
ls

In [None]:
# Handle passed to hub.load below to locate the detection module.
module_handle = "openimages_v4_ssd_mobilenet_v2_1"
# Alternative handle, kept for reference:
#module_handle = "faster_rcnn_openimages_v4_inception_resnet_v2_1"
# Load the module and keep its 'default' signature as the callable detector.
detector = hub.load(module_handle).signatures['default']

### Input:
A three-channel image of variable size - the model does NOT support batching. The input tensor is a tf.float32 tensor

### The output dictionary contains:

detection_boxes: a tf.float32 tensor of shape [N, 4] containing bounding box coordinates in the following order: [ymin, xmin, ymax, xmax].
<br><br>
detection_class_entities: a tf.string tensor of shape [N] containing detection class names as Freebase MIDs.
<br><br>
detection_class_names: a tf.string tensor of shape [N] containing human-readable detection class names.
<br><br>
detection_class_labels: a tf.int64 tensor of shape [N] with class indices.
<br><br>
detection_scores: a tf.float32 tensor of shape [N] containing detection scores.

### Function to load images and decode them into 3 channels

In [None]:
def load_img(path):
    """Read the file at ``path`` and decode it into a three-channel image tensor.

    Args:
        path: Location of the image file, as accepted by ``tf.io.read_file``.

    Returns:
        The tensor produced by ``tf.image.decode_jpeg`` with ``channels=3``.
    """
    raw_bytes = tf.io.read_file(path)
    return tf.image.decode_jpeg(raw_bytes, channels=3)

### Function to run object detector with inference time

In [None]:
def run_detector(detector, path):
    """Run ``detector`` on the image at ``path``, report timing, and show the result.

    Args:
        detector: Callable that takes a batched float32 image tensor and
            returns a mapping of output name to tensor.
        path: Path of the image file to load with ``load_img``.

    Returns:
        None. The number of detections and the inference time are printed and
        the annotated image is displayed with ``show_image``.
    """
    image_tensor = load_img(path)

    # Convert to float32 and add a leading batch axis for the detector.
    batched_input = tf.image.convert_image_dtype(image_tensor, tf.float32)[tf.newaxis, ...]

    # Time only the detector call itself.
    started_at = time.time()
    raw_outputs = detector(batched_input)
    finished_at = time.time()

    # Turn every output tensor into a NumPy array.
    outputs = {}
    for name, tensor in raw_outputs.items():
        outputs[name] = tensor.numpy()

    print("Found %d objects." % len(outputs["detection_scores"]))
    print("Inference time: ", finished_at - started_at)

    annotated = draw_boxes(
        image_tensor.numpy(),
        outputs["detection_boxes"],
        outputs["detection_class_entities"],
        outputs["detection_scores"],
    )
    show_image(annotated)

###  File Path of Images



In [None]:
ls

In [None]:

def detect_img(image_path):
    """Run the module-level ``detector`` on the image stored at ``image_path``.

    The path is first passed through ``load_image_resize`` (640x480, no
    preview), which hands back a path that is then given to ``run_detector``.

    Args:
        image_path: Path of the image file to analyse.
    """
    checked_path = load_image_resize(image_path, 640, 480)
    run_detector(detector, checked_path)

In [None]:
# Run detection on a single test image.
image_path = "test_images/people.png"
detect_img(image_path)

In [None]:
# Run detection on several test images, one after another.
# NOTE: despite its singular name, ``image_path`` holds a list in this cell;
# the name is kept so the notebook's variable bindings stay the same.
image_path = [
    "test_images/traffic.jpg",
    "test_images/pedestrian.jpg",
]
for path in image_path:
    detect_img(path)

In [None]:
# Run detection on a single test image.
image_path = "test_images/dog_cat.jpg"
detect_img(image_path)

In [None]:
# Run detection on a single test image.
image_path = "test_images/beach.jpg"
detect_img(image_path)

In [None]:
import cv2

In [None]:
def run_video_detector(detector, image_np):
    """Run ``detector`` on an in-memory frame, report timing, and show the result.

    Args:
        detector: Callable that takes a batched float32 image tensor and
            returns a mapping of output name to tensor.
        image_np: Image array for one frame. ``draw_boxes`` copies the
            annotated pixels back into this array, so it is modified in place.

    Returns:
        None. The number of detections and the inference time are printed and
        the annotated frame is displayed with ``show_image``.
    """
    # NOTE(review): show_image opens a new matplotlib figure on every call, so
    # invoking this once per video frame accumulates figures - confirm that is
    # intended.

    # Convert to float32 and add a leading batch axis for the detector.
    batched_input = tf.image.convert_image_dtype(image_np, tf.float32)[tf.newaxis, ...]

    # Time only the detector call itself.
    started_at = time.time()
    raw_outputs = detector(batched_input)
    finished_at = time.time()

    # Turn every output tensor into a NumPy array.
    outputs = {}
    for name, tensor in raw_outputs.items():
        outputs[name] = tensor.numpy()

    print("Found %d objects." % len(outputs["detection_scores"]))
    print("Inference time: ", finished_at - started_at)

    annotated = draw_boxes(
        image_np,
        outputs["detection_boxes"],
        outputs["detection_class_entities"],
        outputs["detection_scores"],
        min_score=0.20,
    )
    show_image(annotated)

In [None]:
def call_video_detector():
    """Run object detection on frames from capture device 0 until 'q' is pressed.

    Each frame is read with OpenCV, passed to ``run_video_detector`` (which
    draws the detections into the array it is given), and shown in an OpenCV
    window resized to 1024x780. The loop ends when 'q' is pressed or when no
    frame can be read; the capture device and windows are always released.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Read frame from camera.
            ret, frame_bgr = cap.read()
            if not ret:
                # No frame available (device missing, closed, or stream ended).
                break

            # OpenCV delivers frames in BGR order, while the still-image path
            # in this file feeds the detector RGB data; convert so both paths
            # agree. cvtColor returns a new array, leaving the raw frame intact.
            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

            # Detection is run exactly once per frame; the boxes are drawn
            # into frame_rgb in place.
            run_video_detector(detector, frame_rgb)

            # Display output (cv2.imshow expects BGR, so convert back).
            annotated_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
            cv2.imshow('Object Detection', cv2.resize(annotated_bgr, (1024, 780)))

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the device and close windows even if detection raised.
        cap.release()
        cv2.destroyAllWindows()


In [None]:
# Start the live detection loop on capture device 0.
call_video_detector()