# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# Imports

In [4]:
import numpy as np
import tensorflow as tf
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from object_detection.utils import ops as utils_ops

if tf.__version__ < '1.4.0':
  raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

%matplotlib inline

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/home/r/miniconda3/envs/cat/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/r/miniconda3/envs/cat/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/r/miniconda3/envs/cat/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/r/miniconda3/envs/cat/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/r/miniconda3/envs/cat/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/r/miniconda3/envs/cat/lib/py

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [8]:
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = 'model/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'data/mscoco_label_map.pbtxt'

#number of classes
NUM_CLASSES = 90

detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [9]:
def filter_class(classes, scores, b_boxes, class_filter, score_filter):
    return_list = []
    for i in range(classes.shape[0]):
        if classes[i]==class_filter and scores[i]>score_filter:
            return_list.append({'class':classes[i], 'score':scores[i], 'b_box':b_boxes[i]})
    return(return_list)

def filter_class_as_lists(classes, scores, b_boxes, class_filter, score_filter):
    #import ipdb; ipdb.set_trace() 
    return_classes = []
    return_scores = []
    return_b_boxes = []
    for i in range(classes.shape[0]):
        if classes[i]==class_filter and scores[i]>score_filter:
            return_classes.append(classes[i])
            return_scores.append(scores[i])
            return_b_boxes.append(b_boxes[i])
    return(return_classes,return_scores,return_b_boxes)

def add_icon_position(original_image, icon_logo,roi_coords):
    
    rows,cols,channels = original_image.shape
    print("original image shape: ",  original_image.shape)
    print("icon logo shape: ", icon_logo.shape)
    print("input roi: ", roi_coords)

    ## Normalized coordinates
    (left, right, top, bottom) = (roi_coords[0] * rows, roi_coords[2] * rows, 
                                  roi_coords[1] * cols, roi_coords[3] * cols)

    (left, right, top, bottom) = tuple(map(int,(left, right, top, bottom)))
    print("converted coords from normalized: ", (left, right, top, bottom))

    new_width = right - left
    new_height = bottom - top
    
    print("w: ", new_width)
    print("h: ", new_height)
    
    icon_logo = cv2.resize(icon_logo, (new_height,new_width))
    print("icon logo size: ", icon_logo.shape)
    roi =  original_image[left:right,top:bottom]
    
    print("new roi shape: ", roi.shape)

    # Now create a mask of logo and create its inverse mask also
    # in this case, is already B/W
    mask = cv2.cvtColor(icon_logo,cv2.COLOR_BGR2GRAY)
    print("mask shape: ", mask.shape)
    mask_inv = cv2.bitwise_not(mask)
    print("inv mask: shape", mask_inv)

    # Now black-out the area of logo in ROI
    original_image_bg = cv2.bitwise_and(roi,roi,mask = mask)

    # Take only region of logo from logo image.
    icon_logo_fg = cv2.bitwise_and(icon_logo,icon_logo,mask = mask_inv)

    # Put logo in ROI and modify the main image
    dst = cv2.add(original_image_bg,icon_logo_fg)
    original_image[left:right,top:bottom] = dst
    
    return(original_image)

In [1]:
width=1920
height=1080
frame_counter = 0
frame_alarm_counter = 24*3
last_cat_box=[]
import cv2

imstack = cv2.imread("cat_icon.png")

cap = cv2.VideoCapture(1)
# Running the tensorflow session
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        ret = True
        while (ret):
            ret,image_np = cap.read()
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represent how level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            classes,scores,boxes = filter_class_as_lists(np.squeeze(classes),
                                                         np.squeeze(scores),
                                                         np.squeeze(boxes),
                                                         17,
                                                         0.4)
            boxes = np.array(boxes)
            classes = np.array(classes).astype(np.int32)
            num_detections = len(classes)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                boxes,
                classes,
                scores,
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            
            #                   ymin,
            #                   xmin,
            #                   ymax,
            #                   xmax,
            
            # Found a cat
            if any(classes):
                last_cat_box = boxes[0]
            # No cat, print the cat icon if the last known position of the cat exist
            elif any(last_cat_box):
                img_cv = add_icon_position(image_np,imstack,last_cat_box)                    
                    
            img_cv = cv2.resize(image_np,(width,height))
#            if not classes and frame_alarm_counter >= frame_counter:
#                img_cv = cv2.addWeighted(img_cv,0.7,imstack,0.3,0)
#            else:
#                frame_counter=0
            cv2.imshow('image',img_cv)
            frame_counter+=1
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                cap.release()
                break

NameError: name 'detection_graph' is not defined

In [None]:
import cv2
img1 = cv2.imread("cat_icon.png")
img2 = cv2.imread("mesi-vs-ronaldo-1846777.jpg")
img1 = 255 - img1
cv2.imshow('res',img1)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [9]:
import cv2
img1 = cv2.imread("cat_icon.png")
img1 = cv2.resize(img1,(120,120))
img2 = cv2.imread("mesi-vs-ronaldo-1846777.jpg")
rows,cols,channels = img1.shape
img2[0:rows, 0:cols ] = img1
cv2.imshow('res',img2)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [22]:
# Load two images
img1 = cv2.imread('mesi-vs-ronaldo-1846777.jpg')
img2 = cv2.imread('cat_icon.png')

# I want to put logo on top-left corner, So I create a ROI
rows,cols,channels = img2.shape
roi = img1[0:rows, 0:cols ]
roi

array([[[ 25,  33,  40],
        [ 24,  32,  39],
        [ 24,  32,  39],
        ...,
        [ 32,  36,  41],
        [ 32,  36,  41],
        [ 32,  36,  41]],

       [[ 25,  33,  40],
        [ 25,  33,  40],
        [ 24,  32,  39],
        ...,
        [ 33,  37,  42],
        [ 33,  37,  42],
        [ 33,  37,  42]],

       [[ 26,  34,  41],
        [ 25,  33,  40],
        [ 24,  32,  39],
        ...,
        [ 35,  39,  44],
        [ 35,  39,  44],
        [ 35,  39,  44]],

       ...,

       [[ 70,  68, 127],
        [ 69,  67, 126],
        [ 68,  66, 125],
        ...,
        [ 50,  55, 104],
        [ 49,  54, 103],
        [ 47,  52, 101]],

       [[ 69,  68, 124],
        [ 69,  68, 124],
        [ 67,  66, 122],
        ...,
        [ 50,  55, 104],
        [ 49,  54, 103],
        [ 47,  52, 101]],

       [[ 69,  69, 123],
        [ 68,  68, 122],
        [ 67,  67, 121],
        ...,
        [ 50,  55, 104],
        [ 49,  54, 103],
        [ 47,  52, 101]]

In [64]:
def add_icon_position(original_image, icon_logo,roi_coords):
    
    rows,cols,channels = original_image.shape

    ## Normalized coordinates
    (left, right, top, bottom) = (roi_coords[0] * rows, roi_coords[1] * rows, 
                                  roi_coords[2] * cols, roi_coords[3] * cols)

    (left, right, top, bottom) = tuple(map(int,(left, right, top, bottom)))
    print("converted coords from normalized: ", (left, right, top, bottom))

    new_width = right - left
    new_height = bottom - top

    icon_logo = cv2.resize(icon_logo, (new_width, new_height))
    roi =  original_image[top:bottom,left:right]

    # Now create a mask of logo and create its inverse mask also
    # in this case, is already B/W
    mask = cv2.cvtColor(icon_logo,cv2.COLOR_BGR2GRAY)
    mask_inv = cv2.bitwise_not(mask)

    # Now black-out the area of logo in ROI
    original_image_bg = cv2.bitwise_and(roi,roi,mask = mask)

    # Take only region of logo from logo image.
    icon_logo_fg = cv2.bitwise_and(icon_logo,icon_logo,mask = mask_inv)

    # Put logo in ROI and modify the main image
    dst = cv2.add(original_image_bg,icon_logo_fg)
    original_image[top:bottom,left:right] = dst
    
    return(original_image)
    

In [66]:
import cv2
# Load two images
img1 = cv2.imread('mesi-vs-ronaldo-1846777.jpg')
img2 = cv2.imread('cat_icon.png')
new_img = add_icon_position(img1,img2,[0.3,0.4,0.4,0.5])
cv2.imshow('res',new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

converted coords from normalized:  (70, 94, 136, 170)


In [1]:
import cv2
# Load two images
roi_coords = [0,0.5,0,0.5]
original_image = cv2.imread('mesi-vs-ronaldo-1846777.jpg')
icon_logo = cv2.imread('cat_icon.png')

rows,cols,channels = original_image.shape

## Normalized coordinates
(left, right, top, bottom) = (roi_coords[0] * rows, roi_coords[1] * rows, 
                              roi_coords[2] * cols, roi_coords[3] * cols)

(left, right, top, bottom) = tuple(map(int,(left, right, top, bottom)))
print("converted coords from normalized: ", (left, right, top, bottom))

new_width = right - left
new_height = bottom - top

icon_logo = cv2.resize(icon_logo, (new_width, new_height))
roi =  original_image[top:bottom,left:right]

# Now create a mask of logo and create its inverse mask also
# in this case, is already B/W
mask = cv2.cvtColor(icon_logo,cv2.COLOR_BGR2GRAY)
mask_inv = cv2.bitwise_not(mask)

# Now black-out the area of logo in ROI
original_image_bg = cv2.bitwise_and(roi,roi,mask = mask)

# Take only region of logo from logo image.
icon_logo_fg = cv2.bitwise_and(icon_logo,icon_logo,mask = mask_inv)

# Put logo in ROI and modify the main image
dst = cv2.add(original_image_bg,icon_logo_fg)
original_image[top:bottom,left:right] = dst

cv2.imshow('res',original_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

converted coords from normalized:  (0, 117, 0, 170)


In [16]:
mytuple

(1, 2, 3, 3)