# Import libraries

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# List of categories that can be detected by the MobileNet-SSD model

In [2]:
# Lookup table from numeric category id to its label. The ids follow the
# order of the labels below, starting at 0 for 'background'.
categories = dict(enumerate([
    'background',
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
]))

# Load the pre-trained model using the OpenCV dnn module

In [3]:
# Files describing the Caffe model: the network definition (prototxt) and
# the trained weights (caffemodel). Both are looked up relative to the
# current working directory.
prototxt_path = 'MobileNetSSD_deploy.prototxt.txt'
caffemodel_path = 'MobileNetSSD_deploy.caffemodel'
# Build the network from those two files with OpenCV's dnn module.
net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

# Randomly select a bounding-box color for each category

In [4]:
# One random 3-component color per category, each component drawn uniformly
# from [0, 255). np.random.uniform expects (low, high); passing high < low
# is documented by NumPy as undefined behavior that may eventually raise,
# so the bounds are given in ascending order.
colors = np.random.uniform(0, 255, size=(len(categories), 3))

In [5]:
# Read the input video, run the detector on every frame and display the
# annotated frames until the stream ends or the user presses 'q'.
# To read from a file instead of the webcam, pass its path, e.g.
# cap = cv2.VideoCapture('walking.avi'); index 0 selects the webcam.
cap = cv2.VideoCapture(0)
try:
    # Fail early with a clear message instead of crashing inside the loop
    # when the camera/file cannot be opened.
    if not cap.isOpened():
        raise RuntimeError("Could not open video source")
    while True:
        # read each frame of the video
        ret, image = cap.read()
        # Stop when no frame was returned (end of the video or a capture
        # failure); there is no image to resize in that case.
        if not ret or image is None:
            break
        # SSD accepts image size of (300,300). Resize the images
        resized_image = cv2.resize(image, (300, 300))
        # create a blob of the image and normalize it: the mean 127.5 is
        # subtracted per channel and the result is scaled by 0.007843
        blob = cv2.dnn.blobFromImage(resized_image, 0.007843, (300, 300), (127.5, 127.5, 127.5), False)
        # feed blob to the model
        net.setInput(blob)
        # result from the model
        detections = net.forward()
        # size of the resized image, used to turn the relative box
        # coordinates returned by the model into pixel positions
        (h, w) = resized_image.shape[:2]
        # factors that map positions in the 300x300 image back to the
        # original frame; constant per frame, so computed once here
        heightFactor = image.shape[0] / 300.0
        widthFactor = image.shape[1] / 300.0
        # iterate over all the detection results
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # select if probability of detection is greater than 20%
            if confidence > 0.2:
                class_id = int(detections[0, 0, i, 1])
                # position of the detected object in the scaled image
                startX = int(detections[0, 0, i, 3] * w)
                startY = int(detections[0, 0, i, 4] * h)
                endX = int(detections[0, 0, i, 5] * w)
                endY = int(detections[0, 0, i, 6] * h)
                # map position of the detected object into original image
                startX = int(widthFactor * startX)
                startY = int(heightFactor * startY)
                endX = int(widthFactor * endX)
                endY = int(heightFactor * endY)
                # draw rectangular box around each object
                cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0))
                if class_id in categories:
                    label = categories[class_id] + ": " + str(confidence)
                    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                    # keep the label inside the frame when the box touches
                    # the top edge
                    startY = max(startY, labelSize[1])
                    # filled background behind the label text
                    cv2.rectangle(image, (startX, startY - labelSize[1]), (startX + labelSize[0], startY + baseLine),
                                  (0, 255, 0), cv2.FILLED)
                    # write label of the object on the image
                    cv2.putText(image, label, (startX, startY), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
        cv2.imshow("object detection result", image)
        # leave the loop by pressing key 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture device and close the window, even when an
    # error interrupts the loop.
    cap.release()
    cv2.destroyAllWindows()