<h1>Object Detection</h1>
<h2>Importing the libraries</h2>

In [1]:
import cv2 
import numpy as np
from imutils.video import FPS
import time
from imutils.video import VideoStream

In [2]:
def displayImage(image, text):
    """Show ``image`` in an OpenCV window and block until a key is pressed.

    Parameters
    ----------
    image
        Image array handed to ``cv2.imshow``.
    text : str
        Title of the window the image is shown in.

    Every OpenCV window is closed before returning, including when the wait
    is interrupted (e.g. the kernel is interrupted) or ``cv2.imshow`` raises,
    so no orphaned window is left behind.
    """
    try:
        cv2.imshow(text, image)
        # A delay of 0 waits for a key press with no timeout.
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

<h2>Initializing class labels</h2>

In [3]:
# Class labels; the integer class id read from each detection is used as an
# index into this list, so the order must not be changed.
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
    "sofa", "train", "tvmonitor",
]

# One random colour per class (three values in [0, 255) each). A dedicated,
# seeded generator keeps the colours identical on every run without touching
# NumPy's global random state.
COLOR_SEED = 42
COLORS = np.random.RandomState(COLOR_SEED).uniform(0, 255, size=(len(CLASSES), 3))


<h2>Loading the model</h2>

In [5]:
# Caffe network definition and trained weights, resolved relative to the
# notebook's working directory.
PROTOTXT_PATH = 'SSD_MobileNet_prototxt.txt'
WEIGHTS_PATH = 'SSD_MobileNet.caffemodel'
# NOTE(review): machine-specific absolute path; point this at a local image
# before running the notebook elsewhere.
IMAGE_PATH = '/Users/samridhagarwal/Pictures/mandy and me 2.jpg'

net = cv2.dnn.readNetFromCaffe(PROTOTXT_PATH, WEIGHTS_PATH)

image = cv2.imread(IMAGE_PATH)
# cv2.imread signals a missing or unreadable file by returning None rather
# than raising, so fail here with a clear message instead of an
# AttributeError on `image.shape` below.
if image is None:
    raise FileNotFoundError("Could not read image: {}".format(IMAGE_PATH))

# Original image size, used later to scale the boxes back to image pixels.
(h, w) = image.shape[:2]

# Creating a blob from the image: resized to 300x300, mean 127.5 subtracted
# and scaled by 0.007843 (about 1/127.5).
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)

<h2>Detecting Objects</h2>

In [9]:
# Run the network on the prepared blob.
net.setInput(blob)
detections = net.forward()

# Detections at or below this confidence are ignored.
min_confidence = 0.2
# Multiplier that turns the box values of a detection into image pixels
# (presumably the network emits coordinates relative to width/height).
box_scale = np.array([w, h, w, h])

# Each row of detections[0, 0] describes one detection: index 1 holds the
# class id, index 2 the confidence and indices 3..6 the box corners.
for detection in detections[0, 0]:
    confidence = detection[2]
    if not confidence > min_confidence:
        continue

    idx = int(detection[1])
    (startX, startY, endX, endY) = (detection[3:7] * box_scale).astype("int")
    text = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)

    # Put the label above the box, or inside it when the box is too close
    # to the top edge of the image.
    if startY - 15 > 15:
        y = startY - 15
    else:
        y = startY + 15

    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
    cv2.putText(image, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

displayImage(image, 'Intial Dectetions')

<h2>Detecting Objects in real-time using webcam</h2>

In [4]:
# Loading the model: Caffe network definition and weights, read from the
# notebook's working directory.
model = cv2.dnn.readNetFromCaffe('SSD_MobileNet_prototxt.txt', 'SSD_MobileNet.caffemodel') 
# Start video capture from source 0 (the webcam); `vs` is read frame by frame
# in the detection loop.
vs = VideoStream(src=0).start()
# Pause for two seconds before the first read -- presumably to let the camera
# warm up; TODO confirm.
time.sleep(2.0)
# Start the FPS counter; it is updated and stopped by the detection loop.
# NOTE(review): started here, so any delay before the loop cell is run is
# presumably included in the measured time -- confirm.
fps = FPS().start()


<h2>Detecting objects</h2>

In [5]:
# Detect objects on webcam frames until "q" is pressed or the stream stops
# delivering frames. Each frame is displayed exactly once, whether or not
# anything was detected, and the key check happens at frame level so that
# `break` really leaves the `while` loop.
try:
    while True:
        frame = vs.read()  # reading the frame
        if frame is None:
            # No frame available (camera missing or stream closed).
            break

        # Resize the frame to a width of 700 px, keeping the aspect ratio.
        (h, w) = frame.shape[:2]
        r = 700.0 / w
        dim = (700, int(h * r))
        frame = cv2.resize(frame, dim)
        (h, w) = frame.shape[:2]

        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)
        model.setInput(blob)
        detections = model.forward()

        for i in range(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]  # confidence of the prediction
            if confidence > 0.2:
                idx = int(detections[0, 0, i, 1])
                # getting the x-y coordinates of the bounding box in frame pixels
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                text = "{}:{:.2f}%".format(CLASSES[idx], confidence * 100)
                # drawing the bounding box and the confidence of the prediction;
                # the label goes inside the box when it is too close to the top
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)
                cv2.putText(frame, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, COLORS[idx], 2)

        # Show the annotated frame once and count it as one processed frame.
        cv2.imshow("Image", frame)
        key = cv2.waitKey(1) & 0xFF
        fps.update()
        if key == ord("q"):
            break
finally:
    # Always stop the counter, close the windows and stop the video stream,
    # even when the loop ends with an exception or a kernel interrupt.
    fps.stop()
    cv2.destroyAllWindows()
    vs.stop()

