# Object detection using MobileNet SSD

In [None]:
pip install pillow



In [None]:
from PIL import Image,ImageSequence
import numpy as np
import argparse
import imutils
import time
import cv2
from google.colab.patches import cv2_imshow
from imutils.video import FPS



In [None]:
from google.colab import files

# Open Colab's upload dialog. The returned dict (file name -> file bytes) is
# bound to a name instead of being left as the cell's last expression, so the
# raw bytes of every uploaded file are not echoed into the notebook output.
uploaded = files.upload()

# Report the names the files were actually stored under. Colab may append a
# suffix such as " (1)" when a file with the same name already exists, in which
# case the saved name differs from the one selected in the dialog.
for saved_name in uploaded:
    print("[INFO] uploaded: {}".format(saved_name))

Saving MobileNetSSD_deploy.prototxt.txt to MobileNetSSD_deploy.prototxt (1).txt


{'MobileNetSSD_deploy.prototxt (1).txt': b'name: "MobileNet-SSD"\ninput: "data"\ninput_shape {\n  dim: 1\n  dim: 3\n  dim: 300\n  dim: 300\n}\nlayer {\n  name: "conv0"\n  type: "Convolution"\n  bottom: "data"\n  top: "conv0"\n  param {\n    lr_mult: 1.0\n    decay_mult: 1.0\n  }\n  param {\n    lr_mult: 2.0\n    decay_mult: 0.0\n  }\n  convolution_param {\n    num_output: 32\n    pad: 1\n    kernel_size: 3\n    stride: 2\n    weight_filler {\n      type: "msra"\n    }\n    bias_filler {\n      type: "constant"\n      value: 0.0\n    }\n  }\n}\nlayer {\n  name: "conv0/relu"\n  type: "ReLU"\n  bottom: "conv0"\n  top: "conv0"\n}\nlayer {\n  name: "conv1/dw"\n  type: "Convolution"\n  bottom: "conv0"\n  top: "conv1/dw"\n  param {\n    lr_mult: 1.0\n    decay_mult: 1.0\n  }\n  param {\n    lr_mult: 2.0\n    decay_mult: 0.0\n  }\n  convolution_param {\n    num_output: 32\n    pad: 1\n    kernel_size: 3\n    group: 32\n    engine: CAFFE\n    weight_filler {\n      type: "msra"\n    }\n    bias

In [None]:
# Run configuration: input GIF, Caffe network definition, Caffe weights, and
# the minimum detection confidence a prediction needs in order to be drawn.
args = dict(
    gif="/content/real_time.gif",
    prototxt="/content/MobileNetSSD_deploy.prototxt.txt",
    model="/content/MobileNetSSD_deploy.caffemodel",
    confidence=0.5,
)

In [None]:
# Class labels indexed by the class id that the detector reports for each
# detection (the frame loop looks labels up as CLASSES[idx]).
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"
]

# One colour per class (three channel values in [0, 255)), used for that
# class's box and label. A seeded, local generator keeps the colours identical
# across kernel restarts and leaves NumPy's global random state untouched.
COLOR_SEED = 42
COLORS = np.random.default_rng(COLOR_SEED).uniform(0, 255, size=(len(CLASSES), 3))

In [None]:
# Build the OpenCV DNN network from the Caffe files named in `args`.
print("[INFO] Loading model from disk...")
net = cv2.dnn.readNetFromCaffe(
    args["prototxt"],  # network definition file
    args["model"],     # trained weights file
)

[INFO] Loading model from disk...


In [None]:
# Create the imutils FPS counter and start its timer.
# NOTE(review): the timer starts when this cell runs, so any time spent before
# the frame loop begins is counted in the reported elapsed time / FPS unless
# the counter is restarted right before the loop — confirm that is intended.
fps = FPS().start()

In [None]:
# Log the stage and open the GIF at the path configured in args["gif"].
# The resulting Pillow image object is kept in `gif` so its frames can be
# iterated by the processing loop.
# NOTE(review): nothing visible here closes `gif`; presumably acceptable for a
# short-lived notebook session — confirm.
print("[INFO] processing GIF...")
gif = Image.open(args["gif"])

[INFO] processing GIF...


In [None]:
# Run the detector on every frame of the GIF, draw each detection whose
# confidence exceeds args["confidence"], display the annotated frame inline,
# and finally report elapsed time and approximate frames per second.

# (Re)start the FPS counter in this cell so the measurement covers only the
# loop below, and so re-running this cell starts from a fresh counter instead
# of adding frames to one whose timer was started earlier.
fps = FPS().start()

# loop over the frames from the GIF
for frame_pil in ImageSequence.Iterator(gif):
    # Convert the Pillow frame to an RGB NumPy array, reorder the channels to
    # BGR (the order OpenCV uses), then resize the frame to a width of 400
    # pixels.
    frame = np.array(frame_pil.convert('RGB'))
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    frame = imutils.resize(frame, width=400)

    # Grab the frame dimensions (needed to scale the boxes back up) and build
    # the 300x300 network input. Subtracting 127.5 and scaling by 0.007843
    # (about 1/127.5) maps pixel values to roughly [-1, 1].
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
        0.007843, (300, 300), 127.5)

    # pass the blob through the network and obtain the detections and
    # predictions
    net.setInput(blob)
    detections = net.forward()

    # Loop over the detections. For each entry i, index 1 is read as the
    # class id, index 2 as the confidence and indices 3..6 as the box corners.
    for i in range(detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > args["confidence"]:
            # Look up the class index, then scale the box corners by the
            # frame width/height to get pixel coordinates.
            idx = int(detections[0, 0, i, 1])
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # draw the prediction on the frame
            label = "{}: {:.2f}%".format(CLASSES[idx],
                confidence * 100)
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                COLORS[idx], 2)
            # Put the label above the box, or just inside its top edge when
            # there is not enough room above it.
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(frame, label, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

    # show the output frame inline in the notebook
    cv2_imshow(frame)
    # NOTE(review): cv2_imshow renders inline, so there is presumably no
    # HighGUI window for waitKey to poll here; the 40 ms delay and the "q"
    # handling likely only matter when run with a desktop window — confirm.
    key = cv2.waitKey(40) & 0xFF

    # if the q key was pressed, break from the loop
    if key == ord("q"):
        break

    # update the FPS counter
    fps.update()

# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# do a bit of cleanup
cv2.destroyAllWindows()