In [15]:
import cv2
import imutils
import numpy as np
import argparse
import time
from math import pow, sqrt
from imutils.video import FPS

# Command-line options for the detector. Every option has a default, so the
# parser succeeds even when no arguments are supplied.
parser = argparse.ArgumentParser()

# Input video source.
parser.add_argument(
    '-v', '--video',
    type=str,
    default='demo_Trim.mp4',
    help='Video file path. If no path is given, video is captured using device.',
)

# Optional still image (not read by the code visible in this file).
parser.add_argument(
    '-i', '--image',
    type=str,
    default=None,
    help='path to Image File ',
)

# Caffe weights and the matching network description.
parser.add_argument(
    '-m', '--model',
    default='SSD_MobileNet.caffemodel',
    help='Path to the pretrained model.',
)
parser.add_argument(
    '-p', '--prototxt',
    default='SSD_MobileNet_prototxt.txt',
    help='Prototxt of the model.',
)

# Label file path (the class names below are hard-coded instead).
parser.add_argument(
    '-l', '--labels',
    default='class_labels.txt',
    help='Labels of the dataset.',
)

# Minimum detection confidence; weaker detections are discarded.
parser.add_argument(
    '-c', '--confidence',
    type=float,
    default=0.8,
    help='Set confidence for detecting objects',
)

# Passing an explicit empty list stops argparse from reading sys.argv, so the
# defaults above are always used (this is what lets the cell run in a notebook).
args = parser.parse_args(args=[])


# Class names, positioned so that the class id reported by the detector can be
# used directly as an index into this list.
labels = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]


# One random colour per label (three channels, each drawn from [0, 255)),
# used when drawing that label's bounding box.
COLORS = np.random.uniform(low=0, high=255, size=(len(labels), 3))


# Load the Caffe network from its prototxt description and trained weights.
print("\nLoading model...\n")
network = cv2.dnn.readNetFromCaffe(args.prototxt, args.model)

# Read from the given video path; an empty/None path falls back to capture
# device 0.
cap = cv2.VideoCapture(args.video or 0)

# Running count of frames pulled from the capture.
frame_no = 0

# Pause before timing starts (presumably to let a camera warm up), then start
# the frame-rate timer.
time.sleep(2.0)
fps = FPS().start()

# Main loop: read frames, detect people with the SSD network, annotate each
# frame and display it until the video ends or `q` is pressed.

# Class id the detector reports for a person (its position in `labels`).
PERSON_CLASS_ID = labels.index("person")

try:
    while cap.isOpened():

        frame_no = frame_no + 1

        # Capture one frame after another
        ret, frame = cap.read()

        # A failed read means there is nothing more to process.
        if not ret:
            break

        (h, w) = frame.shape[:2]
        # Box coordinates are multiplied by the frame size to get pixels;
        # the factor is the same for every detection in this frame.
        box_scale = np.array([w, h, w, h])

        # Resize the frame to suit the model requirements (300x300 pixels) and
        # build the input blob with scale factor 0.007843 and mean 127.5.
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)

        network.setInput(blob)
        detections = network.forward()
        person = 0

        # Each row holds: [?, class id, confidence, startX, startY, endX, endY]
        for detection in detections[0, 0]:
            # filter out weak detections by ensuring the `confidence` is
            # greater than the minimum confidence
            confidence = detection[2]
            if not confidence > args.confidence:
                continue

            # only people are counted and drawn
            idx = int(detection[1])
            if idx != PERSON_CLASS_ID:
                continue

            # Convert to plain Python numbers so the drawing calls do not
            # depend on how the OpenCV bindings convert NumPy scalars.
            startX, startY, endX, endY = (detection[3:7] * box_scale).astype("int").tolist()
            color = tuple(COLORS[idx].tolist())

            # draw the prediction on the frame
            label = "{}: {:.2f}%".format(labels[idx], confidence * 100)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

            # Anchor for the per-person caption: 15px above/left of the box
            # corner, or 15px inside it when that would be too close to the
            # frame edge.
            x = startX - 15 if startX - 15 > 15 else startX + 15
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(frame, label, (startX, y + 30), cv2.FONT_HERSHEY_DUPLEX, 0.8, color, 5)

            # NOTE: the original also drew a green rectangle from (x, y) to
            # (x + w, y + h), where w/h are the *frame* dimensions rather than
            # the box size. That produced a frame-sized outline unrelated to
            # the detection; the box drawn above already marks the person.
            person = person + 1
            cv2.putText(frame, f'person {person}', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 8)

        # Draw the total once per frame, after counting. Drawing it inside the
        # detection loop stacked "1", "2", ... on top of each other.
        cv2.putText(frame, f'Total Person(s) : {person}', (40, 70), cv2.FONT_HERSHEY_DUPLEX, 2, (0, 0, 255), 6)

        cv2.namedWindow('Frame', cv2.WINDOW_NORMAL)

        # Show frame
        cv2.imshow('Frame', frame)
        cv2.resizeWindow('Frame', 1000, 800)

        key = cv2.waitKey(1) & 0xFF

        # Press `q` to exit
        if key == ord("q"):
            break
        # update the FPS counter
        fps.update()

    # stop the timer and display FPS information
    fps.stop()
    print("Elapsed time: {:.2f}".format(fps.elapsed()))
    print("FPS: {:.2f}".format(fps.fps()))
finally:
    # Clean up even if processing a frame raised, so the capture handle and
    # the display window are not left open.
    cap.release()
    cv2.destroyAllWindows()


Loading model...

Elapsed time: 8.30
FPS: 10.48
