In [4]:
import cv2
import datetime
import imutils
import numpy as np
from centroidtracker import CentroidTracker

# Caffe network definition (.prototxt) and trained weights (.caffemodel) for
# the detector. The paths are relative, so both files must be in the working
# directory when this module is loaded.
protopath = "MobileNetSSD_deploy.prototxt"
modelpath = "MobileNetSSD_deploy.caffemodel"
detector = cv2.dnn.readNetFromCaffe(prototxt=protopath, caffeModel=modelpath)


# Class labels, looked up by the integer class index that main() reads from
# each detection; only "car" and "person" detections are kept there.
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]

# Alternative tracker settings (maxDisappeared=50), currently disabled.
# c_tracker = CentroidTracker(maxDisappeared=50, maxDistance=55)
# p_tracker = CentroidTracker(maxDisappeared=50, maxDistance=55)

# One tracker per class so cars (c_tracker) and people (p_tracker) get
# independent object IDs.
# NOTE(review): maxDisappeared / maxDistance semantics are defined in
# centroidtracker.py, which is not visible here -- presumably frames an object
# may go missing and pixels it may move between frames; confirm.
c_tracker = CentroidTracker(maxDisappeared=30, maxDistance=55)
p_tracker = CentroidTracker(maxDisappeared=30, maxDistance=55)


def non_max_suppression_fast(boxes, overlapThresh):
    """Drop boxes that overlap an already-kept box by more than a threshold.

    Boxes are visited in descending order of their bottom edge (``y2``).
    Each visited box is kept, and every not-yet-visited box whose
    intersection with it covers more than ``overlapThresh`` of that other
    box's own area is discarded.

    Args:
        boxes: Array-like with one ``x1, y1, x2, y2`` row per box. Plain
            lists/tuples are accepted as well as ndarrays.
        overlapThresh: Maximum allowed ratio of intersection area to the
            candidate box's area before the candidate is suppressed.

    Returns:
        An integer ndarray containing the kept rows, or ``[]`` when there
        are no boxes or the input could not be processed (in which case a
        diagnostic is printed). The function never raises.
    """
    try:
        # Accept any array-like so callers may pass a plain list of boxes.
        boxes = np.asarray(boxes)

        if len(boxes) == 0:
            return []

        # Do the arithmetic in floating point. Unsigned integers must be
        # promoted too: otherwise ``xx2 - xx1 + 1`` wraps around for
        # disjoint boxes and they look like a huge overlap.
        if boxes.dtype.kind in "iu":
            boxes = boxes.astype("float")

        pick = []

        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        # The +1 treats coordinates as inclusive pixel indices.
        area = (x2 - x1 + 1) * (y2 - y1 + 1)
        idxs = np.argsort(y2)

        while len(idxs) > 0:
            # Keep the box with the largest remaining y2.
            last = len(idxs) - 1
            i = idxs[last]
            pick.append(i)

            rest = idxs[:last]

            # Intersection rectangle between the kept box and each other box.
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])

            # Clamp to zero so disjoint boxes contribute no overlap.
            w = np.maximum(0, xx2 - xx1 + 1)
            h = np.maximum(0, yy2 - yy1 + 1)

            overlap = (w * h) / area[rest]

            # Remove the kept box and everything it suppresses.
            idxs = np.delete(idxs, np.concatenate(([last],
                                                   np.where(overlap > overlapThresh)[0])))

        return boxes[pick].astype("int")
    except Exception as e:
        # Best-effort: report the problem and fall back to "no boxes" so the
        # caller receives the same kind of value as for an empty input.
        print("Exception occurred in non_max_suppression : {}".format(e))
        return []


def _collect_boxes(detections, width, height, min_confidence=0.5):
    """Return (car_boxes, person_boxes) in pixel coordinates for one frame.

    ``detections`` is the raw network output; for detection ``i`` the class
    index is read from ``[0, 0, i, 1]``, the confidence from ``[0, 0, i, 2]``
    and the box from ``[0, 0, i, 3:7]``, which is multiplied by the frame
    width/height to obtain ``x1, y1, x2, y2`` in pixels.
    """
    rects = {"car": [], "person": []}
    scale = np.array([width, height, width, height])

    for i in range(detections.shape[2]):
        if detections[0, 0, i, 2] <= min_confidence:
            continue

        label = CLASSES[int(detections[0, 0, i, 1])]
        if label in rects:
            rects[label].append(detections[0, 0, i, 3:7] * scale)

    return rects["car"], rects["person"]


def _track_and_draw(frame, tracker, rects, color=(0, 0, 255)):
    """Suppress overlaps, update ``tracker``, draw its objects on ``frame``.

    Returns the highest object ID the tracker reported for this frame, or
    ``-1`` when it reported none.
    """
    boxes = np.array(rects).astype(int)
    boxes = non_max_suppression_fast(boxes, 0.3)

    highest_id = -1
    for object_id, bbox in tracker.update(boxes).items():
        # Convert once and use the same plain ints for both the rectangle and
        # the label so OpenCV always receives integer pixel coordinates.
        x1, y1, x2, y2 = (int(v) for v in bbox)

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        text = "ID: {}".format(object_id)
        cv2.putText(frame, text, (x1, y1 - 5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1)

        highest_id = max(highest_id, object_id)

    return highest_id


def main(video_path='VID-20210804-WA0005.mp4'):
    """Detect and track cars and people in a video and print the totals.

    Each frame is resized to a width of 600, passed through the module-level
    ``detector``, and the resulting car/person boxes are fed to ``c_tracker``
    and ``p_tracker``. Tracked objects are drawn on the frame, which is shown
    in a window titled "Application"; pressing ``q`` stops early.

    The printed totals are "highest object ID seen + 1".
    NOTE(review): this assumes CentroidTracker numbers objects consecutively
    from 0 -- confirm against centroidtracker.py.

    Args:
        video_path: Path of the video file to process.
    """
    # -1 means "no object seen yet", so the reported total is 0 rather than 1
    # when nothing was tracked, and no per-frame history has to be stored.
    max_car_id = -1
    max_person_id = -1

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Could not open video : {}".format(video_path))

        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                print("Video Ended")
                break

            frame = imutils.resize(frame, width=600)
            (H, W) = frame.shape[:2]

            # blobFromImage(image, scalefactor, size, mean)
            blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)
            detector.setInput(blob)
            detections = detector.forward()

            c_rects, p_rects = _collect_boxes(detections, W, H)

            max_car_id = max(max_car_id, _track_and_draw(frame, c_tracker, c_rects))
            max_person_id = max(max_person_id, _track_and_draw(frame, p_tracker, p_rects))

            cv2.imshow("Application", frame)
            # Mask to the low byte so the comparison also works on platforms
            # where waitKey sets higher bits in its return value.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

    print("Total number of Car Detected : ", max_car_id + 1)
    print("Total number of Person Detected : ", max_person_id + 1)


# Run the pipeline only when executed as a script or notebook cell, not when
# this module is imported.
if __name__ == "__main__":
    main()

Video Ended
Total number of Car Detected :  15
Total number of Person Detected :  9
