In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import argparse
import imutils
from scipy.spatial import distance as dist

In [2]:
# --- Detection and distancing settings used by the cells below ---
confthreshold = 0.5   # minimum class score for a detection to be kept
nmsthreshold = 0.3    # threshold handed to cv2.dnn.NMSBoxes
min_distance = 50     # centroid distance (pixels) below which two people are flagged

# --- Model files (relative paths) ---
modelconfiguration = 'yolov4.cfg'
modelweight = 'yolov4.weights'
classfile = 'coco.names'

# One class label per line; trailing newlines are dropped before splitting.
with open(classfile, 'rt') as names_file:
    raw_names = names_file.read()
classnames = raw_names.rstrip('\n').split('\n')

# Load the network and request OpenCV's CUDA backend and target.
net = cv2.dnn.readNet(modelweight, modelconfiguration)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [3]:
def detect_people(frame, net, ln, personIdx=0, inputSize=416,
                  minConfidence=None, nmsOverlap=None):
    """Detect people in a frame with a YOLO network.

    Args:
        frame: Image array. Its first two dimensions are read as
            (height, width) and it is passed to cv2.dnn.blobFromImage.
        net: Network object exposing setInput() and forward().
        ln: Output layer names handed to net.forward().
        personIdx: Class index that counts as "person".
        inputSize: Side length of the square network input. Defaults to
            416, the value that used to be hard-coded.
        minConfidence: Minimum class score for a detection to be kept.
            Defaults to the module-level ``confthreshold``.
        nmsOverlap: Threshold passed to cv2.dnn.NMSBoxes. Defaults to the
            module-level ``nmsthreshold``.

    Returns:
        A list of ``(confidence, (startX, startY, endX, endY),
        (centerX, centerY))`` tuples, one per detection that survives
        non-maxima suppression. All coordinates are plain Python ints in
        the pixel space of ``frame``.
    """
    # Fall back to the notebook-level settings when no override is given.
    if minConfidence is None:
        minConfidence = confthreshold
    if nmsOverlap is None:
        nmsOverlap = nmsthreshold

    (H, W) = frame.shape[:2]

    # Build a blob from the frame and run one forward pass of the detector.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (inputSize, inputSize),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    boxes = []
    centroids = []
    confidences = []
    # Detections carry coordinates relative to the image size; this scales
    # (centerX, centerY, width, height) back to pixels.
    scale = np.array([W, H, W, H])

    for output in layerOutputs:
        for detection in output:
            # Entries from index 5 onwards are the per-class scores.
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # Keep only sufficiently confident "person" detections.
            if classID != personIdx or not confidence > minConfidence:
                continue

            (centerX, centerY, width, height) = (detection[0:4] * scale).astype("int")
            # YOLO reports the box centre; derive the top-left corner.
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            boxes.append([x, y, int(width), int(height)])
            # Plain ints (not NumPy scalars) so callers can hand the
            # centroid straight to drawing functions.
            centroids.append((int(centerX), int(centerY)))
            confidences.append(float(confidence))

    results = []
    if not boxes:
        # Nothing to suppress.
        return results

    # Non-maxima suppression removes weak, overlapping boxes.
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, minConfidence, nmsOverlap)

    # The index container's shape varies (empty tuple, (N, 1) or (N,));
    # normalise it to a flat sequence before iterating.
    for i in np.array(idxs).flatten():
        i = int(i)
        (x, y, w, h) = boxes[i]
        results.append((confidences[i], (x, y, x + w, y + h), centroids[i]))

    return results

In [6]:
# Read a video source, flag people standing too close together, and show the
# annotated frames with a violation count and an FPS overlay.
def _output_layer_names(network):
    """Return the names of the network's unconnected output layers."""
    layer_names = network.getLayerNames()
    # getUnconnectedOutLayers() yields 1-based layer ids, shaped either
    # (N, 1) or (N,) depending on the OpenCV version; flatten() accepts both.
    out_ids = np.array(network.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(i) - 1] for i in out_ids]


def _find_violations(results, min_dist):
    """Return indexes of results whose centroid is closer than min_dist to another."""
    if len(results) < 2:
        return set()
    centroids = np.array([r[2] for r in results])
    D = dist.cdist(centroids, centroids, metric="euclidean")
    # Only the strict upper triangle is inspected so each pair is seen once.
    rows, cols = np.nonzero(np.triu(D < min_dist, k=1))
    return set(rows.tolist()) | set(cols.tolist())


def filedet(filename, wh):
    """Run person detection and distance checks on a video and display it.

    Each frame is resized to a width of 700 pixels, passed to
    ``detect_people`` and annotated: green boxes for people, red boxes for
    people whose centroid is closer than ``min_distance`` pixels to another
    person's, plus the violation count and the frame rate. Press ``q`` in
    the window to stop early.

    Args:
        filename: Source passed to cv2.VideoCapture.
        wh: Accepted for backward compatibility and currently has no effect.
            It used to size a blob for an extra forward pass whose output was
            never used; detection is done by ``detect_people``, which builds
            its own blob.

    Returns:
        None. Relies on the module-level ``net``, ``classnames`` and
        ``min_distance``.
    """
    cap = cv2.VideoCapture(filename)
    cv2.startWindowThread()
    try:
        if not cap.isOpened():
            print("Error opening video stream or file")
            return

        # These do not change between frames, so compute them once.
        outputNames = _output_layer_names(net)
        personIdx = classnames.index("person")
        font = cv2.FONT_HERSHEY_SIMPLEX
        # Start the clock now so the first FPS value is not measured
        # against time zero.
        prev_frame_time = time.time()

        while cap.isOpened():
            success, img = cap.read()
            if not success:
                break

            img = imutils.resize(img, width=700)
            results = detect_people(img, net, outputNames, personIdx=personIdx)
            violate = _find_violations(results, min_distance)

            for (i, (prob, bbox, centroid)) in enumerate(results):
                (startX, startY, endX, endY) = bbox
                (cX, cY) = centroid
                # Red for people involved in a violation, green otherwise.
                color = (0, 0, 255) if i in violate else (0, 255, 0)
                cv2.rectangle(img, (startX, startY), (endX, endY), color, 2)
                cv2.circle(img, (int(cX), int(cY)), 5, color, 1)

            # Total number of people involved in a distancing violation.
            text = "Social Distancing Violations: {}".format(len(violate))
            cv2.putText(img, text, (10, img.shape[0] - 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 3)

            # Frame rate from the time elapsed since the previous frame.
            new_frame_time = time.time()
            elapsed = new_frame_time - prev_frame_time
            prev_frame_time = new_frame_time
            fps = str(int(1 / elapsed)) if elapsed > 0 else "0"
            cv2.putText(img, fps, (7, 70), font, 1, (100, 255, 0), 3, cv2.LINE_AA)

            cv2.imshow('Img', img)
            # Press q to quit the window.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close windows even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()

In [9]:
# Run the distancing monitor on the sample clip.
filedet(
    filename='pedestrians.mp4',
    wh=320,
)