## Social Distancing Detector using YOLO Algorithm

In [1]:
import os
import winsound

import cv2
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import distance

In [2]:
# Tunable thresholds shared by the detection and distance-check cells.
# Minimum allowed centroid-to-centroid distance, measured in pixels of the
# processed frame; closer pairs are counted as violations.
MIN_DIST = 250
# Overlap threshold handed to non-maxima suppression.
THRESHOLD = 0.3
# Minimum class score a detection needs to be kept (also passed to NMS as
# its score threshold).
CONF_SCORE = 0.3

In [3]:
def detect_people(frame, net, ln, pid):
    """Run the YOLO network on ``frame`` and return detections of class ``pid``.

    Args:
        frame: Image array whose first two dimensions are (height, width).
        net: Loaded ``cv2.dnn`` network.
        ln: Names of the network's output layers to run the forward pass on.
        pid: Index of the class to keep (the caller passes the index of
            ``'person'``).

    Returns:
        A list of ``(confidence, (startX, startY, endX, endY), (centerX,
        centerY))`` tuples, one per detection that survives non-maxima
        suppression. All coordinates are plain Python ints in pixels of
        ``frame``; the list is empty when nothing is detected.
    """
    # the frame size is needed to scale YOLO's relative box coordinates
    # back to pixels
    (H, W) = frame.shape[:2]
    results = []

    # construct a blob from the input frame and then perform a forward
    # pass of the YOLO object detector, giving us our bounding boxes
    # and associated probabilities
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    # detected bounding boxes, centroids, and confidences, respectively
    boxes = []
    centroids = []
    confidences = []
    scale = np.array([W, H, W, H])

    # Each layer output is a 2-D array with one row per candidate detection:
    # columns 0-3 hold (centerX, centerY, width, height) and columns 5+ hold
    # the per-class scores. Filter whole layers at once with a mask instead
    # of visiting every row in Python.
    for output in layerOutputs:
        output = np.asarray(output)
        scores = output[:, 5:]
        classIDs = np.argmax(scores, axis=1)
        best = scores[np.arange(scores.shape[0]), classIDs]

        # keep rows whose top class is `pid` and whose score clears the
        # minimum confidence
        keep = (classIDs == pid) & (best > CONF_SCORE)

        # scale the surviving boxes to pixel units and truncate to ints;
        # .tolist() yields plain Python numbers rather than NumPy scalars
        geometry = (output[keep, 0:4] * scale).astype("int").tolist()
        kept_scores = best[keep].tolist()

        for (centerX, centerY, width, height), confidence in zip(geometry, kept_scores):
            # YOLO reports the box centre; derive the top-left corner
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))

            boxes.append([x, y, width, height])
            centroids.append((centerX, centerY))
            confidences.append(confidence)

    # apply non-maxima suppression to suppress weak, overlapping
    # bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONF_SCORE, THRESHOLD)

    # NMSBoxes may hand back an empty tuple, a flat array or an (N, 1)
    # array; flattening through np.asarray handles all of these, and an
    # empty result simply skips the loop
    for i in np.asarray(idxs, dtype=int).flatten():
        (x, y, w, h) = boxes[i]

        # each result holds the prediction probability, the bounding box
        # as (startX, startY, endX, endY), and the centroid
        results.append((confidences[i], (x, y, x + w, y + h), centroids[i]))

    return results

In [4]:
# LABELS holds the class names read from the COCO names file, one per line;
# a name's position in the list is its class index.
# os.path.join avoids the invalid "\c" escape of a hand-written backslash
# path and picks the right separator for the platform.
labelsPath = os.path.join("yolo-coco", "coco.names")
# the context manager closes the file handle once the names are read
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

In [5]:
# Locations of the pretrained YOLOv3 weights and the Darknet config file.
# os.path.join avoids the invalid "\y" escape of a hand-written backslash
# path and picks the right separator for the platform.
wtpath = os.path.join("yolo-coco", "yolov3.weights")
cfgpath = os.path.join("yolo-coco", "yolov3.cfg")
# It reads the network in darknet form
net = cv2.dnn.readNetFromDarknet(cfgpath, wtpath)

In [6]:
# getLayerNames() returns the names of all layers in the network
# (254 layers for this YOLO architecture, per the original notebook)
layer_names = net.getLayerNames()
# Only the layers that produce the final detections are needed; in YOLO the
# output comes from 3 layers. getUnconnectedOutLayers() returns their
# 1-based indices, either as a flat array or as an (N, 1) array depending on
# the OpenCV version, so flatten before indexing.
out_layer_idx = np.asarray(net.getUnconnectedOutLayers()).flatten()
# Translate the 1-based indices into layer names
ln = [layer_names[i - 1] for i in out_layer_idx]
ln

['yolo_82', 'yolo_94', 'yolo_106']

In [7]:
# Open capture device 0 for live frames
cap = cv2.VideoCapture(0)
# Fail early with a clear message rather than crashing later on an empty frame
if not cap.isOpened():
    raise RuntimeError("Could not open video capture device 0")

In [8]:
# Main loop: grab a frame, detect people, flag every person standing closer
# than MIN_DIST to someone else, draw the annotations and show the frame.
# Press "q" in the display window to stop.

# The class index of 'person' never changes, so look it up once
pid = LABELS.index('person')

try:
    while True:
        grabbed, frame = cap.read()
        if not grabbed:
            # No frame was delivered (camera unavailable or stream ended);
            # stop instead of passing an empty frame to cv2.resize
            break
        frame = cv2.resize(frame, (500, 500))
        results = detect_people(frame, net, ln, pid)

        # indexes (into results) of the people violating the minimum distance
        violate = set()
        if len(results) >= 2:
            # Distances are only meaningful with at least two persons.
            # D[i][j] is the Euclidean distance between centroids i and j.
            centroids = np.array([r[2] for r in results])
            D = distance.cdist(centroids, centroids, metric='euclidean')
            # D is symmetric with a zero diagonal, so only the strict upper
            # triangle (k=1) is inspected; each row of `close_pairs` is an
            # (i, j) pair closer than the minimum distance
            close_pairs = np.argwhere(np.triu(D < MIN_DIST, k=1))
            violate.update(int(idx) for idx in close_pairs.ravel())

        # looping over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            # extract the bounding box and centroid coordinates as plain
            # ints for the drawing calls
            (startX, startY, endX, endY) = (int(v) for v in bbox)
            (cX, cY) = (int(v) for v in centroid)

            # red for people in the violation set, green otherwise
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            # draw (1) a bounding box around the person and (2) the
            # centroid of the person
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.circle(frame, (cX, cY), 5, color, 1)

        text = "Social Distancing Violations: {}".format(len(violate))
        cv2.putText(frame, text, (10, frame.shape[0] - 25),
            cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 3)
        if violate:
            # NOTE(review): without SND_ASYNC this call blocks until the
            # sound finishes, pausing the video loop; kept as-is to preserve
            # the current behavior.
            winsound.PlaySound('beep.wav', winsound.SND_FILENAME)
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if a frame fails
    cap.release()
    cv2.destroyAllWindows()