# <center> YOLO BASED SOCIAL DISTANCING VIOLATION MONITOR </center>

## Problem statement:
### Identify whether people are maintaining social-distancing norms in real-time or recorded video footage. This will help spread awareness among the population.

**1. SUPPORTING FILES**<br>
    ***A."coco.names"     :*** https://github.com/pjreddie/darknet/blob/master/data/coco.names<br>
    ***B."yolov3.cfg"     :*** https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg <br>
    ***C."yolov3.weights" :*** https://drive.google.com/file/d/1tIxUU2cWiK422X77lykuMVlTrH0Ff7rl/view?usp=share_link<br>

## Model Information:
**MODEL USED:** YOLO V3

<center>----------------------------------------------------------------------------------------------------------</center>

In [1]:
#import all required libraries 
from scipy.spatial import distance as dist
import numpy as np
import imutils
import cv2
import os
import time

In [2]:
# detection thresholds: MIN_CONF is the minimum class confidence a detection
# needs to be kept (and the score threshold given to non-maximum suppression);
# NMS_THRESH is the overlap threshold given to non-maximum suppression
MIN_CONF, NMS_THRESH = 0.4, 0.2

# centroid-to-centroid distance (in pixels) below which two people are flagged
MIN_DISTANCE = 50

In [3]:
# load the COCO class labels (one label per line)
labelsPath = os.path.sep.join(["coco.names"])
# read through a context manager so the file handle is closed immediately
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# print the labels
print('Labels present in COCO: \n',LABELS,'\n')
print('Number of labels :\n',len(LABELS),'\n')

weightsPath = os.path.sep.join([ "yolov3.weights"])
configPath  = os.path.sep.join([ "yolov3.cfg"])

# load the YOLO network from its Darknet config and COCO-trained weights
print("Extracted...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

# determine only the "output" layer names
# getUnconnectedOutLayers() yields 1-based layer indices; depending on the
# OpenCV version they arrive as a flat array or as an Nx1 array, so flatten
# first to get plain scalar indices in both cases
layer_names  = net.getLayerNames()
output_layer = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
print(output_layer)

Labels present in COCO: 
 ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 

Number of labels :
 80 

Extracted...
['yolo_82', 'yolo_94', 'yolo_106']


## Streaming the video clip

In [4]:
# open the input clip
video_stream = cv2.VideoCapture("test_video.mp4")

# fail early with a clear message instead of silently processing no frames
if not video_stream.isOpened():
    raise IOError('Could not open video file "test_video.mp4"')

# report basic properties of the clip
fps = video_stream.get(cv2.CAP_PROP_FPS)
print("FPS of the current video: ",fps)

num_frames = video_stream.get(cv2.CAP_PROP_FRAME_COUNT)
print("Number of frames in the video: ",num_frames)

# the output writer is created lazily, once the first frame size is known
writer = None

FPS of the current video:  30.0
Number of frames in the video:  11784.0


## Defining the Distance

In [5]:
#Euclidean distance 
def euclidean_dist(p1, p2):
    """Return the straight-line distance between the 2-D points p1 and p2.

    Only the components at index 0 and index 1 of each point are used.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return (dx ** 2 + dy ** 2) ** 0.5

def isclose(p1, p2):
    """Grade how close two points are relative to their mean y coordinate.

    Returns:
        1 if the distance is positive and below 15% of the mean of the two
          y coordinates,
        2 if it is positive and below 20% of that mean,
        0 otherwise (including when the two points coincide).
    """
    gap = euclidean_dist(p1, p2)
    # mean y of the two points scales the thresholds
    # (presumably a rough perspective calibration -- confirm with the author)
    scale = (p1[1] + p2[1]) / 2

    # coincident points (zero distance) are never graded as close
    if not gap > 0:
        return 0
    if gap < 0.15 * scale:
        return 1
    if gap < 0.2 * scale:
        return 2
    return 0

    
# frame dimensions; filled in once a frame has been read in the processing loop
height = width = None
# NOTE(review): q is not referenced anywhere else in the visible code
q = 0

## Defining the function to detect People

In [6]:
def detect_people(frame, net, output_layer, personIdx=0):
    """Run the network on a frame and return the person detections it keeps.

    Args:
        frame: image array; its first two shape entries are used as
            (height, width).
        net: network object providing setInput() and forward().
        output_layer: names of the output layers passed to net.forward().
        personIdx: class index that counts as a person.

    Returns:
        A list of (confidence, (x1, y1, x2, y2), (centerX, centerY)) tuples,
        one per detection that survives non-maximum suppression. The list is
        empty when nothing is kept.
    """
    frame_h, frame_w = frame.shape[:2]
    # factors that map the first four detection values onto frame pixels
    to_pixels = np.array([frame_w, frame_h, frame_w, frame_h])

    # feed the frame to the network as a 416x416 blob and collect the outputs
    net.setInput(cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False))
    layer_outputs = net.forward(output_layer)

    boxes, centroids, confidences = [], [], []

    for layer_output in layer_outputs:
        for detection in layer_output:
            # entries from index 5 onward are the per-class scores
            class_scores = detection[5:]
            best_class = np.argmax(class_scores)
            score = class_scores[best_class]

            # keep only sufficiently confident person detections
            if best_class != personIdx or not score > MIN_CONF:
                continue

            # first four values: box centre and size, scaled to the frame
            center_x, center_y, box_w, box_h = (detection[0:4] * to_pixels).astype("int")

            # convert the centre-based box to a top-left corner
            left = int(center_x - (box_w / 2))
            top = int(center_y - (box_h / 2))

            boxes.append([left, top, int(box_w), int(box_h)])
            centroids.append((center_x, center_y))
            confidences.append(float(score))

    # suppress weak, overlapping boxes
    kept = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONF, NMS_THRESH)

    # nothing survived suppression
    if len(kept) == 0:
        return []

    return [
        (
            confidences[k],
            (boxes[k][0], boxes[k][1], boxes[k][0] + boxes[k][2], boxes[k][1] + boxes[k][3]),
            centroids[k],
        )
        for k in kept.flatten()
    ]

## Save the output frames as a video

In [7]:
# display: a positive value shows every annotated frame in a preview window
# output: path of the annotated video to write; an empty string disables writing
display, output = 1, "output_video.avi"

In [8]:
# Process the clip frame by frame. The try/finally guarantees that the capture,
# the writer and the preview window are released on normal completion, on an
# early exit with 'q', and when an exception is raised.
try:
    while True:
        # read the next frame from the file
        (grabbed, frame) = video_stream.read()

        # no frame was returned: the stream has ended
        if not grabbed:
            break

        # resize the frame and then detect people (and only people) in it
        frame = imutils.resize(frame, width=700)
        results = detect_people(frame, net, output_layer, personIdx=LABELS.index("person"))
        height, width = frame.shape[:2]

        # indexes (into results) of people standing too close to someone else
        violate = set()

        # a pairwise comparison needs at least two people
        if len(results) >= 2:
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids, centroids, metric="euclidean")

            # the strict upper triangle visits every unordered pair exactly
            # once; both members of a too-close pair are flagged
            close_i, close_j = np.nonzero(np.triu(D < MIN_DISTANCE, k=1))
            violate.update(close_i.tolist())
            violate.update(close_j.tolist())

        # loop over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            (startX, startY, endX, endY) = bbox
            (cX, cY) = centroid
            # (0, 0, 255) for people in violation, (0, 255, 0) otherwise
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.putText(frame, str(round(prob * 100))+"%", (startX - 5, startY - 5), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 1)
            cv2.circle(frame, (cX, cY), 5, color, 1)

        # draw the total number of social distancing violations on the output frame
        text = "Social Distancing Violations Detected: {}".format(len(violate))
        cv2.putText(frame, text, (10, frame.shape[0] - 25), cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 255), 2)
        cv2.putText(frame,'People Detected:{}'.format(len(results)), (10, height - 75),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

        if display > 0:
            # show the output frame
            cv2.imshow("Frame", frame)

            # pressing 'q' stops processing early
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if output != "" and writer is None:
            # initialize our video writer at the source frame rate so the
            # output plays at the same speed as the input; fall back to 25
            # when the capture does not report a usable rate
            source_fps = video_stream.get(cv2.CAP_PROP_FPS)
            writer_fps = source_fps if source_fps and source_fps > 0 else 25
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output, fourcc, writer_fps, (frame.shape[1], frame.shape[0]), True)

        if writer is not None:
            writer.write(frame)
finally:
    # release the input, finalize the output file, and close the preview window
    video_stream.release()
    if writer is not None:
        writer.release()
    cv2.destroyAllWindows()

<center>-------------------------------------------------END------------------------------------------------</center>