# <center> YOLO BASED SOCIAL DISTANCING VIOLATION DETECTION </center>

<img src="Social-Distancing.png">

**Image Source:** https://www.google.com/search?q=social+distancing+coronavirus&sxsrf=ALeKk02Nhz4SVCHoz4pXx0VtA1tjBNSQTw:1624339411520&source=lnms&tbm=isch&sa=X&ved=2ahUKEwiozPjsv6rxAhU5zDgGHcqEAEsQ_AUoAXoECAEQAw&biw=1920&bih=969#imgrc=a9m2sVtnoygkJM

## Problem statement:
### To identify whether people are maintaining social-distancing norms in real-time or recorded video footage. This will help spread awareness among the population.

## Dataset:

**1. VIDEO**<br>
    ***A. Configuration :*** https://www.youtube.com/watch?v=BbZa4OnQrNk&t=3s<br>
    ***B. Format        :*** 720p 30FPS (MP4)<br>
    ***C. Location      :*** Kempegowda International Airport<br>
    
    
**2. SUPPORTING FILES**<br>
    ***A."coco.names"     :*** https://github.com/pjreddie/darknet/blob/master/data/coco.names<br>
    ***B."yolov3.cfg"     :*** https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg <br>
    ***C."yolov3.weights" :*** confidential<br>

## Model building:
**MODEL USED:** YOLO V3

## Machine Configuration:

**1. CPU**<br>
    A. Intel i7 9th Gen H(High-Graphics) Processor<br>
    B. 6 core 12 Thread<br>
    C. 16GB Ram - 2666 MHz<br>
    
**2. GPU ENGINE**<br>
    A. 4GB Nvidia GTX 1650 Discrete Graphics<br>
    B. 6 Core <br>
    C. CUDA Core 896 (proper setup with OpenCV)<br>
    D. Thermal Cooling System 6 Channel<br>

<center>----------------------------------------------------------------------------------------------------------</center>

In [1]:
#import all important libraries 
from scipy.spatial import distance as dist
import numpy as np
import imutils
import cv2
import os
import time

In [2]:
# Detection thresholds used by the detect_* helpers: MIN_CONF is the lowest
# class score a raw detection may have to be kept (and the score threshold
# handed to NMS); NMS_THRESH is the threshold handed to non-maximum suppression.
MIN_CONF = 0.4
NMS_THRESH = 0.5

# Centroid-to-centroid distance below which two detections count as a
# violation; compared against pixel distances measured on the resized frame.
MIN_DISTANCE = 50

In [3]:
# Load the COCO class names, one per line in "coco.names".
# The file is read inside a context manager so the handle is closed right away,
# and splitlines() keeps the names clean even if the file has CRLF line endings
# (a trailing "\r" would make lookups such as LABELS.index("person") fail).
labelsPath = "coco.names"
with open(labelsPath, encoding="utf-8") as labels_file:
    LABELS = labels_file.read().strip().splitlines()

# print the labels
print('Labels present in COCO: \n',LABELS,'\n')
print('Number of labels :\n',len(LABELS),'\n')

# Darknet weights and network definition, expected in the working directory.
weightsPath = "yolov3.weights"
configPath  = "yolov3.cfg"

# Load the YOLO network described by the config/weights pair above.
# The confirmation is printed only after the load succeeded.
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
print("Extracted...")

Labels present in COCO: 
 ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 

Number of labels :
 80 

Extracted...


In [4]:
# Determine the names of the network's unconnected ("output") layers; these
# are the layers later requested from net.forward().
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV version they arrive as an Nx1 array or as a flat array, so the result
# is flattened first instead of indexing each entry with [0] (which raises
# IndexError on the flat form).
layer_names  = net.getLayerNames()
output_layer = [layer_names[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]

## Streaming the video clip

In [5]:
# Open the input clip and query its basic properties up front.
video_stream = cv2.VideoCapture("banglore_airport (2).mp4")
fps = video_stream.get(cv2.CAP_PROP_FPS)
num_frames = video_stream.get(cv2.CAP_PROP_FRAME_COUNT)

print("FPS of the current video: ",fps)
print("Number of frames in the video: ",num_frames)

# Video writer used by the later cells; created on demand once a frame exists.
writer = None

FPS of the current video:  30.0
Number of frames in the video:  8408.0


## Defining the Distance

In [6]:
#Euclidean distance 
def euclidean_dist(p1, p2):
    """Return the straight-line distance between the 2-D points *p1* and *p2*.

    Only indices 0 and 1 of each point are read.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return (dx ** 2 + dy ** 2) ** 0.5


def isclose(p1, p2):
    """Grade how close *p1* and *p2* are relative to their mean y coordinate.

    The distance between the points is compared with a fraction of the
    average of their two y values:

    * ``1`` -- distance is non-zero and below 15 % of that average
    * ``2`` -- distance is non-zero and below 20 % of that average
    * ``0`` -- anything else, including a distance of exactly zero
    """
    gap = euclidean_dist(p1, p2)
    if not 0 < gap:
        return 0

    calib = (p1[1] + p2[1]) / 2
    if gap < 0.15 * calib:
        return 1
    if gap < 0.2 * calib:
        return 2
    return 0

    
# Frame geometry for the baseline loop: both dimensions stay None until the
# first frame has been read; q is the right-hand crop bound and is set to the
# frame width at that point.
height = None
width = None
q = 0

## Initial Model

In [None]:
# Baseline model: run YOLO on every frame, box every detection that survives
# NMS and colour it red when its centre is graded "close" (isclose() == 1) to
# the centre of another detection, green otherwise.
# NOTE(review): detections are not filtered by class in this cell, so objects
# other than people are boxed and compared as well -- confirm this is intended.
output = None  # cv2.VideoWriter; created once, when the first frame is available
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while video_stream.isOpened():

        # Capture frame-by-frame
        ret, img = video_stream.read()
        if not ret:
            break

        # Remember the geometry of the first frame; q is the right crop bound.
        if width is None or height is None:
            height, width = img.shape[:2]
            q = width

        img = img[0:height, 0:q]
        height, width = img.shape[:2]

        # Detecting objects
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layer)

        confidences = []
        boxes = []

        for out in outs:
            for detection in out:
                scores = detection[5:]
                confidence = scores[np.argmax(scores)]

                # 0.5 is the threshold for confidence
                if confidence > 0.5:
                    # The first four values are scaled by the frame size to get
                    # the box centre and size in pixels.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.5)

        if len(indexes) > 0:
            kept = np.asarray(indexes).flatten()

            # Centre of every kept box, in the same order as `kept`.
            center = [
                [int(boxes[i][0] + boxes[i][2] / 2), int(boxes[i][1] + boxes[i][3] / 2)]
                for i in kept
            ]

            # Per-detection status: 0 = safe, 2 = fairly close, 1 = too close.
            status = [0] * len(center)

            # isclose() is symmetric and returns 0 for identical points, so
            # each unordered pair only needs to be compared once.
            for a in range(len(center)):
                for b in range(a + 1, len(center)):
                    g = isclose(center[a], center[b])
                    if g == 1:
                        status[a] = 1
                        status[b] = 1
                    elif g == 2:
                        # Never downgrade a detection already marked too close.
                        if status[a] != 1:
                            status[a] = 2
                        if status[b] != 1:
                            status[b] = 2

            # The title is drawn once per frame rather than once per box.
            cv2.putText(img, "Social Distancing Detection", (0, 50), font, 1, (255, 255, 255), 2)

            for state, i in zip(status, kept):
                x, y, w, h = boxes[i]
                # colour of the rectangle: dark red when too close, green otherwise
                color = (0, 0, 150) if state == 1 else (0, 255, 0)
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)

        cv2.imshow('image', img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Open the writer once; re-creating it for every frame would reopen the
        # output file each time and leave only the final frame in it.
        if output is None:
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            output = cv2.VideoWriter('output_video_model1.avi', fourcc, 30.0, (img.shape[1], img.shape[0]))
        output.write(img)
finally:
    # Always give the capture, the writer and the preview window back, even if
    # no frame was read (in which case the writer was never created).
    video_stream.release()
    if output is not None:
        output.release()
    cv2.destroyAllWindows()

**OUTPUT IMAGE**

<img src="banglore_airport_output1.jpg">

## Defining the function to detect people

In [7]:
def detect_people(frame, net, output_layer, personIdx=0):
    """Run the network on *frame* and return the detections of class *personIdx*.

    The frame is converted to a 416x416 blob and passed through ``net``; only
    the layers named in ``output_layer`` are evaluated. A raw detection is kept
    when its best-scoring class equals ``personIdx`` and that score exceeds
    ``MIN_CONF``. The survivors are then filtered with non-maximum suppression
    using ``MIN_CONF`` and ``NMS_THRESH``.

    Returns:
        A list of ``(confidence, (x1, y1, x2, y2), (cx, cy))`` tuples, one per
        detection kept by NMS; the list is empty when nothing is kept. Box and
        centroid values are the detection's first four numbers scaled by the
        frame width/height.
    """
    frame_h, frame_w = frame.shape[:2]
    to_pixels = np.array([frame_w, frame_h, frame_w, frame_h])

    net.setInput(cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False))

    boxes, centroids, confidences = [], [], []

    for layer_out in net.forward(output_layer):
        for detection in layer_out:
            # Entries from index 5 onwards are the per-class scores.
            class_scores = detection[5:]
            best = np.argmax(class_scores)
            score = class_scores[best]

            if not (best == personIdx and score > MIN_CONF):
                continue

            # Scale centre and size to the frame, then derive the top-left corner.
            cx, cy, bw, bh = (detection[0:4] * to_pixels).astype("int")
            left = int(cx - (bw / 2))
            top = int(cy - (bh / 2))

            boxes.append([left, top, int(bw), int(bh)])
            centroids.append((cx, cy))
            confidences.append(float(score))

    keep = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONF, NMS_THRESH)

    # Nothing survived suppression.
    if len(keep) == 0:
        return []

    results = []
    for k in keep.flatten():
        left, top, bw, bh = boxes[k]
        results.append((confidences[k], (left, top, left + bw, top + bh), centroids[k]))
    return results

## Save the output frames as a video

In [8]:
# File the annotated frames of the person pass are written to
# (an empty string disables writing).
output = "output_video_banglore_airport1.avi"

# Any value above 0 shows each annotated frame in a preview window.
display = 1

In [None]:
# Person pass: detect people in every frame, flag those whose centroids are
# closer than MIN_DISTANCE, draw the result and (optionally) save it to `output`.

# Bind `dist` here so this cell does not depend on the name being left
# untouched by earlier cells.
from scipy.spatial import distance as dist

# Start from the first frame regardless of what ran before: rewind a stream
# that is still open, reopen one that has been released.
if video_stream.isOpened():
    video_stream.set(cv2.CAP_PROP_POS_FRAMES, 0)
else:
    video_stream.open("banglore_airport (2).mp4")

# Always start with a fresh writer so frames go to this cell's `output` file.
writer = None

# Write at the source frame rate so playback speed matches the input;
# fall back to 25 when the rate cannot be queried.
source_fps = video_stream.get(cv2.CAP_PROP_FPS) or 25

person_idx = LABELS.index("person")

try:
    while True:
        # read the next frame from the file
        (grabbed, frame) = video_stream.read()

        if not grabbed:
            break

        # resize the frame and then detect people (and only people) in it
        frame = imutils.resize(frame, width=700)
        results = detect_people(frame, net, output_layer, personIdx=person_idx)
        height, width = frame.shape[:2]

        # indices (into `results`) of the detections that are too close to another one
        violate = set()

        if len(results) >= 2:
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids, centroids, metric="euclidean")

            # upper triangle only: each unordered pair is checked once
            for i in range(0, D.shape[0]):
                for j in range(i + 1, D.shape[1]):
                    if D[i, j] < MIN_DISTANCE:
                        violate.add(i)
                        violate.add(j)

        # loop over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            (startX, startY, endX, endY) = bbox
            (cX, cY) = centroid

            # red for a violation, green otherwise
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.putText(frame, str(round(prob * 100))+"%", (startX - 5, startY - 5), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 1)
            cv2.circle(frame, (cX, cY), 5, color, 1)

        # draw the number of detections in violation and the total count on the frame
        text = "Social Distancing Violations: {}".format(len(violate))
        cv2.putText(frame, text, (10, frame.shape[0] - 25), cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 255), 2)
        cv2.putText(frame,'Total People Detected:{}'.format(len(results)), (10, height - 75),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

        if display > 0:
            # show the output frame
            cv2.imshow("Frame", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if output != "" and writer is None:
            # initialize our video writer
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output, fourcc, source_fps, (frame.shape[1], frame.shape[0]), True)

        if writer is not None:
            writer.write(frame)
finally:
    # Finalise the output file and clear the name so a later cell opens its own writer.
    if writer is not None:
        writer.release()
        writer = None
    cv2.destroyAllWindows()

## Defining the function to detect vehicles

In [11]:
def detect_vehicle(frame, net, output_layer, personIdx=0):
    """Run the network on *frame* and return detections of the wanted classes.

    Args:
        frame: image array; its first two dimensions are read as height, width.
        net: network object providing ``setInput`` and ``forward``.
        output_layer: layer names passed to ``net.forward``.
        personIdx: class id, or an iterable of class ids, to keep. The
            parameter name is kept for compatibility with ``detect_people``.
            A single integer (including the default ``0``) is accepted and
            treated as a one-element collection; previously it raised
            ``TypeError`` at the membership test.

    Returns:
        A list of ``(confidence, (x1, y1, x2, y2), (cx, cy))`` tuples for the
        detections that survive non-maximum suppression (empty when none do).
    """
    # Normalise the class filter to a set of plain ints.
    if isinstance(personIdx, (int, np.integer)):
        wanted = {int(personIdx)}
    else:
        wanted = {int(idx) for idx in personIdx}

    # dimensions of the frame
    (H, W) = frame.shape[:2]
    results = []

    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(output_layer)

    boxes = []
    centroids = []
    confidences = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        # loop over each of the detections
        for detection in output:
            # entries from index 5 onwards are the per-class scores
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            if int(classID) in wanted and confidence > MIN_CONF:
                # scale centre and size by the frame dimensions
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # top-left corner derived from the centre and the box size
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                centroids.append((centerX, centerY))
                confidences.append(float(confidence))

    idxs = cv2.dnn.NMSBoxes(boxes, confidences, MIN_CONF, NMS_THRESH)

    # ensure at least one detection exists
    if len(idxs) > 0:
        # loop over the indexes we are keeping
        for i in idxs.flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            r = (confidences[i], (x, y, x + w, y + h), centroids[i])
            results.append(r)

    return results

## Save the output frames as a video

In [12]:
# File the annotated frames of the vehicle pass are written to
# (an empty string disables writing).
output = "output_video_model2.avi"

# Any value above 0 shows each annotated frame in a preview window.
display = 1

In [None]:
# Vehicle pass: detect vehicles in every frame, flag those whose centroids are
# closer than MIN_DISTANCE, draw the result and (optionally) save it to `output`.

# Bind `dist` here so this cell does not depend on the name being left
# untouched by earlier cells.
from scipy.spatial import distance as dist

# Start from the first frame regardless of what ran before: rewind a stream
# that is still open, reopen one that has been released.
if video_stream.isOpened():
    video_stream.set(cv2.CAP_PROP_POS_FRAMES, 0)
else:
    video_stream.open("banglore_airport (2).mp4")

# Always start with a fresh writer: reusing one left over from an earlier cell
# would send these frames to that cell's file and never create `output`.
writer = None

# Write at the source frame rate so playback speed matches the input;
# fall back to 25 when the rate cannot be queried.
source_fps = video_stream.get(cv2.CAP_PROP_FPS) or 25

# COCO class ids treated as vehicles (looked up once, not once per frame).
vehicle_ids = [LABELS.index(name) for name in ("motorbike", "bicycle", "car", "truck", "bus")]

try:
    while True:
        # read the next frame from the file
        (grabbed, frame) = video_stream.read()

        if not grabbed:
            break

        # resize the frame and then detect vehicles (and only vehicles) in it
        frame = imutils.resize(frame, width=700)
        results = detect_vehicle(frame, net, output_layer, personIdx=vehicle_ids)
        height, width = frame.shape[:2]

        # indices (into `results`) of the detections that are too close to another one
        violate = set()

        if len(results) >= 2:
            centroids = np.array([r[2] for r in results])
            D = dist.cdist(centroids, centroids, metric="euclidean")

            # upper triangle only: each unordered pair is checked once
            for i in range(0, D.shape[0]):
                for j in range(i + 1, D.shape[1]):
                    if D[i, j] < MIN_DISTANCE:
                        violate.add(i)
                        violate.add(j)

        # loop over the results
        for (i, (prob, bbox, centroid)) in enumerate(results):
            (startX, startY, endX, endY) = bbox
            (cX, cY) = centroid

            # red for a violation, green otherwise
            color = (0, 0, 255) if i in violate else (0, 255, 0)

            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            cv2.putText(frame, str(round(prob * 100))+"%", (startX - 5, startY - 5), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 1)
            cv2.circle(frame, (cX, cY), 5, color, 1)

        # draw the number of detections in violation and the total count on the frame
        text = "Social Distancing Violations: {}".format(len(violate))
        cv2.putText(frame, text, (10, frame.shape[0] - 25), cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 255, 255), 2)
        # this pass counts vehicles, so the overlay says so
        cv2.putText(frame,'Total Vehicles Detected:{}'.format(len(results)), (10, height - 75),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

        if display > 0:
            # show the output frame
            cv2.imshow("Frame", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if output != "" and writer is None:
            # initialize our video writer
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(output, fourcc, source_fps, (frame.shape[1], frame.shape[0]), True)

        if writer is not None:
            writer.write(frame)
finally:
    # Finalise the output file and clear the name so a later cell opens its own writer.
    if writer is not None:
        writer.release()
        writer = None
    cv2.destroyAllWindows()

<center>-------------------------------------------------END------------------------------------------------</center>