# Ali Iqtadar Jafri
## #gripmay21 - Computer Vision and IoT
## Task 3:- Social Distancing detector

In [19]:
# importing required libraries

import cv2 as cv
from scipy.spatial import distance as dist 
import numpy as np
import argparse
import imutils
import os

## YOLOv3 Configs:

In [20]:
# base path to YOLO directory
MODEL_PATH = "yolo-coco"

# initialize minimum probability to filter weak detections along with the
# threshold when applying non-maxim suppression
MIN_CONF = 0.3
NMS_THRESH = 0.3

# define the minimum safe distance (in pixels) that two people can be from each other
MIN_DISTANCE = 50

In [21]:
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([MODEL_PATH, "coco.names"])
LABELS = open(labelsPath).read().strip().split("\n")

print(LABELS)

print(len(LABELS))

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([MODEL_PATH, "yolov3.weights"])
configPath = os.path.sep.join([MODEL_PATH, "yolov3.cfg"])

# load our YOLO object detector trained on COCO dataset (80 classes)
print("[INFO] loading YOLO from disk...")
net = cv.dnn.readNetFromDarknet(configPath, weightsPath)

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
80
[INFO] loading YOLO from disk...


## Reading the input video:

In [22]:
FLAGS = []

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    
    parser.add_argument('-w', '--weights',
        default='./yolo-coco/yolov3.weights')

    parser.add_argument('-cfg', '--config',
        default='./yolo-coco/yolov3.cfg')

    parser.add_argument('-v', '--video-path',
        default='Test Video.mp4')

    parser.add_argument('-vo', '--video-output-path',
        default='output_file.avi')

    parser.add_argument('-d', '--display', 
        default=1)
    parser.add_argument('-l', '--labels',
        default='./yolo-coco/coco.names')


    FLAGS, unparsed = parser.parse_known_args()


# Get the labels
    LABELS = open(FLAGS.labels).read().strip().split('\n')


# Load the weights and configutation to form the pretrained YOLOv3 model
    net = cv.dnn.readNetFromDarknet(FLAGS.config, FLAGS.weights)

    # Get the output layer names of the model
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    

In [23]:
if FLAGS.video_path:
# initialize the video stream and pointer to output video file
    print("Reading the test-video")
# open input video if available else webcam stream
    vs = cv.VideoCapture(FLAGS.video_path if FLAGS.video_path else 0)
    writer = None

Reading the test-video


## Detect people in the test video:

In [26]:
def detect_people(frame, net, ln, personIdx=0):
    # grab dimensions of the frame and initialize the list of results
    (H, W) = frame.shape[:2]
    results = []

    # construct a blob from the input frame and then perfrom a forward pass
    # of the YOLO object detector, giving us the bounding boxes and
    # associated probabilities
    blob = cv.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)

    # initialize lists of detected bounding boxes, centroids, and confidence
    boxes = []
    centroids = []
    confidences = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        # loop over each of the detections
        for detection in output:
            # extract teh class ID and confidence(probability) of the current object detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter detections by (1) ensuring that the object detected was a person and
            # (2) that the minimum confidence is met
            if classID == personIdx and confidence > MIN_CONF:
                # scale the bounding box coordinates back relative to the size of 
                # the image, keeping in mind that YOLO actually returns the center (x, y)-coordinates
                # of the bounding box followed by the boxes' width and height
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # use the center (x,y)-coordinates to derive the top and left corner of 
                # the bounding box
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                # update the list of bounding box coordinates, centroids and confidences
                boxes.append([x, y, int(width), int(height)])
                centroids.append((centerX, centerY))
                confidences.append(float(confidence))

    # apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv.dnn.NMSBoxes(boxes, confidences, MIN_CONF, NMS_THRESH)

    # ensure at least one detection exists
    if len(idxs) > 0:
        # loop over the indexes being kept
        for i in idxs.flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # update the results list to consist of the person prediction probability, 
            # bounding box coordinates, and the centroid
            r = (confidences[i], (x, y, x + w, y + h), centroids[i])
            results.append(r)

    # return the list of results
    return results


In [28]:
# loop over the frames from the video stream
while True:
    # read the next frame from the input video
    (grabbed, frame) = vs.read()

    # if the frame was not grabbed, then that's the end fo the stream 
    if not grabbed:
        break
    # resize the frame and then detect people (only people) in it
    frame = imutils.resize(frame, width=700)
    results = detect_people(frame, net, ln, personIdx=LABELS.index("person"))
    # initialize the set of indexes that violate the minimum social distance
    violate = set()
        # ensure there are at least two people detections (required in order to compute the
        # the pairwise distance maps)
    if len(results) >= 2:
        # extract all centroids from the results and compute the Euclidean distances
        # between all pairs of the centroids
        centroids = np.array([r[2] for r in results])
        D = dist.cdist(centroids, centroids, metric="euclidean")

        # loop over the upper triangular of the distance matrix
        for i in range(0, D.shape[0]):
            for j in range(i+1, D.shape[1]):
                # check to see if the distance between any two centroid pairs is less
                # than the configured number of pixels
                if D[i, j] < MIN_DISTANCE:
                    # update the violation set with the indexes of the centroid pairs
                    violate.add(i)
                    violate.add(j)

    # loop over the results
    for (i, (prob, bbox, centroid)) in enumerate(results):
        # extract teh bounding box and centroid coordinates, then initialize the color of the annotation
        (startX, startY, endX, endY) = bbox
        (cX, cY) = centroid
        color = (0, 255, 0)

        # if the index pair exists within the violation set, then update the color
        if i in violate:
            color = (0, 0, 255)

        # draw (1) a bounding box around the person and (2) the centroid coordinates of the person
        cv.rectangle(frame, (startX, startY), (endX, endY), color, 2)
        cv.circle(frame, (cX, cY), 5, color, 1)

    # draw the total number of social distancing violations on the output frame
    text = "Social Distancing Violations: {}".format(len(violate))
    cv.putText(frame, text, (10, frame.shape[0] - 25), cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 3)

    # check to see if the output frame should be displayed to the screen
    if FLAGS.display > 0:
        # show the output frame
        cv.imshow("Frame", frame)
        key = cv.waitKey(1) & 0xFF

        # if the 'q' key is pressed, break from the loop
        if key == ord("q"):
            break

    # if  the video writer has not been  as none
    if writer is None:
        # initialize the video writer
        fourcc = cv.VideoWriter_fourcc(*"MJPG")
        writer = cv.VideoWriter(FLAGS.video_output_path, fourcc, 25, (frame.shape[1], frame.shape[0]), True)

    # if the video writer is not None, write the frame to the output video file
    if writer is not None:
        print("--> Writing stream to output")
        writer.write(frame)



--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing st

--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing stream to output
--> Writing st