In [4]:

# Import OpenVINO modules
import numpy as np
import openvino
from openvino.inference_engine import IECore, IENetwork
import openvino.runtime as ov
import time
import math
from itertools import chain

LABELS = open("person-detection-0202/coco.names").read().strip().split('\n')
SAFE_DISTANCE = 60
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype='uint8')
anchors = [[116,90, 156,198, 373,326], [30,61, 62,45, 59,119], [10,13, 16,30, 33,23]]
class_threshold = 0.6

# Load the IR model files
model_xml = "person-detection-0202/yolo-v3-tf.xml"
model_bin = "person-detection-0202/yolo-v3-tf.bin"

ie = IECore()
net = ie.read_network(model=model_xml, weights=model_bin)

# Get the input and output layer names
input_blob = next(iter(net.input_info))
output_blob = next(iter(net.outputs))

# Load the network to the device (CPU, GPU, etc.)
core = ov.Core()
model = core.compile_model(model_xml,"CPU")
#Creates object consisting of bounding box params 
class BoundBox:
    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.objness = objness
        self.classes = classes
        self.label = -1
        self.score = -1
 
    def get_label(self):
        if self.label == -1:
            self.label = np.argmax(self.classes)
 
        return self.label
 
    def get_score(self):
        if self.score == -1:
            self.score = self.classes[self.get_label()]
 
        return self.score
 
#Activation function
def _sigmoid(x):
    return 1. / (1. + np.exp(-x))
    
#To decode the net output and return boxes 
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    grid_h, grid_w = netout.shape[:2]
    # print("grid_h:",grid_h,"grid_w:",grid_w)
    nb_box = 3
    netout = netout.reshape((grid_h, grid_w, nb_box, -1))
    print("@############################",netout.shape)
    nb_class = netout.shape[-1] - 5
    print("nb_class",nb_class)
    boxes = []
    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh
    print(netout[0][0])
    obj=[] 
    for i in range(grid_h*grid_w):
        row = i / grid_w
        col = i % grid_w
        for b in range(nb_box):
            # 4th element is objectness score
            objectness = netout[int(row)][int(col)][b][4]
            obj.append(objectness)
            if(objectness.all() <= obj_thresh): continue
            # first 4 elements are x, y, w, and h
            x, y, w, h = netout[int(row)][int(col)][b][:4]
            x = (col + x) / grid_w # center position, unit: image width
            y = (row + y) / grid_h # center position, unit: image height
            w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
            h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
            # last elements are class probabilities
            classes = netout[int(row)][col][b][5:]
            # print("classes",len(classes))
            box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
            boxes.append(box)
    # print(obj)
    return boxes
#Correct the sizes of the bounding boxes for the shape of the image
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    new_w, new_h = net_w, net_h
    box=[]
    confidence=[]
    for i in range(len(boxes)):
        x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
        y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
        boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
        boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
        boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
        boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
        # print(boxes[i].get_label())
        # break
        confidence.append(boxes[i].objness)
        box.append([boxes[i].xmin,boxes[i].ymin,boxes[i].xmax,boxes[i].ymax])
    
    return box,confidence

def _interval_overlap(interval_a, interval_b):
    x1, x2 = interval_a
    x3, x4 = interval_b
    if x3 < x1:
        if x4 < x1:
            return 0
        else:
            return min(x2,x4) - x1
    else:
        if x2 < x3:
             return 0
        else:
            return min(x2,x4) - x3
 
def bbox_iou(box1, box2):
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
    intersect = intersect_w * intersect_h
    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
    union = w1*h1 + w2*h2 - intersect
    return float(intersect) / union
 
def do_nms(boxes, nms_thresh):
    if len(boxes) > 0:
        nb_class = len(boxes[0].classes)
    else:
        return
    for c in range(nb_class):
        sorted_indices = np.argsort([-box.classes[c] for box in boxes])
        for i in range(len(sorted_indices)):
            index_i = sorted_indices[i]
            if boxes[index_i].classes[c] == 0: continue
            for j in range(i+1, len(sorted_indices)):
                index_j = sorted_indices[j]
                if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
                    boxes[index_j].classes[c] = 0
                    
# define the labels
labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
                # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores
 
# draw all results
def draw_boxes(frame, v_boxes, v_labels, v_scores, min_dist):
    cal_point=[]
    for i in range(len(v_boxes)):
        box = v_boxes[i]
        # get coordinates
        y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
        width, height = x2 - x1, y2 - y1
        mid_x, mid_y = (width//2)+x1, y2
        cal_point.append((mid_x,mid_y))
        frame = cv2.rectangle(frame, (x1,y1), (x2,y2), (255,255,0), 1)
        label = "%s (%.3f) %s" % (v_labels[i], v_scores[i], i)
        t_point_x=x1-14
        t_point_y=y1-3
        frame = cv2.putText(frame, label, (t_point_x,t_point_y), cv2.FONT_HERSHEY_SIMPLEX, 0.35, color=(255,255,255), thickness=1)

    print("\nMid point coordinates :")
    print(cal_point,"\n")
    for i in range(len(cal_point)):
        for j in range(i+1,len(cal_point)):
            d=round(math.sqrt(math.pow(cal_point[j][0]-cal_point[i][0],2)+math.pow(cal_point[j][1]-cal_point[i][1],2)),3)
            print("boxes :", i,":", j,"Distance :", d)
            if(d<=min_dist):
                box_i,box_j = v_boxes[i],v_boxes[j]
                y1i, x1i, y2i, x2i = box_i.ymin, box_i.xmin, box_i.ymax, box_i.xmax
                y1j, x1j, y2j, x2j = box_j.ymin, box_j.xmin, box_j.ymax, box_j.xmax
                frame = cv2.rectangle(frame, (x1i,y1i), (x2i,y2i), (0,0,255), 1)
                frame = cv2.rectangle(frame, (x1j,y1j), (x2j,y2j), (0,0,255), 1)

# Define a function to compute the Euclidean distance between two points
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

#Resize the frame
def load_image_pixels(filename, shape):
    height, width, c = filename.shape
    # load the image with the required size
    image = cv2.resize(filename, shape)
    print("Frame_Feed_To_Model  :",image.shape)
    # scale pixel values to [0, 1]
    image = image.astype('float32')
    image /= 255.0
    # add a dimension so that we have one sample
    image = np.expand_dims(image, 0)
    return image, width, height

# Define a threshold for the minimum distance between people
distance_threshold = 200 # pixels

# Read and preprocess the input video
import cv2
import numpy as np

#give the input video
video = cv2.VideoCapture("pedestrians.mp4")
_,frame = video.read()
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*"MJPG")

#give the output address to store the video 
writer = cv2.VideoWriter('social_distance_openvino_optimized_version.avi', fourcc, 30,(width,height), True)
(W, H) = (None, None)
# used to record the time when we processed last frame
prev_frame_time = 0

# used to record the time at which we processed current frame
new_frame_time = 0
 
while True:
    # Read a frame from the video.
    ret, frame = video.read()
    if not ret:
        break # Exit the loop if end of video or error
    if W is None or H is None:
        H, W = (frame.shape[0], frame.shape[1])
        
    # Preprocess the frame
    input_w, input_h = 416, 416
    image, image_w, image_h = load_image_pixels(frame, (input_w, input_h))

    # Run inference and get the output
    infer_request = model.create_infer_request()
    input_shape = [1,416, 416,3]   
    input_tensor= ov.Tensor(image)
    input_tensor.shape = input_shape
    infer_request.set_tensor(input_blob,input_tensor)
    infer_request.infer()
    yhat=[]
    for i in range(3):
        output_tensor = infer_request.get_output_tensor(i)
        output=output_tensor.data
        output = output.transpose((0,2,3,1))
        # print(output.shape)
        yhat.append(output)
    # print(yhat[0].shape)
    # break
    # Parse the output and get the bounding boxes of detected people
    boxes = []
    class_ids = []
    lines = []
    
    new_frame_time = time.time()
    fps = 1/(new_frame_time-prev_frame_time)
    prev_frame_time = new_frame_time
    fps = int(fps)
    fps = str(fps)
    for i in range(len(yhat)):
        print("len:",len(boxes))
        boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
    # print("box_list size: ",len(boxes))
    n = len(boxes)
    boxes,confidences=correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
    # print(confidences)
    # break
    # print(boxes)
    # Apply non-maximum suppression to eliminate redundant overlapping boxes
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    print(indices)
    break
    # Loop over the indices of the remaining boxes
    v = 0
    for i in indices:
        box = boxes[i]
        # Draw a bounding box around the person
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
        # Get the center point of the box
        center_a = np.array([box[0] + (box[2] - box[0]) / 2, box[1] + (box[3] - box[1]) / 2])
        
        # Loop over the other indices of the remaining boxes
        for j in indices:
            if i != j: # Avoid comparing with itself
                box_b = boxes[j]
                # Get the center point of the other box
                center_b = np.array([box_b[0] + (box_b[2] - box_b[0]) / 2, box_b[1] + (box_b[3] - box_b[1]) / 2])
               
                # Compute the distance between the two points
                distance = euclidean_distance(center_a, center_b)
                
                # Check if the distance is below the threshold
                if distance < distance_threshold:
                    # Draw a red line between the two points
                    v+=1
                    cv2.line(frame, (int(center_a[0]), int(center_a[1])), (int(center_b[0]), int(center_b[1])), (0, 0, 255), 2)

    # Show the output frame
    cv2.namedWindow("Social Distance Detector", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("Social Distance Detector", 800, 600)
    cv2.putText(frame,'Number of Violations : '+str(v),(80,frame.shape[0]-10),cv2.FONT_HERSHEY_SIMPLEX,3,(0,0,255),3)
    
    cv2.putText(frame,"FPS :"+ fps, (7,70), cv2.FONT_HERSHEY_SIMPLEX, 3, (200,0,0), 3, cv2.LINE_AA)
    cv2.imshow("Social Distance Detector", frame)
    writer.write(frame)
    
    
    
    
    # Wait for a key press to exit
    key = cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break

# Release the video and destroy the windows
video.release()
cv2.destroyAllWindows()


Frame_Feed_To_Model  : (416, 416, 3)
len: 0
@############################ (13, 13, 3, 85)
nb_class 80
[[ 0.6471471   0.6520426   0.27050552 -0.60449237  0.5000005   0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0