In [4]:
import numpy as np
import sys
import cv2
from math import pow, sqrt
from PIL import Image

In [5]:
# Load the siren icon shown next to the "Alert" message and shrink it to 10%
# of its original size.
s = cv2.imread("siren.jpg")
if s is None:
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an obscure cv2.resize error.
    raise FileNotFoundError("Could not read image file 'siren.jpg'")
half = cv2.resize(s, (0, 0), fx=0.1, fy=0.1)

In [6]:
# Helpers that build the gradient background for the output window
def get_gradation_2d(start, stop, width, height, is_horizontal):
    """Return a (height, width) array holding a linear ramp from start to stop.

    When ``is_horizontal`` is truthy the values change along the columns
    (left to right) and every row is identical; otherwise they change along
    the rows (top to bottom) and every column is identical.
    """
    if not is_horizontal:
        # Build the ramp over the rows, repeat it per column, then transpose
        # so the result is still (height, width).
        vertical_ramp = np.linspace(start, stop, height)
        return np.tile(vertical_ramp, (width, 1)).T

    horizontal_ramp = np.linspace(start, stop, width)
    return np.tile(horizontal_ramp, (height, 1))
def get_gradation_3d(width, height, start_list, stop_list, is_horizontal_list):
    """Stack one 2-D gradient per channel into a (height, width, channels) array.

    ``start_list``, ``stop_list`` and ``is_horizontal_list`` are iterated in
    parallel; entry ``i`` of each describes channel ``i`` and is forwarded to
    ``get_gradation_2d``. The number of channels is ``len(start_list)``.

    Returns a float64 array; callers that need an image cast it themselves.
    """
    # np.float was only an alias of the builtin float; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so name the dtype explicitly.
    result = np.zeros((height, width, len(start_list)), dtype=np.float64)

    for i, (start, stop, is_horizontal) in enumerate(zip(start_list, stop_list, is_horizontal_list)):
        result[:, :, i] = get_gradation_2d(start, stop, width, height, is_horizontal)

    return result

In [7]:
# Read the class names, one per line. The `with` block closes the file handle,
# which a bare open() inside the comprehension never did.
with open("class_labels.txt") as labels_file:
    labels = [line.strip() for line in labels_file]

In [8]:
# Load the Caffe detection network from its definition (prototxt) and
# weights (caffemodel) files.
print("\nLoading model...\n")

network = cv2.dnn.readNetFromCaffe(
    "SSD_MobileNet_prototxt.txt",
    "SSD_MobileNet.caffemodel",
)

# Status message printed once the model is ready, before the capture starts.
print("\nStreaming video using device...\n")



# Capture video from file or through device
cap = cv2.VideoCapture("vtest.avi")
if not cap.isOpened():
    # Without this message the loop below is skipped silently and no window
    # ever appears.
    print("Could not open video source 'vtest.avi'", file=sys.stderr)

# Tunable constants for detection and distance estimation.
F = 615                      # Focal length used by the triangle-similarity estimate
PERSON_CLASS_ID = 15         # Class id of 'person' in class_labels.txt
CONFIDENCE_THRESHOLD = 0.2   # Detections at or below this confidence are ignored
PERSON_HEIGHT_CM = 165       # Assumed real-world person height (presumably cm; TODO confirm)
MIN_DISTANCE_CM = 200        # Pairs closer than 2 metres trigger the alert
SAFE_COLOR = (0, 255, 0)
ALERT_COLOR = (0, 0, 255)

try:
    # The gradient background never changes, so build it once instead of
    # once per frame; each frame pastes onto a fresh copy.
    background = np.uint8(
        get_gradation_3d(1200, 800, (255, 0, 0), (255, 255, 64), (True, False, False))
    )
    # The siren is written into a fixed 34x34 slot; resizing here keeps the
    # slice assignment valid whatever the size of the source image.
    siren_icon = cv2.resize(half, (34, 34))

    while cap.isOpened():
        # Capture one frame after another
        ret, frame = cap.read()
        if not ret:
            break

        (h, w) = frame.shape[:2]

        # Resize the frame to the 300x300 input the model expects
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)
        network.setInput(blob)
        detections = network.forward()

        pos_dict = dict()      # detection index -> (x_mid_cm, y_mid_cm, distance)
        coordinates = dict()   # detection index -> bounding box in frame pixels
        bir = dict()           # detection index -> box centre in frame pixels

        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # Written as "not >" so a NaN confidence is rejected, exactly as
            # the plain "confidence > threshold" test rejected it.
            if not confidence > CONFIDENCE_THRESHOLD:
                continue

            # Keep only persons
            class_id = int(detections[0, 0, i, 1])
            if class_id != PERSON_CLASS_ID:
                continue

            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            startX, startY, endX, endY = (int(v) for v in box.astype('int'))

            height = endY - startY
            if height <= 0:
                # A degenerate box would make the distance infinite or
                # negative and poison every pairwise comparison.
                continue

            label = "{}: {:.2f}%".format(labels[class_id], confidence * 100)
            print("{}".format(label))

            coordinates[i] = (startX, startY, endX, endY)

            # Mid point of bounding box
            x_mid = round((startX + endX) / 2, 4)
            y_mid = round((startY + endY) / 2, 4)
            bir[i] = (int(x_mid), int(y_mid))

            # Distance from camera based on triangle similarity
            distance = (PERSON_HEIGHT_CM * F) / height
            print("Distance(cm):{dist}\n".format(dist=distance))

            # Mid-point of bounding boxes (in cm) based on triangle similarity technique
            x_mid_cm = (x_mid * distance) / F
            y_mid_cm = (y_mid * distance) / F
            pos_dict[i] = (x_mid_cm, y_mid_cm, distance)

        # Distance between every pair of persons detected in the frame.
        # Keys were inserted in increasing order, so pairing each key with
        # the ones after it visits exactly the i < j pairs.
        close_objects = set()
        close_pairs = []
        person_ids = list(pos_dict)
        for idx, i in enumerate(person_ids):
            for j in person_ids[idx + 1:]:
                dist = sqrt(pow(pos_dict[i][0] - pos_dict[j][0], 2)
                            + pow(pos_dict[i][1] - pos_dict[j][1], 2)
                            + pow(pos_dict[i][2] - pos_dict[j][2], 2))

                # Check if distance less than 2 metres or 200 centimetres
                if dist < MIN_DISTANCE_CM:
                    close_objects.add(i)
                    close_objects.add(j)
                    close_pairs.append((bir[i], bir[j]))

        # Any close pair puts the whole frame into the alert state.
        alert = bool(close_objects)

        # Bird's eye view window
        image = np.zeros((1200, 1200, 3), np.uint8)

        for i in person_ids:
            COLOR = ALERT_COLOR if i in close_objects else SAFE_COLOR

            cv2.circle(image, bir[i], 10, COLOR, -1)

            (startX, startY, endX, endY) = coordinates[i]
            cv2.rectangle(frame, (startX, startY), (endX, endY), COLOR, 2)
            # Put the text above the box unless that would leave the frame
            y = startY - 15 if startY - 15 > 15 else startY + 15
            # Convert cms to feet
            cv2.putText(frame, 'Depth: {i} ft'.format(i=round(pos_dict[i][2] / 30.48, 4)), (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLOR, 2)

        for first_point, second_point in close_pairs:
            cv2.line(image, first_point, second_point, ALERT_COLOR, 4)

        # Shrink the bird's eye view to 40% width / 30% height and frame it.
        width = int(image.shape[1] * 40 / 100)
        height = int(image.shape[0] * 30 / 100)
        image = cv2.resize(image, (width, height))
        # The colour must be passed as `value=`: the 7th positional parameter
        # of copyMakeBorder is `dst`, not the border colour.
        image = cv2.copyMakeBorder(image, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=ALERT_COLOR)

        # Compose the output: camera frame and bird's eye view on the gradient
        canvas = Image.fromarray(background.copy())
        canvas.paste(Image.fromarray(frame), (20, 20))
        canvas.paste(Image.fromarray(image), (800, 300))
        bg_img = np.array(canvas)

        cv2.putText(bg_img, "Birds's Eye View", (800, 250), cv2.FONT_HERSHEY_SIMPLEX, 1, (128, 0, 0), 2)

        if alert:
            cv2.putText(bg_img, "Alert", (100, 650), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            # Putting siren next to the alert text
            bg_img[620:654, 240:274] = siren_icon
        else:
            cv2.putText(bg_img, "Normal", (100, 650), cv2.FONT_HERSHEY_SIMPLEX, 1, (128, 0, 0), 2)

        cv2.imshow("Final", bg_img)

        key = cv2.waitKey(1) & 0xFF

        # Press `q` to exit
        if key == ord("q"):
            break
finally:
    # Clean up even when a frame raises, so the capture handle and the
    # window are not left dangling in the kernel.
    cap.release()
    cv2.destroyAllWindows()



Loading model...


Streaming video using device...

person: 53.98%
Distance(cm):994.8529411764706

person: 25.92%
Distance(cm):1300.9615384615386

person: 56.70%
Distance(cm):994.8529411764706

person: 25.38%
Distance(cm):1252.7777777777778

person: 53.69%
Distance(cm):1014.75

person: 39.23%
Distance(cm):1179.9418604651162

person: 26.02%
Distance(cm):1025.0

person: 36.25%
Distance(cm):1179.9418604651162

person: 56.99%
Distance(cm):1193.8235294117646

person: 30.27%
Distance(cm):1166.3793103448277

person: 42.40%
Distance(cm):1127.5

person: 26.12%
Distance(cm):1317.857142857143

person: 33.20%
Distance(cm):1153.125

person: 29.44%
Distance(cm):1140.1685393258426

person: 34.53%
Distance(cm):1193.8235294117646

person: 28.35%
Distance(cm):1035.4591836734694

person: 31.01%
Distance(cm):1140.1685393258426

person: 25.35%
Distance(cm):1208.0357142857142

person: 42.38%
Distance(cm):867.3076923076923

person: 29.00%
Distance(cm):1068.157894736842

person: 50.05%
Distance(cm):914.18918