In [2]:
import numpy as np
import argparse
import cv2
import os
import time

In [3]:
# Load the class names (one label per line). The context manager closes the
# file handle as soon as the names have been read.
with open('model/coco.names') as names_file:
    labels = names_file.read().strip().split('\n')

# Build the network from the Darknet config and the trained weights.
net = cv2.dnn.readNetFromDarknet('model/yolov3.cfg', 'model/yolov3.weights')
# Create a list of colors for the labels
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Resolve the names of the output layers. The indices returned by
# getUnconnectedOutLayers() are 1-based (hence the `- 1`). Depending on the
# OpenCV version they arrive as an (N, 1) or an (N,) array, so flatten them
# to plain scalars before indexing the Python sequence of layer names.
all_layer_names = net.getLayerNames()
layer_names = [
    all_layer_names[i - 1]
    for i in np.array(net.getUnconnectedOutLayers()).flatten()
]


In [4]:

def extract_boxes_confidences_classids(outputs, confidence, width, height):
    """Keep the detections whose best class score exceeds ``confidence``.

    Every row of every array in ``outputs`` is read as follows: columns 0-3
    hold the box (center x, center y, width, height) relative to the image
    size, and columns 5 onwards hold the per-class scores. Column 4 is not
    used by this function.

    Args:
        outputs: iterable of 2-D arrays, one row per detection.
        confidence: a detection is kept only if its highest class score is
            strictly greater than this value.
        width: image width in pixels, used to scale x and box width.
        height: image height in pixels, used to scale y and box height.

    Returns:
        A ``(boxes, confidences, classIDs)`` tuple of equally long lists.
        ``boxes`` holds ``[x, y, w, h]`` integer lists with ``(x, y)`` the
        top-left corner, ``confidences`` holds floats, and ``classIDs`` holds
        the ``np.argmax`` index of the winning class.
    """
    # Multiplier that maps relative box values back to pixel units
    pixel_scale = np.array([width, height, width, height])

    boxes, confidences, classIDs = [], [], []

    for detection in (row for layer_output in outputs for row in layer_output):
        # Pick the most likely class and its score
        class_scores = detection[5:]
        best_class = np.argmax(class_scores)
        best_score = class_scores[best_class]

        # Anything that does not clear the threshold is dropped
        if not best_score > confidence:
            continue

        # Box in pixels, truncated to integers
        center_x, center_y, box_w, box_h = (detection[0:4] * pixel_scale).astype('int')

        # Shift from the box center to its top-left corner
        left = int(center_x - (box_w / 2))
        top = int(center_y - (box_h / 2))

        boxes.append([left, top, int(box_w), int(box_h)])
        confidences.append(float(best_score))
        classIDs.append(best_class)

    return boxes, confidences, classIDs


def draw_bounding_boxes(image, boxes, confidences, classIDs, idxs, colors):
    """Draw rectangles and captions for the kept detections of class 0 and 2.

    Only detections whose class id equals 0 (captioned 'Person') or 2
    (captioned 'car') are drawn; every other class is skipped. The rectangle
    uses a fixed color per class, while the caption uses the row of
    ``colors`` that belongs to the class id.

    Args:
        image: image to draw on; it is modified in place.
        boxes: list of ``[x, y, w, h]`` boxes, ``(x, y)`` being the top-left corner.
        confidences: score for each box, shown with four decimals.
        classIDs: class id for each box.
        idxs: indices of the boxes to draw; must offer ``flatten()`` when
            non-empty. An empty value draws nothing.
        colors: per-class color table, indexed by class id.

    Returns:
        The same ``image`` object that was passed in.
    """
    # (class id, caption label, rectangle color) for the classes that get drawn
    drawn_classes = (
        (0, 'Person', (0, 0, 255)),
        (2, 'car', (255, 0, 0)),
    )

    # Nothing was kept, so there is nothing to draw
    if len(idxs) == 0:
        return image

    for i in idxs.flatten():
        # Box geometry: top-left corner plus extent
        left, top = boxes[i][0], boxes[i][1]
        box_w, box_h = boxes[i][2], boxes[i][3]

        for class_id, label, box_color in drawn_classes:
            if classIDs[i] == class_id:
                text_color = [int(c) for c in colors[classIDs[i]]]
                cv2.rectangle(image, (left, top), (left + box_w, top + box_h), box_color, 2)
                caption = "{}: {:.4f}".format(label, confidences[i])
                # Caption sits just above the top edge of the box
                cv2.putText(image, caption, (left, top - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2)

    return image


def make_prediction(net, layer_names, labels, image, confidence, threshold):
    """Run the network on one image and filter its detections.

    Args:
        net: network object offering ``setInput`` and ``forward``.
        layer_names: names of the layers whose outputs are requested from ``forward``.
        labels: accepted for interface compatibility; not used in this function.
        image: input image; its first two shape entries are taken as
            (height, width).
        confidence: minimum class score for a detection to be kept; also
            passed to Non-Max Suppression as its score threshold.
        threshold: overlap threshold passed to Non-Max Suppression.

    Returns:
        ``(boxes, confidences, classIDs, idxs)`` where the first three come
        from ``extract_boxes_confidences_classids`` and ``idxs`` is the result
        of ``cv2.dnn.NMSBoxes`` over those boxes.
    """
    frame_h, frame_w = image.shape[:2]

    # Feed the image as a 416x416 blob (pixel values scaled by 1/255,
    # R and B channels swapped, no cropping) and collect the layer outputs
    net.setInput(cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False))
    raw_outputs = net.forward(layer_names)

    # Turn the raw outputs into pixel-space boxes, scores and class ids
    boxes, confidences, classIDs = extract_boxes_confidences_classids(
        raw_outputs, confidence, frame_w, frame_h
    )

    # Non-Max Suppression selects which of the overlapping boxes to keep
    kept = cv2.dnn.NMSBoxes(boxes, confidences, confidence, threshold)

    return boxes, confidences, classIDs, kept


In [12]:
cap = cv2.VideoCapture('project.avi')

# Read Slow and resize
Slow = cv2.imread("SlowDown.png")
if Slow is None:
    # cv2.imread reports a missing/unreadable file by returning None; fail
    # here with a clear message instead of an opaque error inside cv2.resize.
    cap.release()
    raise FileNotFoundError("Could not read overlay image 'SlowDown.png'")
size = 100    # side length of the square overlay, in pixels
margin = 10   # gap between the overlay and the bottom/right frame edges
Slow = cv2.resize(Slow, (size, size))
# Create a mask of Slow
img2gray = cv2.cvtColor(Slow, cv2.COLOR_BGR2GRAY)
ret, mask = cv2.threshold(img2gray, 1, 255, cv2.THRESH_BINARY)
# Boolean view of the mask: True where the overlay has visible content
overlay_pixels = mask > 0

if not cap.isOpened():
    print("Could not open video file 'project.avi'.")

try:
    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            print('Video file finished.')
            break
        image = cv2.resize(image, (512, 512))
        croped_image = image  # cropping is currently disabled (was image[:, 120:400])

        # perf_counter is a monotonic, high-resolution clock intended for
        # measuring short durations.
        start_time = time.perf_counter()
        boxes, confidences, classIDs, idxs = make_prediction(net, layer_names, labels, croped_image, 0.5, 0.3)
        end_time = time.perf_counter()

        croped_image = draw_bounding_boxes(croped_image, boxes, confidences, classIDs, idxs, colors)

        # Guard against a zero interval so the FPS readout can never raise
        # ZeroDivisionError.
        elapsed = end_time - start_time
        fps_text = 'FPS: ' + (str(round(1.0 / elapsed, 2)) if elapsed > 0 else 'inf')
        cv2.putText(img=croped_image, text=fps_text, org=(0, 20), fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=0.5, color=(0, 0, 255), thickness=1)

        # Flip the frame
        #croped_image = cv2.flip(croped_image, 1)
        # Region of Image (ROI), where we want to insert logo: a size x size
        # window inset by `margin` from the bottom-right corner. `roi` is a
        # view, so writing to it updates croped_image.
        roi = croped_image[-size - margin:-margin, -size - margin:-margin]
        # Copy only the overlay's visible pixels. Unlike zeroing the masked
        # area and then adding the whole overlay, this cannot wrap around
        # uint8 where faint overlay pixels fall outside the mask.
        roi[overlay_pixels] = Slow[overlay_pixels]

        cv2.imshow('YOLO Object Detection', croped_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails
    # mid-loop; otherwise the window can be left hanging until the kernel
    # is restarted.
    cap.release()
    cv2.destroyAllWindows()