In [1]:
import numpy as np
import cv2
import sys
import os.path
import matplotlib.pyplot as plt
# matplotlib inline


In [2]:
import matplotlib
# Notebook-wide plotting defaults: large figures, grayscale colormap for images
matplotlib.rcParams.update({
    'figure.figsize': (15.0, 15.0),
    'image.cmap': 'gray',
})

Step 1: Initialize the parameters

The YOLOv3 algorithm generates bounding boxes as the predicted detection outputs. Every predicted box is associated with a confidence score. In the first stage, all the boxes below the confidence threshold parameter are ignored for further processing.

The rest of the boxes undergo non-maximum suppression, which removes redundant overlapping bounding boxes. Non-maximum suppression is controlled by the parameter nmsThreshold. You can try changing these values to see how the number of output predicted boxes changes.
 
Next, the default values for the input width and height of the network's input image are set. We set each of them to 416 so that we can compare our runs with the reference Darknet implementation published by the YOLOv3 authors.

In [3]:
# Detection parameters used throughout the notebook

# Score cut-offs: objectness threshold and confidence threshold
objectnessThreshold, confThreshold = 0.5, 0.5

# Non-maximum suppression threshold
nmsThreshold = 0.4

# Width and height of the network's input image
inpWidth = inpHeight = 416

Step 2: Load the model and classes 

The file coco.names contains the names of all the object classes the model was trained on. We read the class names from this file.

Next, we load the network, which has two parts:

- the YOLOv3 weights (yolov3.weights) and the YOLOv3 configuration (yolov3.cfg)

We set the DNN backend to OpenCV and the target to CPU. You could try setting the preferable target to cv2.dnn.DNN_TARGET_OPENCL to run it on a GPU. Keep in mind, however, that the current OpenCV version is tested only with Intel's GPUs; it will automatically switch to the CPU if you do not have an Intel GPU.

In [6]:
# Load names of classes (the file is split on newlines, one entry per line)
classesFile = "coco.names"
classes = None

with open(classesFile, 'rt') as f:
    # Trailing newlines are stripped first so the list has no empty last entry
    classes = f.read().rstrip('\n').split('\n')

# Give the configuration and weights files for the model and load the network using them
modelConfiguration = "yolov3.cfg"
modelWeights = "yolov3.weights"

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)

# Pin the backend and target explicitly: OpenCV's own DNN backend, running on the CPU.
# Swap the target for cv2.dnn.DNN_TARGET_OPENCL to try running on a GPU.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

Step 3: Process each frame

The input image is passed through the network, and the output is decoded and displayed using a few utility functions. Let us go over these utility functions.



Step 3a: Getting the names of output layers

The forward function in OpenCV's Net class needs the ending layer up to which it should run in the network. Since we want to run through the whole network, we need to identify the last layer of the network. We do that by using the function getUnconnectedOutLayers().

In [None]:
def getOutputsNames(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        list: Layer names, in the order reported by
        ``getUnconnectedOutLayers()``.
    """
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()

    # Depending on the OpenCV release, the indices come back either as an
    # N x 1 array (older bindings) or as a flat array (4.5.4 and later).
    # Flattening makes both shapes iterate as plain scalars.
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).flatten()

    # Reported indices are 1-based, hence the "- 1"
    return [layersNames[int(i) - 1] for i in outLayerIds]

In [8]:
# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    """Draw one detection (bounding box plus text label) on the global ``frame``.

    Args:
        classId: Index into the global ``classes`` list; only used when
            ``classes`` is non-empty.
        conf: Confidence score, rendered with two decimals in the label.
        left, top, right, bottom: Corner coordinates of the bounding box,
            passed straight to ``cv2.rectangle`` (presumably integer pixel
            coordinates -- confirm against the caller).

    Returns:
        None. The drawing calls are applied to the module-level ``frame``.
    """
    # Draw a bounding box
    cv2.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)

    # Label text: the confidence, prefixed with the class name when names are loaded
    label = '%.2f' % conf
    if classes:
        assert(classId < len(classes))
        label = '%s:%s' % (classes[classId], label)

    # Display the label at the top of the bounding box
    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

    # Keep the label anchor at least one measured text height below the image top
    top = max(top, labelSize[1])

    # NOTE(review): the text is measured at scale 0.5 but drawn at 0.75; the 1.5
    # factor on the background box presumably compensates for that ratio.
    cv2.rectangle(frame,(left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv2.FILLED)
    cv2.putText(frame, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)