In [1]:
import numpy as np
import cv2
import os
import time

# At least OpenCV 4.4.0 is required to run YOLOv4


# Load the pre-trained YOLOv4 Darknet model (config + weights) from disk
net = cv2.dnn.readNetFromDarknet("model/YOLO-COCO/yolov4.cfg","model/YOLO-COCO/yolov4.weights")
# Run inference with OpenCV's own DNN backend on the CPU
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
# Determine only the output layer names that we need from YOLO.
# getUnconnectedOutLayers() yields 1-based layer ids. Older OpenCV releases wrap
# each id in a one-element array while newer ones return a flat array, so the
# ids are flattened first to make the lookup work with both layouts.
layer_names = net.getLayerNames()
olayer_name = [layer_names[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]



# Input Image BGR: grab a single frame from the camera at index 0
capture = cv2.VideoCapture(0)
try:
    ret,frame = capture.read()
finally:
    # Always free the camera device, even if the read raises
    capture.release()
if not ret or frame is None:
    # Fail here with a clear message instead of crashing later on frame.shape
    raise RuntimeError("Could not read a frame from camera 0")

# Network input resolution expected by this YOLO configuration
yolo_imgW = 416             # width of the network input image
yolo_imgH = 416             # height of the network input image

# Remember the original frame size (rows, cols) so detections can be
# scaled back to pixel coordinates afterwards
h = frame.shape[0]
w = frame.shape[1]

# Turn the frame into a 4D blob as the preprocessing step.
# blobFromImage can optionally:
# - subtract a mean
# - rescale values by scalefactor
# - resize and crop (from center)
# - swap the blue and red channels
# Be careful, each model requires different preprocessing!!!
blob = cv2.dnn.blobFromImage(
    frame,
    scalefactor=1 / 255.0,            # scale pixel values by 1/255
    size=(yolo_imgW, yolo_imgH),      # spatial size of the CNN input
    swapRB=True,
    crop=False,
)

# Feed the blob to the network and collect the requested output layers
net.setInput(blob)
outputs = net.forward(olayer_name)



In [2]:
# Inspect one raw detection row: index 1 of the first output layer's array
outputs[0][1]

array([1.3059810e-02, 1.6438799e-02, 2.8836997e-02, 3.8010102e-02,
       1.8340777e-05, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e

In [21]:


    # Lists to store detected bounding boxes, confidences and classIDs
    boxes = []
    confidences = []
    classIDs = []

    # Factors that scale detection[0:4] (treated as centerX, centerY, width,
    # height relative to the frame) to pixel units; built once, outside the loops
    frame_scale = np.array( [w,h,w,h] )

    # Loop over each of the layer outputs
    for output in outputs:
        # Loop over each of the detections
        for detection in output:
            # Entries from index 5 onward are the per-class scores;
            # keep the best-scoring class and its score
            scores = detection[5:]
            classID = int(np.argmax(scores))
            confidence = float(scores[classID])

            # Keep every detection whose best class score is non-zero; the actual
            # confidence cut-off is applied later by cv2.dnn.NMSBoxes
            if confidence > 0:

                # Compute the (x, y)-coordinates of the bounding box's top-left
                # corner from its center and size
                box = detection[0:4] * frame_scale
                (centerX, centerY, width, height) = box.astype('int')
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                # Add a new bounding box to our list (plain Python numbers so the
                # lists display cleanly and pass straight into OpenCV)
                boxes.append([x, y, int(width), int(height)])
                confidences.append(confidence)
                classIDs.append(classID)
    boxes


In [25]:
# Show the collected boxes, each stored as [x, y, width, height]
boxes

[[-14, 132, 596, 350],
 [-13, 132, 595, 349],
 [12, 131, 628, 351],
 [17, 132, 618, 350],
 [57, 130, 597, 352],
 [59, 129, 593, 353]]

In [26]:
# Show the best-class score kept for each collected box
confidences

[0.23407423496246338,
 0.2557812035083771,
 0.9193888902664185,
 0.9289783835411072,
 0.70229572057724,
 0.7005264163017273]

In [22]:
# Post-processing parameters for the YOLO detections
confident_constant = 0.5    # confidence threshold
threshold_constant = 0.3    # non-maximum suppression threshold

# Indices of the boxes kept after score filtering and non-maximum suppression
idxs = cv2.dnn.NMSBoxes(boxes, confidences, confident_constant, threshold_constant)

In [29]:
# View the kept box indices as a flat 1-D array
# NOTE(review): NMSBoxes may return an empty tuple (no .flatten()) when no box survives — confirm for the installed OpenCV
idxs.flatten()

array([3], dtype=int32)

In [24]:
# Same boxes viewed as a NumPy array, one row per box
np.array(boxes)

array([[-14, 132, 596, 350],
       [-13, 132, 595, 349],
       [ 12, 131, 628, 351],
       [ 17, 132, 618, 350],
       [ 57, 130, 597, 352],
       [ 59, 129, 593, 353]])

In [17]:
# Show the best-class score kept for each collected box
confidences

[0.23407423496246338,
 0.2557812035083771,
 0.9193888902664185,
 0.9289783835411072,
 0.70229572057724,
 0.7005264163017273]

In [18]:
# Show the best-scoring class index recorded for each collected box
classIDs

[0, 0, 0, 0, 0, 0]