In [1]:
import cv2
import argparse
import numpy as np

In [2]:
# read input image
image_file = "object-detection-opencv/test.jpg"
classes_file = "object-detection-opencv/yolov3.txt"
weights_file = "object-detection-opencv/yolov3.weights"
conf_file = "object-detection-opencv/yolov3.cfg"
image = cv2.imread(image_file)

Width = image.shape[1]
Height = image.shape[0]
scale = 0.00392

# read class names from text file
classes = None
with open(classes_file, 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# generate different colors for different classes 
COLORS = np.random.uniform(0, 255, size=(len(classes), 3))

# read pre-trained model and config file
# reads the weights and config file and creates the network.
net = cv2.dnn.readNet(weights_file, conf_file)

# create input blob 
# prepares the input image to run through the deep neural network
"""
Parameters Type : 
    cv::dnn::blobFromImage (InputArray			image,
                                double  			scalefactor = 1.0,
                                const Size &  		size = Size(),
                                const Scalar &  	mean = Scalar(),
                                bool				swapRB = false,
                                bool  				crop = false,
                                int  				ddepth = CV_32F 
                            )
Parameters Definition
    image			input image (with 1-, 3- or 4-channels).
    size			spatial size for output image
    mean			scalar with mean values which are subtracted from channels. Values are intended to be in 
                    (mean-R, mean-G, mean-B) order if image has BGR ordering and swapRB is true.
    scalefactor		multiplier for image values.
    swapRB			flag which indicates that swap first and last channels in 3-channel image is necessary.
    crop			flag which indicates whether image will be cropped after resize or not
    ddepth			Depth of output blob. Choose CV_32F or CV_8U.                        

Python Signature:
    retval =cv2.dnn.blobFromImage(image[, scalefactor[, size[, mean[, swapRB[, crop[, ddepth]]]]]])

Returns
    4-dimensional Mat with NCHW dimensions order. 
        N: number of images in the batch
        C: number of channels of the image (ex: 3 for RGB, 1 for grayscale...)
        H: height of the image
        W: width of the image

"""
blob = cv2.dnn.blobFromImage(image, scale, (416,416), (0,0,0), True, crop=False)
## Uncomment the code below to print the blob value
# for image in blob:
#     for layer in image:
#         print("------------------------- Color Layer Start --------------------------")
#         print("layer.shape = " + str(layer.shape))
#         for x in range(len(layer)):
#             print("------------------------- Horizontal Line of Pixel Start --------------------------")
#             print("layer[" + str(x) + ",:] = " + str(layer[x,:]))
#         print("------------------------- Horizontal Line of Pixel End --------------------------")
#     print("------------------------- Color Layer End --------------------------")
    
# set input blob for the network
net.setInput(blob)

In [3]:
# function to get the output layer names 
# in the architecture
def get_output_layers(net):
    
    layer_names = net.getLayerNames()
    
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    return output_layers

# function to draw bounding box on the detected object with class name
def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):

    label = str(classes[class_id])

    color = COLORS[class_id]

    cv2.rectangle(img, (x,y), (x_plus_w,y_plus_h), color, 2)

    cv2.putText(img, label, (x-10,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

In [9]:
# run inference through the network
# and gather predictions from output layers
outs = net.forward(get_output_layers(net))

# initialization
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

# for each detetion from each output layer 
# get the confidence, class id, bounding box params
# and ignore weak detections (confidence < 0.5)
for out in outs:
    for detection in out:
        print(detection.shape)
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

            
if len(class_ids) != 0 and "person" in str(class_ids[class_id]):
    print("Image " + image_file + " display at least one person")
else:
    print("Image " + image_file + " is ok")

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,)
(85,

In [5]:
print(class_ids)

[9, 2, 2, 2, 2, 0, 0, 9, 9, 9, 9, 9, 2, 2, 2, 27]


In [6]:
# apply non-max suppression
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
print(indices)
# go through the detections remaining
# after nms and draw bounding box
for i in indices:
    i = i[0]
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
#     h = box[3]
    
    draw_bounding_box(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))

# # display output image
# cv2.imshow("object detection", image)

# # wait until any key is pressed
# cv2.waitKey()
    
#  # save output image to disk
cv2.imwrite("object-detection.jpg", image)

# # release resources
# cv2.destroyAllWindows()

[[ 6]
 [ 3]
 [ 5]
 [ 8]
 [ 7]
 [ 9]
 [14]
 [12]
 [10]
 [11]
 [15]
 [ 1]
 [13]
 [ 4]
 [ 2]]


True

In [7]:
# get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
    v_boxes, v_labels, v_scores = list(), list(), list()
    # enumerate all boxes
    for box in boxes:
        # enumerate all possible labels
        for i in range(len(labels)):
            # check if the threshold for this label is high enough
            if box.classes[i] > thresh:
                v_boxes.append(box)
                v_labels.append(labels[i])
                v_scores.append(box.classes[i]*100)
                # don't break, many labels may trigger for one box
    return v_boxes, v_labels, v_scores