In [1]:
import cv2
import argparse
import numpy as np

In [2]:
img_path = './imgs/1.jpg'
image = cv2.imread(img_path)
# cv2.imread reports an unreadable/missing file by returning None rather than
# raising, so fail here with a clear message instead of later on `image.shape`.
if image is None:
    raise FileNotFoundError('Could not read image: ' + img_path)

In [3]:
# image.shape starts with (rows, cols): rows are the height, cols the width
Height, Width = image.shape[:2]
Height, Width

(513, 770)

In [4]:
# Pixel scale factor handed to blobFromImage (roughly 1/255)
scale = 0.00392

# The 80 object labels, looked up by class id when drawing predictions.
# Order matters: presumably it mirrors the order of the network's class
# scores -- verify against the names file the weights were trained with.
classes = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

In [5]:
# Generate one random color (3 channel values in [0, 255)) per class.
# A fixed seed keeps the class -> color mapping, and therefore the saved
# output image, identical across kernel restarts.
COLORS = np.random.default_rng(42).uniform(0, 255, size=(len(classes), 3))

In [6]:
# Build the YOLO network from the config and weights files, both of which
# are looked up relative to the current working directory
net = cv2.dnn.readNet(
    'yolov3.cfg',
    'yolov3.weights',
)

In [7]:
# Turn the image into the network's input blob: pixel values multiplied by
# `scale`, resized to 416x416 without cropping, zero mean subtraction, and
# the first/last color channels swapped
blob = cv2.dnn.blobFromImage(
    image,
    scale,
    (416, 416),
    (0, 0, 0),
    swapRB=True,
    crop=False,
)

In [8]:
# Set the preprocessed blob as the network's input
net.setInput(blob)

In [9]:
def get_output_layers(net):
    """Return the names of the network's unconnected output layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. the loaded ``cv2.dnn`` net).

    Returns:
        list: Layer names, one per index reported by
        ``getUnconnectedOutLayers()``, in the same order.

    Raises:
        IndexError: If a reported layer index is outside ``getLayerNames()``.
    """
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() may yield either plain indices or
    # single-element sequences; flattening handles both shapes explicitly
    # instead of relying on a bare `except` that would also hide real errors.
    out_indices = np.asarray(net.getUnconnectedOutLayers()).flatten()
    # Reported indices are 1-based, hence the `- 1`.
    return [layer_names[int(i) - 1] for i in out_indices]

In [10]:
# Run the network and collect the outputs of the layers named by get_output_layers
outs = net.forward(get_output_layers(net))

In [11]:
class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

# For each detection from each output layer, get the confidence, class id
# and bounding box params, and ignore weak detections
# (confidence <= conf_threshold).
# Each detection row is read as [center_x, center_y, width, height, <unused>,
# class scores...]; the box values are scaled by the image size, so they are
# presumably relative (0..1) coordinates -- verify against the model output.
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        # Use the shared threshold so this filter cannot drift from the one
        # later passed to NMSBoxes.
        if confidence > conf_threshold:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            # Convert the box center to its top-left corner.
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

In [12]:
def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw a labelled bounding box for one detection onto ``img`` in place.

    Args:
        img: Image to draw on (modified in place by the cv2 drawing calls).
        class_id: Index into the module-level ``classes`` and ``COLORS``.
        confidence: Detection confidence. Currently not drawn; accepted so
            existing callers keep working.
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    label = str(classes[class_id])
    color = COLORS[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    # Place the label slightly up-left of the box, but clamp it so that boxes
    # touching the top/left image edge still get a visible label instead of
    # one drawn at negative coordinates (15 px leaves room for the text height).
    label_origin = (max(x - 10, 0), max(y - 10, 15))
    cv2.putText(img, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

In [13]:
# Non-maximum suppression: keep only the strongest of overlapping boxes
indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

# The kept indices may come back either flat or as single-element rows
# (this varies with the OpenCV version); flattening handles both shapes
# without a bare `except` that would also swallow unrelated errors.
for i in np.array(indices).flatten():
    i = int(i)
    box = boxes[i]
    class_id = class_ids[i]

    x, y, w, h = box
    draw_prediction(image, class_id, confidences[i], round(x), round(y), round(x + w), round(y + h))

# Show the annotated image and wait for a key press before continuing
cv2.imshow("object detection", image)
cv2.waitKey()

cv2.imwrite("object-detection.jpg", image)
cv2.destroyAllWindows()

In [15]:
def detect(str, detections=None):
    """Convert center-format detections into corner-format boxes.

    Each input detection is ``(item, confidence_rate, (x_center, y_center,
    width, height))`` -- the result format the original notes attribute to
    the AlexeyAB darknet wrapper. The output uses the start/end corner form
    commonly used with PIL/OpenCV.

    Args:
        str: Image path. Currently unused; the parameter (and its name) is
            kept so existing callers continue to work.
        detections: Optional sequence of detections in the format above.
            When omitted, the notebook-level global ``out`` is used, which
            is what this function always read before.

    Returns:
        A list of ``(item, confidence_rate, (x_start, y_start, x_end, y_end),
        width, height)`` tuples, or ``False`` when there are no detections.
    """
    if detections is None:
        # Backward-compatible fallback: relies on `out` existing at notebook
        # level (raises NameError otherwise, exactly as before).
        detections = out

    # A single detection is converted too; previously only two or more were
    # handled because the single-item branch had been commented out.
    if len(detections) < 1:
        return False

    newdata = []
    for item, confidence_rate, imagedata in detections:
        x1, y1, w_size, h_size = imagedata
        # Shift from the box center to its top-left corner, then add the
        # size to reach the bottom-right corner.
        x_start = round(x1 - (w_size / 2))
        y_start = round(y1 - (h_size / 2))
        x_end = round(x_start + w_size)
        y_end = round(y_start + h_size)
        newdata.append((item, confidence_rate, (x_start, y_start, x_end, y_end), w_size, h_size))

    return newdata