In [1]:
import numpy as  np
import cv2

In [2]:
# Load the input image from disk.
# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None. Check explicitly so the failure is reported here with a clear
# message instead of as an AttributeError on `.shape` further down.
image_path = 'test-2.webp'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(
        "Could not read image '{}': file is missing or cannot be decoded".format(image_path)
    )

# img.shape starts with (rows, columns); these are used later to scale the
# predicted boxes back to pixel coordinates of the original image.
height, width = img.shape[:2]

In [3]:
# Convert the image into the input blob for the network.
#   scalefactor : 0.003922 (= 1/255, rounded), the value recommended by the YOLO authors
#   size        : 320x320 here; 416x416 and 608x608 are the other accepted sizes.
#                 A larger size gives more accuracy but less speed.
#   swapRB      : swap the first and last colour channels
#   crop        : False, i.e. resize without cropping
img_blob = cv2.dnn.blobFromImage(
    image=img,
    scalefactor=0.003922,
    size=(320, 320),
    swapRB=True,
    crop=False,
)

In [4]:
# Human-readable label for each of the 80 class ids; the list index is the
# class id used to look the label up after prediction.
# NOTE(review): order is presumably that of the label file the weights were
# trained with -- confirm against the names file shipped with the model.
class_labels = [
    # 0-7
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    # 8-15
    "boat", "trafficlight", "firehydrant", "stopsign", "parkingmeter", "bench", "bird", "cat",
    # 16-23
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    # 24-31
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    # 32-39
    "sportsball", "kite", "baseballbat", "baseballglove", "skateboard", "surfboard", "tennisracket", "bottle",
    # 40-47
    "wineglass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    # 48-55
    "sandwich", "orange", "broccoli", "carrot", "hotdog", "pizza", "donut", "cake",
    # 56-63
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop",
    # 64-71
    "mouse", "remote", "keyboard", "cellphone", "microwave", "oven", "toaster", "sink",
    # 72-79
    "refrigerator", "book", "clock", "vase", "scissors", "teddybear", "hairdrier", "toothbrush",
]

In [5]:
# Ten base colours, written as comma-separated triplets:
# White, Red, Lime, Blue, Yellow, Cyan, Magenta, Silver, Gray, Maroon
# NOTE(review): the names above read each triplet as R,G,B. OpenCV drawing
# functions read colour tuples as B,G,R, so e.g. "255,0,0" would render as
# blue rather than red -- confirm which is intended.
palette_triplets = ["255,255,255","255,0,0","0,255,0","0,0,255","255,255,0",
                    "0,255,255","255,0,255","192,192,192","128,128,128","128,0,0"]

# Parse every "a,b,c" string into a row of three integers -> (10, 3) array.
base_palette = np.array(
    [[int(channel) for channel in triplet.split(",")] for triplet in palette_triplets],
    dtype="int",
)

# Repeat the 10-colour palette 8 times so there is one row per class:
# 8 x 10 colours = 80 classes, giving a (80, 3) array indexed by class id.
class_colors = np.tile(base_palette, (8, 1))

In [6]:
# Build the network from the Darknet configuration and the pre-trained weights.
yolo_model = cv2.dnn.readNetFromDarknet('yolov3.cfg',"yolov3.weights")

layers = yolo_model.getLayerNames()  # Names of all layers in the yolo network

# Names of the network's unconnected output layers, i.e. the layers whose
# results forward() should return.
# Ask the network for the names directly rather than indexing `layers` with
# getUnconnectedOutLayers(): that call returns a flat array on newer OpenCV
# releases and an Nx1 array on older ones, so `layers[x - 1]` only works on
# some versions. getUnconnectedOutLayersNames() gives the same names on both.
output_layer = list(yolo_model.getUnconnectedOutLayersNames())

In [7]:
# Feed the preprocessed image blob to the network.
yolo_model.setInput(img_blob)
# Run a forward pass, requesting the layers listed in `output_layer`.
# The result is iterated layer by layer (and then detection by detection)
# when the boxes are collected further down.
obj_detection_layers = yolo_model.forward(output_layer)

- Since the network returns multiple overlapping bounding boxes for the same object, we use non-max suppression (NMS) to remove the duplicates.

In [8]:
# Accumulators for non-max suppression (NMS). The three lists are parallel,
# with one entry per kept detection:
#   class_ids_list   -> predicted class id
#   boxes_list       -> [start x, start y, width, height] of the box
#   confidences_list -> confidence of the predicted class
class_ids_list, boxes_list, confidences_list = [], [], []

In [9]:
# Factors that map a predicted (centre x, centre y, width, height) box back to
# pixel coordinates of the original image.
image_scale = np.array([width, height, width, height])

for layer_output in obj_detection_layers:
    for detection in layer_output:
        # detection[0:4] -> box centre x, box centre y, box width, box height
        # detection[5:]  -> one score per class
        # detection[4] is not read here (presumably the objectness score -- confirm).
        class_scores = detection[5:]
        best_class_id = np.argmax(class_scores)
        best_score = class_scores[best_class_id]

        # Keep only detections whose best class score reaches 0.40.
        if not (best_score >= 0.40):
            continue

        # Scale the box to the actual image size and truncate to whole pixels.
        scaled_box = (detection[0:4] * image_scale).astype("int")
        centre_x, centre_y, scaled_width, scaled_height = scaled_box

        # Convert the box centre into the top-left corner of the box.
        left = int(centre_x - (scaled_width / 2))
        top = int(centre_y - (scaled_height / 2))

        # Store class id, [start x, start y, width, height] and confidence for NMS.
        # The confidence is passed as a float and the box values as integers.
        class_ids_list.append(best_class_id)
        confidences_list.append(float(best_score))
        boxes_list.append([left, top, int(scaled_width), int(scaled_height)])

# Non-max suppression returns the indices of the detections to keep, while
# suppressing the weaker boxes that overlap them.
# Score (confidence) threshold is 0.5 and the NMS overlap threshold is 0.4.
max_value_ids = cv2.dnn.NMSBoxes(boxes_list, confidences_list, 0.5, 0.4)

# Loop through the detections remaining after NMS; draw each box and its label.
# The shape of the NMSBoxes result depends on the OpenCV release (Nx1 array on
# older versions, flat array on newer ones, empty when nothing survives), so
# flatten it to get plain indices in every case.
for max_valueid in np.array(max_value_ids).reshape(-1):
    # Index of this detection in the three parallel lists.
    max_class_id = int(max_valueid)
    start_x_pt, start_y_pt, box_width, box_height = boxes_list[max_class_id]

    # Get the predicted class id, its label and its confidence.
    predicted_class_id = class_ids_list[max_class_id]
    predicted_class_label = class_labels[predicted_class_id]
    prediction_confidence = confidences_list[max_class_id]

    # Bottom-right corner of the box.
    end_x_pt = start_x_pt + box_width
    end_y_pt = start_y_pt + box_height

    # OpenCV drawing calls need plain Python ints for the colour.
    box_color = class_colors[predicted_class_id].tolist()

    predicted_class_label = "{}: {:.2f}%".format(predicted_class_label, prediction_confidence * 100)

    # Put the label just above the box, but never above the top edge of the
    # image, where it would not be visible.
    label_y_pt = max(start_y_pt - 7, 12)

    cv2.rectangle(img, (start_x_pt, start_y_pt), (end_x_pt, end_y_pt), box_color, 1)
    cv2.putText(img, predicted_class_label, (start_x_pt, label_y_pt), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)


# Show the annotated image in a window titled "xyz".
cv2.imshow("xyz", img)
# A delay of 0 makes waitKey wait for a key press with no timeout, so this
# cell does not finish until a key is pressed in the image window.
cv2.waitKey(0)                 
# Close the window(s) opened above.
cv2.destroyAllWindows()

---------------