In [68]:
import numpy as np
import time
import cv2
# Sanity check before picking a DNN backend: how many CUDA devices this OpenCV
# build reports. OpenCV also returns 0 here when it was built without CUDA support.
print(f'Number of GPUs available: {cv2.cuda.getCudaEnabledDeviceCount()}')

Number of GPUs available: 0


In [69]:
def yolov5_processing(img, net, input_shape):
    """Run one forward pass of `net` on `img` and return the raw network outputs.

    Args:
        img: image handed to `cv2.dnn.blobFromImage`.
        net: network object providing `setInput`, `forward`,
            `getUnconnectedOutLayersNames` and `getPerfProfile`.
        input_shape: `(height, width)` the image is resized to for the network.

    Returns:
        Whatever `net.forward` returns for the unconnected output layers.

    Side effects:
        Prints the inference time reported by `net.getPerfProfile()` in ms.
    """
    net_height, net_width = input_shape

    # Build the input blob: pixel values scaled by 1/255, image resized
    # (not cropped) to the network size, first and last channels swapped.
    net.setInput(
        cv2.dnn.blobFromImage(
            img,
            scalefactor=1/255,
            size=(net_width, net_height),
            mean=[0, 0, 0],
            swapRB=True,
            crop=False,
        )
    )

    # Forward pass through every unconnected output layer.
    output_names = net.getUnconnectedOutLayersNames()
    outputs = net.forward(output_names)

    # getPerfProfile reports ticks; convert them to milliseconds for display.
    ticks, _ = net.getPerfProfile()
    elapsed_ms = ticks*1000.0/cv2.getTickFrequency()
    print(f'Inference time: {elapsed_ms:.2f} ms')

    return outputs

In [70]:
def draw_label(img, label_text, x1, y1):
    """Draw `label_text` on `img` inside a filled black box anchored at (x1, y1).

    Args:
        img: image drawn on in place.
        label_text: text to render.
        x1, y1: top-left corner of the label background.
    """
    font, font_scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1

    # Measure the rendered text so the background matches it exactly.
    (text_w, text_h), baseline = cv2.getTextSize(label_text, font, font_scale, thickness)

    # Black background: text height plus the baseline below it.
    box_start = (x1, y1)
    box_end = (x1 + text_w, y1 + text_h + baseline)
    cv2.rectangle(img, box_start, box_end, (0,0,0), cv2.FILLED)

    # putText takes the bottom-left corner of the text as its origin.
    text_origin = (x1, y1 + text_h)
    cv2.putText(img, label_text, text_origin, font, font_scale, (255,0,0), thickness, cv2.LINE_AA)

In [75]:
def yolov5_inference_cv2onnx(img, net,  input_shape, classes, conf_thresh=0.3, cls_thresh=0.1, nms_thresh=0.2):
    """Detect objects in `img`, draw them on it and return the kept detections.

    The first network output is read as an array of shape
    `(1, num_predictions, 5 + num_classes)` whose rows are
    `[cx, cy, w, h, confidence, class scores...]`, with the box expressed in
    network-input pixels.

    Args:
        img: image to run on; boxes and labels are drawn on it in place.
        net: network forwarded through `yolov5_processing`.
        input_shape: `(height, width)` of the network input.
        classes: sequence of class names indexed by class id.
        conf_thresh: minimum row confidence (column 4); also used as the
            score threshold of the non-maximum suppression.
        cls_thresh: the best class score must be strictly above this value.
        nms_thresh: overlap threshold passed to `cv2.dnn.NMSBoxes`.

    Returns:
        `(img, img_detections)` where `img_detections` is a list of
        `[bbox, confidence, class_id]` and `bbox` is
        `np.array([x1, y1, width, height])` in `img` pixel coordinates.
    """
    outputs = yolov5_processing(img, net, input_shape=input_shape)

    # Output objs lists:
    bboxes         = []
    class_ids      = []
    confidences    = []
    img_detections = []

    # Network-input pixels -> image pixels (loop invariant, so computed once).
    x_scale = img.shape[1]/input_shape[1]
    y_scale = img.shape[0]/input_shape[0]

    # Drop low-confidence rows with one vectorised mask so the Python loop
    # below only visits the few candidates left, not every raw prediction.
    predictions = np.asarray(outputs[0][0])
    candidates  = predictions[predictions[:, 4] >= conf_thresh]

    for detection in candidates:
        # Get class from highest score detected:
        classes_scores = detection[5:]
        class_id = np.argmax(classes_scores)

        # Skip rows whose best class score is not above the threshold.
        if not (classes_scores[class_id] > cls_thresh):
            continue

        confidences.append(detection[4])
        class_ids.append(class_id)

        # Centre/size box -> top-left/size box in image pixels:
        cX, cY, w, h = detection[0], detection[1], detection[2], detection[3]
        x1     = int((cX - w/2)*x_scale)
        y1     = int((cY - h/2)*y_scale)
        width  = int(w*x_scale)
        height = int(h*y_scale)
        bboxes.append(np.array([x1, y1, width, height]))

    # Nothing survived the thresholds: no need to call NMS at all.
    if not bboxes:
        return img, img_detections

    # Removing low conf overlapping bboxes with cv2 non-maxima-suppression.
    # Plain Python numbers are passed because they are accepted by every
    # OpenCV build, whatever its handling of NumPy scalar types.
    nms_indices = cv2.dnn.NMSBoxes([bbox.tolist() for bbox in bboxes],
                                   [float(score) for score in confidences],
                                   float(conf_thresh), float(nms_thresh))

    # OpenCV < 4.5.4 returns the indices as an (N, 1) array, newer versions as
    # a flat one (or an empty tuple); flattening handles all of them.
    for i in np.array(nms_indices, dtype=int).reshape(-1):
        i = int(i)
        bbox = bboxes[i]

        # Plain ints: some OpenCV builds reject NumPy integers as point coordinates.
        x1, y1 = int(bbox[0]), int(bbox[1])
        x2, y2 = x1 + int(bbox[2]), y1 + int(bbox[3])

        # Draw bounding box and label:
        cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,0), 3)
        label_text = f'{classes[class_ids[i]]} {confidences[i]:.2f}'
        draw_label(img, label_text, x1, y1)

        # Add to img detections output:
        img_detections.append([bbox, confidences[i], class_ids[i]])

    return img, img_detections

In [76]:
# Class names, ordered so that a predicted class id indexes its name directly.
coco_classes = [
    # ids 0-7
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
    # ids 8-15
    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
    # ids 16-23
    'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    # ids 24-31
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    # ids 32-39
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # ids 40-47
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    # ids 48-55
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    # ids 56-63
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
    # ids 64-71
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    # ids 72-79
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Loading onnx model:
weights = "yolov5m6_coco.onnx"
net = cv2.dnn.readNet(weights)

# Ask for the CUDA backend only when this OpenCV build reports a CUDA device;
# otherwise request the default OpenCV backend on the CPU explicitly.
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
else:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Load image. cv2.imread returns None instead of raising when the file is
# missing or unreadable, so fail here with a clear message.
image_path = 'dogs_cats.jpg'
img = cv2.imread(image_path)
if img is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

# Processing (on a copy, so `img` itself stays free of drawings):
img_output, img_detections = yolov5_inference_cv2onnx(img.copy(), net, input_shape=(1280, 1280),
                            classes=coco_classes, conf_thresh=0.3, cls_thresh=0.1, nms_thresh=0.2)

print('Detections:', img_detections)

# Show the annotated image in a native window; waitKey(0) blocks until a key
# is pressed, after which the window is closed.
img_output = cv2.resize(img_output, (1000, 640))
cv2.imshow('Output', img_output)
cv2.waitKey(0)
cv2.destroyAllWindows()

Inference time: 1782.26 ms
Detections: [[array([1524,   61,  420,  859]), 0.91709656, 17], [array([ 84, 129, 526, 778]), 0.87771267, 16], [array([806,  68, 438, 854]), 0.851154, 17], [array([1235,  353,  331,  560]), 0.70271075, 15], [array([581, 382, 259, 537]), 0.4275494, 27]]
