In [1]:
import cv2
import numpy as np
import yaml
from yaml.loader import SafeLoader
import os


In [2]:
# Load the dataset description (data.yaml) and pull out the class names.
# The file is opened explicitly as UTF-8 so that non-ASCII class names are
# decoded the same way on every platform instead of depending on the locale.
with open('data.yaml', mode='r', encoding='utf-8') as f:
    # safe_load builds only plain Python objects (it uses SafeLoader internally)
    data_yaml = yaml.safe_load(f)
# `label` maps a predicted class id to its human-readable name
label = data_yaml['names']

In [3]:
# Build the YOLO network from the exported ONNX weights via OpenCV's DNN module
onnx_weights_path = './Model/weights/best.onnx'
yolo = cv2.dnn.readNetFromONNX(onnx_weights_path)

# Run inference on the CPU using OpenCV's own DNN backend
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

In [49]:
# Load the image
IMAGE_PATH = './street_image.jpg'
img = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a clear message instead of an AttributeError on img.copy().
if img is None:
    raise FileNotFoundError(f'Could not read image: {IMAGE_PATH}')
image = img.copy()  # keep `img` untouched; boxes are drawn on `image` later
h, w, d = image.shape

# Get the YOLO prediction from the image
# step-1: convert the image into a square image (array).
# The original is placed in the top-left corner and the rest is left black,
# so the resize in step-2 does not distort the aspect ratio.
max_hw = max(h, w)
square_img = np.zeros((max_hw, max_hw, 3), dtype=np.uint8)
square_img[0:h, 0:w] = image

# step-2: get the prediction from the square array
INPUT_WH_YOLO = 640
# A blob is the input format required by the cv2.dnn module
blob = cv2.dnn.blobFromImage(square_img, 1/255,
                             (INPUT_WH_YOLO, INPUT_WH_YOLO),
                             swapRB=True,  # OpenCV reads images as BGR but YOLO expects RGB
                             crop=False)   # crop=False: resize to the fixed size without cropping
yolo.setInput(blob)  # tell the network that this blob is its input data
preds = yolo.forward()  # predictions from YOLO

In [50]:
# Non Maximum Suppression
# step-1: filter detections by objectness confidence and by best class score
CONFIDENCE_THRESHOLD = 0.4    # minimum objectness confidence (column 4 of a row)
CLASS_SCORE_THRESHOLD = 0.25  # minimum score of the best class; reused as the NMS score threshold
NMS_IOU_THRESHOLD = 0.4       # overlap threshold passed to cv2.dnn.NMSBoxes

detections = preds[0]  # drop the batch dimension, e.g. (1, 25200, 25) -> (25200, 25)
boxes = []
confidences = []
classes = []

# Predictions are expressed in the INPUT_WH_YOLO x INPUT_WH_YOLO network input;
# these factors scale them back to the padded square image.
img_w, img_h = max_hw, max_hw
x_factor = img_w/INPUT_WH_YOLO
y_factor = img_h/INPUT_WH_YOLO

for output in detections:
    confidence = output[4]  # row layout: cx, cy, w, h, confidence, per-class scores
    if confidence > CONFIDENCE_THRESHOLD:
        class_scores = output[5:]
        class_score = class_scores.max()   # maximum score over all classes
        class_id = class_scores.argmax()   # position of that maximum (class label)
        if class_score > CLASS_SCORE_THRESHOLD:
            # Distinct names so the image's `w`/`h` from the loading cell are not overwritten
            cx, cy, box_w, box_h = output[0:4]
            # construct the bounding box as left, top, width and height
            left = int((cx - 0.5*box_w)*x_factor)
            top = int((cy - 0.5*box_h)*y_factor)
            width = int(box_w*x_factor)
            height = int(box_h*y_factor)

            box = np.array([left, top, width, height])

            boxes.append(box)
            confidences.append(class_score)  # the best class score is what NMS ranks by
            classes.append(class_id)

# Convert to plain Python lists so every element has the same built-in type
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()

# step-2: NMS
index = cv2.dnn.NMSBoxes(boxes_np, confidences_np,
                         CLASS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
# Depending on the OpenCV version the result is a flat array, an (N, 1) array,
# or an empty tuple when nothing is kept; normalise to a flat integer array.
index = np.array(index, dtype=int).reshape(-1)

In [51]:
# Number of boxes returned by cv2.dnn.NMSBoxes
len(index)

10

In [52]:
# Draw the bounding boxes kept by NMS, with their labels, onto `image`
BOX_COLOR = (0, 255, 0)  # green in BGR

# Flatten first: older OpenCV versions return an (N, 1) array from NMSBoxes
# and an empty tuple when nothing is kept.
for i in np.array(index, dtype=int).reshape(-1):
    box_x, box_y, box_w, box_h = boxes_np[i]
    bb_confidence = int(confidences_np[i]*100)
    bb_class = classes[i]
    class_name = label[bb_class]

    text = f'{class_name}: {bb_confidence}%'

    cv2.rectangle(image, (box_x, box_y), (box_x+box_w, box_y+box_h), BOX_COLOR, 2)

    # Keep the label inside the image when the box touches the top or left edge;
    # otherwise the text would be drawn at a negative coordinate and be invisible.
    text_x = max(box_x, 0)
    text_y = max(box_y - 10, 20)
    cv2.putText(image, text, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX, 0.7, BOX_COLOR, 1)

In [53]:
# Show the untouched input next to the annotated prediction.
# try/finally guarantees the windows are closed even if the kernel is
# interrupted while waiting for a key press.
try:
    cv2.imshow('original', img)
    cv2.imshow('yolo_prediction', image)
    cv2.waitKey(0)  # block until a key is pressed in one of the windows
finally:
    cv2.destroyAllWindows()