This file loads the model into OpenCV, makes predictions on new images, and constructs the image with the predicted bounding boxes and labels.

In [None]:
import cv2
import yaml
from yaml.loader import SafeLoader
import os
from os.path import join as pjoin
import numpy as np 

In [None]:
# 1. Load data.yaml file (config)
YOLO_DIR = "yolov5-old/"
with open(pjoin(YOLO_DIR, 'data.yaml'), 'r', encoding='utf-8') as f:
    # SafeLoader only constructs plain Python objects; the result is a dict
    data_yaml = yaml.load(f, Loader=SafeLoader)

# `names` can be a plain list of class names or an {id: name} mapping
# (the mapping form appears in newer YOLOv5 dataset configs).  Calling
# list() on a mapping would return the ids instead of the names, so a
# mapping is read out in class-id order instead.
_names = data_yaml['names']
if isinstance(_names, dict):
    labels = [_names[class_id] for class_id in sorted(_names)]
else:
    labels = list(_names)
labels

In [None]:
# 2. Load trained yolo model into opencv
# NOTE: despite the *_DIR suffix, both constants are paths to weight files.
# ONNX export that is actually loaded below.
MODEL_DIR = "./yolov5-old/runs/train/Model100_small/weights/best.onnx"
# PyTorch checkpoint (from a different training run); only referenced by the
# commented-out loader below.
MODEL_TORCH_DIR = "./yolov5-old/runs/train/Test10_small/weights/best.pt"

# yolov5 = cv2.dnn.readNetFromTorch(MODEL_TORCH_DIR)
yolov5 = cv2.dnn.readNetFromONNX(MODEL_DIR)

# set computing engine
# NOTE(review): the CUDA backend presumably needs an OpenCV build with CUDA
# support -- confirm on the target machine.
yolov5.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)

# make computations on device
yolov5.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [None]:
# 3. Load testing images
# *** We can have variable-size training images, but test images should be padded to a square ***
def load_img(fname, img_dir="./test_data"):
    """Read a test image and pad it onto a black square canvas.

    Args:
        fname: File name of the image inside ``img_dir``.
        img_dir: Directory that holds the test images.

    Returns:
        A tuple ``(img_input, orig_image, image)``:

        * ``img_input`` -- uint8 array of shape ``(l, l, 3)`` with
          ``l = max(rows, cols)``; the image sits in the top-left corner and
          the remaining area is black.  This is the model input.
        * ``orig_image`` -- the image exactly as read from disk.
        * ``image`` -- an independent copy of ``orig_image``.

    Raises:
        FileNotFoundError: If the image path does not exist.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    path = pjoin(img_dir, fname)
    if not os.path.isfile(path):
        raise FileNotFoundError(f"test image not found: {path}")

    # cv2.imread signals failure by returning None instead of raising
    orig_image = cv2.imread(path)  # original image (to be compared with)
    if orig_image is None:
        raise ValueError(f"could not decode image: {path}")

    image = orig_image.copy()   # np ndarray
    nrow, ncol, ch = image.shape
    assert ch == 3, "require testing image in RGB format"

    # Pad (not stretch) to a square so the aspect ratio is preserved
    side = max(nrow, ncol)
    img_input = np.zeros((side, side, 3), dtype=np.uint8)   # black background board
    img_input[:nrow, :ncol] = image     # fill in image at the top-left corner
    return img_input, orig_image, image


# Sample test image; load_img looks it up in its default directory ("./test_data")
fname = "000229.jpg"
# img_input: padded square model input, orig_image: untouched image, image: working copy
img_input, orig_image, image  = load_img(fname)

    



In [None]:
# 4. Let the model make predictions
def one_prediction(img_input, model, YOLO_IMG_SZ=(640, 640)):
    """Run one forward pass of ``model`` on ``img_input``.

    Args:
        img_input: Image array handed to ``cv2.dnn.blobFromImage``.
        model: Network object exposing ``setInput`` and ``forward``.
        YOLO_IMG_SZ: Spatial size the blob is resized to (see export.py).

    Returns:
        The first element of the network output.  Per the original author's
        note the full output has shape ``(1, #bbox, 25)`` where
        25 = 5 {centerX, centerY, w, h, confidence} + 20 class scores
        -- TODO confirm against the exported model.
    """
    # See: https://docs.opencv.org/4.x/d6/d0f/group__dnn.html#ga29f34df9376379a603acd8df581ac8d7
    # Scale pixel values by 1/255, resize to YOLO_IMG_SZ without cropping,
    # and swap the R and B channels.
    net_input = cv2.dnn.blobFromImage(
        img_input,
        scalefactor=1/255,
        size=YOLO_IMG_SZ,
        swapRB=True,
        crop=False,
    )
    model.setInput(net_input)

    detections = model.forward()
    return detections[0]


# Feed the padded square image: nonmax_sup() rescales the boxes using
# img_input's shape, so the model must see that same image.
pred = one_prediction(img_input, yolov5)

In [None]:
# 5. Non-maximum Suppression: 
## filter predictions of a single image based on confidence and prob score threshold
## NMS removes duplicate bbox
def nonmax_sup(pred, img_input, conf_thold=0.1, prob_thold=0.5, YOLO_IMG_WH=640,
               nms_thold=None):
    """Filter the raw predictions of one image and run non-maximum suppression.

    Args:
        pred: Iterable of prediction rows.  Each row is read as
            ``[cx, cy, w, h, confidence, class scores...]``; box values are
            presumably expressed in the ``YOLO_IMG_WH`` pixel space
            (TODO confirm against the exported model).
        img_input: Image that was fed to the model; only its height and
            width are used, to scale boxes back to image pixels.
        conf_thold: A row is kept only if its confidence is strictly greater
            than this value.  Also passed to NMS as the score threshold.
        prob_thold: A row is kept only if its best class score is at least
            this value.
        YOLO_IMG_WH: Side length of the square network input.
        nms_thold: Overlap threshold handed to ``cv2.dnn.NMSBoxes``.  When
            ``None`` (default) ``prob_thold`` is reused, which matches the
            historical behaviour of this function.

    Returns:
        ``(index, conf_list, bbox_list, id_list)`` where ``index`` is a 1-D
        integer array of positions (into the three lists) that survive NMS,
        ``conf_list`` holds confidences, ``bbox_list`` holds
        ``[x_left, y_top, width, height]`` integer boxes and ``id_list``
        holds the best class id of every row that passed both thresholds.
    """
    if nms_thold is None:
        nms_thold = prob_thold

    # factors to restore shape info of testing image, which is not 640x640.
    # shape is (rows, cols, ...): x scales with the width (cols) and y with
    # the height (rows).
    img_h, img_w = img_input.shape[:2]
    x_factor = img_w / YOLO_IMG_WH
    y_factor = img_h / YOLO_IMG_WH

    conf_list, bbox_list, id_list = [], [], []
    for row in pred:
        conf = row[4]   # confidence of "this bbox catches an object of whatever class"
        if not conf > conf_thold:
            continue

        class_scores = row[5:]
        tag_id = class_scores.argmax()      # id of "the most likely class/tag"
        prob_score = class_scores[tag_id]   # score of "the most likely class/tag"
        if not prob_score >= prob_thold:
            continue

        # bbox info: centre-based box -> integer top-left position, width, height
        cx, cy, w, h = row[:4]
        width = int(w * x_factor)
        height = int(h * y_factor)
        x_left = int((cx - 0.5 * w) * x_factor)
        y_top = int((cy - 0.5 * h) * y_factor)

        # store info
        conf_list.append(conf)
        bbox_list.append([x_left, y_top, width, height])
        id_list.append(tag_id)

    # Nothing passed the thresholds: skip NMS and return an empty index array
    if not bbox_list:
        return np.empty(0, dtype=np.int32), conf_list, bbox_list, id_list

    # NMS -- scores are passed as plain Python floats
    keep = cv2.dnn.NMSBoxes(
        bbox_list, [float(c) for c in conf_list], conf_thold, nms_thold
    )
    # NMSBoxes may hand back an empty tuple or a column vector depending on
    # the OpenCV version; normalise to a flat integer array either way.
    index = np.asarray(keep, dtype=np.int32).reshape(-1)

    return index, conf_list, bbox_list, id_list


                


In [None]:
# Threshold the raw predictions and run NMS; boxes are rescaled with img_input's shape
index, conf_list, bbox_list, id_list = nonmax_sup(pred, img_input)
index  # notebook display of the indices kept by NMS

In [None]:
# 6. Draw bbox
## We want each bbox to have a tag (word instead of id) and a probability
# Reload the image so drawing starts from a fresh copy of the test image
img_input, orig_image, image  = load_img(fname)
def draw_bbox():
    """Draw every box kept by NMS, with its label, onto the global ``image``.

    Reads the module-level ``index``, ``bbox_list``, ``conf_list``,
    ``id_list`` and ``labels``; modifies ``image`` in place and prints one
    ``"<tag>: <confidence>%"`` line per drawn box.  Returns nothing.
    """
    GREEN = (0, 255, 0)
    BLACK = (0, 0, 0)

    for idx in index:
        # retrieve info
        x_left, y_top, width, height = bbox_list[idx]
        conf = conf_list[idx] * 100
        tag = labels[ id_list[idx] ]

        # format text display
        text = f"{tag}: {conf:.1f}%"
        print(text)

        # Keep the label on the canvas when the box touches the top edge
        text_origin = (x_left, max(y_top - 10, 10))

        # cv2.rectangle(image, top-left, bot-right, box colr, thickness)
        cv2.rectangle(image, (x_left, y_top), (x_left+width, y_top+height), GREEN, 2)
        cv2.putText(image, text, text_origin, cv2.FONT_HERSHEY_PLAIN, 0.8, BLACK, 1)


# Actually draw the boxes; without this call `image` is shown unannotated
draw_bbox()
    

In [None]:
# Show the untouched image next to the working copy used for drawing
cv2.imshow('Original Image', orig_image)
cv2.imshow('With Object Detection', image)
# Delay of 0: wait for a key press before continuing
cv2.waitKey(0)
cv2.destroyAllWindows()