In [9]:
import cv2 as cv
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader as sl

# Predictions:
"""
1. Load the YAML file and the YOLO model.
2. Load one image and get the YOLO predictions (detections) for it.
3. Apply non-maximum suppression: filter overlapping detections so that each object keeps only its best bounding box.
4. Draw the bounding boxes.
"""

In [10]:
# 1. Read the dataset description (data.yaml) with PyYAML's safe loader
with open('data.yaml', mode='r') as f:
    data_yaml = yaml.safe_load(f)

# Only the 'names' entry (the class labels) is needed for prediction;
# a detection's class index is later looked up in this list.
labels = data_yaml['names']
print(labels)

['person', 'car', 'chair', 'bottle', 'potted plant', 'sheep', 'cow', 'boat', 'horse', 'motor bike', 'bicycle', 'dog', 'bird', 'sofa', 'bus', 'tv monitor', 'cat', 'train', 'aeroplane', 'dining table']


"""

This code is using OpenCV's deep neural network (DNN) module to load and 
configure a YOLO (You Only Look Once) model from a pre-trained ONNX (Open Neural Network Exchange) file. Let's break down each part:

    yolo = cv.dnn.readNetFromONNX('./Model/weights/best.onnx'):
        This line loads a pre-trained model saved in the ONNX format.
        The ONNX file (best.onnx) contains the structure and trained weights of a YOLO model.
        OpenCV provides the readNetFromONNX() function to read the ONNX model 
        and prepare it for inference (predicting results based on input data).

    yolo.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV):
        This specifies that the model should use OpenCV as the backend for inference.
        OpenCV has different backends for executing neural networks, such as OpenCV itself, CUDA, or Intel's OpenVINO. 
        Here, we are setting the backend to OpenCV's default DNN module.

    yolo.setPreferableTarget(cv.dnn.DNN_TARGET_CPU):
        This tells OpenCV to use the CPU as the hardware target for running inference.
        Since the model was likely trained in a GPU environment (as indicated by the comment), 
        this line ensures that the model will run on the CPU for inference (prediction) instead of requiring a GPU.
"""

In [11]:
# 1.1 Load YOLO Model
# Read the exported ONNX network through OpenCV's DNN module.
yolo = cv.dnn.readNetFromONNX('./Model/weights/best.onnx')

# Training was done in a GPU environment, so state explicitly that
# inference should run on the CPU using OpenCV's own DNN backend.
yolo.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
yolo.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)

In [17]:
# 2. Load the image for testing the model
img = cv.imread('./street_image.jpg')
# cv.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface below as an opaque AttributeError.
if img is None:
    raise FileNotFoundError("Could not read image './street_image.jpg'")

# Work on a copy so the untouched original (`img`) can still be shown later.
image = img.copy()

# Preview the image in a separate window; blocks until a key is pressed,
# then closes the window so it does not linger behind the notebook.
cv.imshow('image', image)
cv.waitKey(0)
cv.destroyAllWindows()

# ndarray.shape is (rows=height, cols=width, depth=colour channels)
rows, cols, d = image.shape
print(rows, cols, d)

956 1920 3


In [18]:
# 2.1 Get the yolo predictions from the image
#
# The image is first made square. One way to do that, used here, is to
# create a black dummy canvas whose side is max(rows, cols) and overlay the
# image onto its top-left corner.

# Step 1: build the square canvas and paste the image onto it
max_rc = rows if rows > cols else cols
canvas_shape = (max_rc, max_rc, 3)
input_image = np.zeros(canvas_shape, dtype=np.uint8)   # uint8: unsigned 8-bit integers
input_image[:rows, :cols] = image

# Step 2: feed the square image to the YOLO model
# (the model was trained with a 640x640 input size)
INPUT_WH_YOLO = 640
# Convert to a blob: scale pixel values by 1/255, resize to the network
# input size, swap the R and B channels, and do not crop.
blob = cv.dnn.blobFromImage(
    input_image,
    scalefactor=1/255,
    size=(INPUT_WH_YOLO, INPUT_WH_YOLO),
    swapRB=True,
    crop=False,
)
yolo.setInput(blob)
# Run the forward pass to obtain the raw predictions
preds = yolo.forward()

print(preds.shape)

"""

cv.imshow('Input image', input_image)
cv.waitKey(0)
"""


(1, 25200, 25)


"\n\ncv.imshow('Input image', input_image)\ncv.waitKey(0)\n"

# preds has shape (1, 25200, 25):
#   - 25200 rows: one per candidate bounding box proposed by the model for this image
#   - 25 columns per box, split into 5 + 20:
#       * the first 5 columns are center_x, center_y, w, h and the confidence score
#       * the remaining 20 columns are the classification (probability) scores, one per class

In [19]:
# 3. NON MAXIMUM SUPPRESSION
#
# Remove duplicate bounding boxes and keep only detections with a good
# confidence score and a good class-probability score:
#   step 1: filter by confidence, then by the best class probability, and
#           convert (center_x, center_y, w, h) from network-input coordinates
#           into (left, top, width, height) on the padded square image;
#   step 2: run OpenCV's built-in non-maximum suppression on the survivors.

CONFIDENCE_THRESHOLD = 0.4      # minimum confidence for a row to be considered
CLASS_SCORE_THRESHOLD = 0.25    # minimum best-class probability
NMS_SCORE_THRESHOLD = 0.25      # score_threshold passed to NMSBoxes
NMS_IOU_THRESHOLD = 0.45        # nms_threshold passed to NMSBoxes

# Drop the leading batch axis: one row per candidate box
detections = preds[0]

# Per-detection results, kept index-aligned with each other
boxes = []
confidences = []
classes = []

# ndarray.shape is (rows, cols), i.e. (height, width) -- not (width, height)
image_h, image_w = input_image.shape[:2]
# Factors that map network-input coordinates back onto the padded square image
x_factor = image_w / INPUT_WH_YOLO
y_factor = image_h / INPUT_WH_YOLO

for row in detections:
    confidence = row[4]                       # confidence that the box contains an object
    if confidence > CONFIDENCE_THRESHOLD:
        class_scores = row[5:]
        class_id = int(class_scores.argmax())  # index of the most probable class
        class_score = class_scores[class_id]   # its probability

        if class_score > CLASS_SCORE_THRESHOLD:
            cx, cy, w, h = row[:4]
            # Convert centre/size into a top-left anchored box in image pixels
            left = int((cx - 0.5 * w) * x_factor)
            top = int((cy - 0.5 * h) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)

            # Store plain Python numbers: that is what NMSBoxes and the
            # drawing calls expect, so no ndarray round trip is needed.
            boxes.append([left, top, width, height])
            confidences.append(float(confidence))
            classes.append(class_id)

# Names kept for the cells that follow
boxes_np = boxes
confidences_np = confidences

# NMS -- params (boxes, scores, score_threshold, nms_threshold).
# The result is normalised through np.asarray because, depending on the OpenCV
# version, NMSBoxes may return an (N, 1) array, a flat array, or an empty tuple
# when nothing survives; calling .flatten() directly on a tuple would raise.
index = np.asarray(
    cv.dnn.NMSBoxes(boxes_np, confidences_np, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD),
    dtype=int,
).flatten()


In [20]:
# 4. Draw the bounding boxes selected by NMS (positions in `index`) onto `image`

LABEL_HEIGHT = 30   # height in pixels of the white strip that holds the label text

for i in index:
    # Box geometry, confidence (as a whole percentage) and class of this detection
    x, y, w, h = boxes_np[i]
    bb_conf = int(confidences_np[i]*100)
    class_id = classes[i]
    class_name = labels[class_id]

    text = f'{class_name}: {bb_conf}%'
    print(text)

    # The label strip normally sits directly above the box. For boxes closer
    # than LABEL_HEIGHT to the top edge it is clamped to the frame so the
    # label stays visible instead of being drawn outside the image.
    label_top = max(y - LABEL_HEIGHT, 0)
    label_bottom = label_top + LABEL_HEIGHT

    cv.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)                    # box outline
    cv.rectangle(image, (x, label_top), (x + w, label_bottom), (255, 255, 255), -1)  # filled label strip
    cv.putText(image, text, (x, label_bottom - 10), cv.FONT_HERSHEY_PLAIN, 0.7, (0, 0, 0), 1)
    

person: 57%
person: 53%
person: 52%
bus: 44%
car: 41%


In [21]:
# Show the untouched original next to the annotated copy, each in its own
# window, then wait for a key press and close every window.
for window_title, frame in (('Original', img), ('yolo_predictions', image)):
    cv.imshow(window_title, frame)
cv.waitKey(0)
cv.destroyAllWindows()