In [3]:
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader

In [4]:
# Working directory that holds data.yaml and the 2_predictions/ folder.
# Every later path in the notebook is relative to it. Set the
# VISION_PROJECT_DIR environment variable to run on another machine;
# the original author's location is kept as the default.
PROJECT_DIR = os.environ.get(
    'VISION_PROJECT_DIR',
    'D:/OneDrive/OneDrive - Tata Insights and Quants/vision',
)
os.chdir(PROJECT_DIR)

In [5]:
# Read the dataset description and keep its list of class names.
# A detection's class id is later used as an index into this list.
with open('data.yaml', mode='r') as yaml_file:
    # safe_load parses with SafeLoader, i.e. plain YAML types only
    data_yaml = yaml.safe_load(yaml_file)

labels = data_yaml['names']
labels

['person',
 'car',
 'chair',
 'bottle',
 'pottedplant',
 'bird',
 'dog',
 'sofa',
 'bicycle',
 'horse',
 'boat',
 'motorbike',
 'cat',
 'tvmonitor',
 'cow',
 'sheep',
 'aeroplane',
 'train',
 'diningtable',
 'bus']

In [6]:
# Load the exported YOLO network (ONNX file) through OpenCV's DNN module
ONNX_WEIGHTS = '2_predictions/Model/weights/best.onnx'
yolo = cv2.dnn.readNetFromONNX(ONNX_WEIGHTS)

# Use OpenCV's own inference backend, running on the CPU
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [13]:
# Load the image
IMAGE_PATH = '2_predictions/street_image.jpg'
img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None
    raise FileNotFoundError(f'Could not read image: {IMAGE_PATH}')

# Work on a copy so `img` stays untouched for the side-by-side view later
image = img.copy()

cv2.imshow('image', image)
cv2.waitKey(0)  # block until a key is pressed
# Must be *called*: a bare `cv2.destroyAllWindows` only evaluates to the
# function object and leaves the window open.
cv2.destroyAllWindows()

<function destroyAllWindows>

In [7]:
img = cv2.imread('2_predictions/street_image.jpg')
if img is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None
    raise FileNotFoundError('Could not read image: 2_predictions/street_image.jpg')
image = img.copy()
# ndarray.shape of the image is (rows, columns, channels) = (height, width, depth)
row, col, d = image.shape

# get the YOLO prediction from the image
# step-1: convert image into square image (array)
# The side of the square is the longer image edge, so the whole picture fits.
max_rc = max(row, col)
# Black (all-zero) uint8 canvas of size max_rc x max_rc with 3 channels
input_image = np.zeros((max_rc, max_rc, 3), dtype=np.uint8)
cv2.imshow('input_image', input_image)
cv2.waitKey(0)  # block until a key is pressed
# Must be *called*: a bare `cv2.destroyAllWindows` only evaluates to the
# function object and leaves the window open.
cv2.destroyAllWindows()

<function destroyAllWindows>

In [8]:
# Paste the street image into the top-left corner of the black square canvas.
# The size is taken from `image` itself rather than from the globals
# `row`/`col`, because `row` is re-bound to a detection vector further down
# the notebook and this cell would then fail when re-run.
img_rows, img_cols = image.shape[:2]
input_image[0:img_rows, 0:img_cols] = image
cv2.imshow('input_image', input_image)
cv2.waitKey(0)  # block until a key is pressed
# Must be *called*: a bare `cv2.destroyAllWindows` only evaluates to the
# function object and leaves the window open.
cv2.destroyAllWindows()

<function destroyAllWindows>

In [11]:
# step-2: get yolo predictions from square array image

# Pre-process the square image into a "blob": a 4D array of shape
# (1, 3, INPUT_WH_YOLO, INPUT_WH_YOLO) that is fed to the network.
# cv2.dnn.blobFromImage():
#   * resizes the image to INPUT_WH_YOLO x INPUT_WH_YOLO
#     (crop=False: plain resize, nothing is cut off),
#   * multiplies every pixel by 1/255, so the uint8 values end up in [0, 1],
#   * swapRB=True swaps the first and last channel: cv2.imread returns BGR,
#     so the blob is in RGB order.
INPUT_WH_YOLO = 640
blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WH_YOLO, INPUT_WH_YOLO), swapRB=True, crop=False)
yolo.setInput(blob)
preds = yolo.forward()  # run the network on the blob

# For this model `preds` has shape (1, 25200, 25):
#   axis 0 - batch (a single image),
#   axis 1 - 25200 candidate bounding boxes,
#   axis 2 - 25 values per candidate:
#            the first 5 are center x, center y, width, height and the
#            confidence score of the box,
#            the next 20 are the classification scores, one per class.
preds.shape

(1, 25200, 25)

In [17]:
# Per-image prediction shape (batch axis dropped) next to the
# (rows, cols) size of the square input canvas.
(preds.shape[1:], input_image.shape[:2])

((25200, 25), (1920, 1920))

In [31]:
# Drop the batch axis: one row per candidate bounding box
detections = preds[0, ...]
# Look at the first candidate (box values, confidence, then the class scores)
detections[0, :]

array([6.2053843e+00, 5.6192646e+00, 1.5319378e+01, 1.2029615e+01,
       3.1126863e-06, 2.9626408e-01, 4.1468356e-02, 2.8888814e-02,
       1.3442527e-02, 7.0562646e-02, 6.4744771e-02, 8.1728483e-03,
       6.2823361e-03, 8.4436266e-03, 1.6425727e-02, 1.1815503e-02,
       4.2724703e-03, 2.1813011e-02, 1.8395778e-02, 1.9731700e-02,
       5.8100011e-02, 2.6430789e-02, 1.8664990e-02, 3.4893681e-03,
       1.1641061e-02], dtype=float32)

In [32]:
# Non Maximum Suppression (removes duplicate detections and keeps the
# bounding boxes with high confidence and class-probability scores)
# step-1: filter detections on confidence and on the best class score
# step-2: run NMS on the boxes that are left
CONF_THRESHOLD = 0.4          # minimum confidence of a bounding box
CLASS_SCORE_THRESHOLD = 0.25  # minimum score of the best class
NMS_SCORE_THRESHOLD = 0.25    # score cut-off applied again inside NMSBoxes
NMS_IOU_THRESHOLD = 0.45      # overlap threshold passed to NMSBoxes

detections = preds[0]

# ndarray.shape is (rows, cols), i.e. (height, width) - in that order.
image_h, image_w = input_image.shape[:2]
# Factors that scale box values from the INPUT_WH_YOLO-sized network input
# back to pixels of input_image.
x_factor = image_w / INPUT_WH_YOLO
y_factor = image_h / INPUT_WH_YOLO

# Vectorised filter over all candidates (no per-row Python loop, and no loop
# variable that could overwrite the notebook-level `row`).
box_confidence = detections[:, 4]
class_scores = detections[:, 5:]
best_class_score = class_scores.max(axis=1)   # highest class score per box
best_class_id = class_scores.argmax(axis=1)   # column index of that score
keep = (box_confidence > CONF_THRESHOLD) & (best_class_score > CLASS_SCORE_THRESHOLD)

kept = detections[keep]
cx, cy, w, h = kept[:, 0], kept[:, 1], kept[:, 2], kept[:, 3]
# Convert (center x, center y, width, height) to (left, top, width, height)
# in input_image pixels; astype(int) truncates exactly like int() does.
boxes = np.stack(
    [(cx - 0.5 * w) * x_factor,
     (cy - 0.5 * h) * y_factor,
     w * x_factor,
     h * y_factor],
    axis=1,
).astype(int)
confidences = box_confidence[keep]
classes = best_class_id[keep].tolist()

# clean: plain Python lists for cv2.dnn.NMSBoxes
boxes_np = boxes.tolist()
confidences_np = confidences.tolist()

# NMS
if boxes_np:
    nms_result = cv2.dnn.NMSBoxes(boxes_np, confidences_np, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)
    # Depending on the OpenCV version the result may be a flat array, an
    # (N, 1) array or an empty tuple; normalise it to a flat integer array.
    index = np.asarray(nms_result, dtype=int).reshape(-1)
else:
    # Nothing passed the filters: no boxes to suppress
    index = np.empty(0, dtype=int)
index  # positions (into boxes_np / confidences_np / classes) of the boxes kept by NMS

array([291, 122, 175, 312,  93, 230, 151, 105, 189, 253, 136, 108,  69,
       275, 289, 262, 168,  32, 172, 179, 103, 261])

In [37]:
# Draw every box kept by NMS on `image`, with a "<class>: <confidence>%"
# label on a white strip directly above the box.
GREEN, WHITE, BLACK = (0, 255, 0), (255, 255, 255), (0, 0, 0)

for keep_idx in index:
    # box geometry in pixels: left, top, width, height
    left, top, box_w, box_h = boxes_np[keep_idx]
    right, bottom = left + box_w, top + box_h

    # confidence as a whole percentage and the readable class name
    percent = int(confidences_np[keep_idx] * 100)
    label = labels[classes[keep_idx]]
    caption = f'{label}: {percent}%'

    # box outline
    cv2.rectangle(image, (left, top), (right, bottom), GREEN, 2)
    # filled 30px-high strip above the box as background for the caption
    cv2.rectangle(image, (left, top - 30), (right, top), WHITE, -1)
    cv2.putText(image, caption, (left, top - 10), cv2.FONT_HERSHEY_PLAIN, 0.7, BLACK, 1)

In [38]:
# Show the untouched photo and the annotated copy in two windows,
# wait for a key press, then close both windows.
for window_title, frame in (('original', img), ('yolo_prediction', image)):
    cv2.imshow(window_title, frame)
cv2.waitKey(0)
cv2.destroyAllWindows()