In [1]:
pip install PyYAML

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import cv2
import numpy as np
import os
import yaml
from yaml.loader import SafeLoader


In [4]:
##Loading yaml file
with open('data.yaml', mode='r') as f:
    data_yaml= yaml.load(f, Loader=SafeLoader)

labels = data_yaml['names']
print(labels)

['person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog', 'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor', 'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus']


In [6]:
# Load Yolo Model
yolo = cv2.dnn.readNetFromONNX('./Model/weights/best.onnx')
yolo.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
yolo.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [7]:
# Load the test image to get prediction
img = cv2.imread('./street_image.jpg')
image = img.copy()
cv2.imshow('image',image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [8]:
#row: width of image; col: height of image; d : color
row, col, d = image.shape

max_rc = max(row,col)
input_image = np.zeros((max_rc,max_rc,3),dtype=np.uint8)
cv2.imshow('input_image',input_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [9]:
# get the yolo prediction from the image


#converting image into square array
row, col, d = image.shape
max_rc = max(row,col)
input_image = np.zeros((max_rc,max_rc,3),dtype=np.uint8)
input_image[0:row, 0:col] = image



#get prediction from square array
INPUT_WH_YOLO = 640
blob = cv2.dnn.blobFromImage(input_image,1/255,(INPUT_WH_YOLO,INPUT_WH_YOLO),swapRB=True,crop=False)
yolo.setInput(blob)
preds = yolo.forward()


print(preds.shape)
#25200 : bounding boxes; 25 columns : centre x,centre y,W,H,Confidence,  probscore class1, ....probscoreclass20


(1, 25200, 25)


In [10]:
# Non-Maximum Supression (To get away with multiple bounding boxes)

#step1: filter detection based on confidence(0.4) and probability score(0.25)
detections = preds[0]
boxes = []
confidences = []
classes = []


# width and height of the image (input_image)
image_w, image_h = input_image.shape[:2]
x_factor = image_w/INPUT_WH_YOLO
y_factor = image_h/INPUT_WH_YOLO


for i in range (len(detections)):
    row = detections[i]
    confidence = row[4]
    if confidence > 0.4:
         class_score = row[5:].max() #maximum probability from objects
         class_id = row[5:].argmax()# index position at max prob

         if class_score > 0.25:
              cx, cy, w, h = row[0:4]
              # construct bounding from four values
              # left, top, width, height
              left = int((cx - 0.5*w)*x_factor)
              top = int((cy - 0.5*h)*y_factor)
              width = int(w*x_factor)
              height = int(h*y_factor)

              box = np.array([left,top,width,height])

              confidences.append(confidence)
              boxes.append(box)
              classes.append(class_id)



In [11]:
confidences

[np.float32(0.48257402),
 np.float32(0.53962845),
 np.float32(0.54132277),
 np.float32(0.44483432),
 np.float32(0.4479569),
 np.float32(0.60229576),
 np.float32(0.70132756),
 np.float32(0.45455986),
 np.float32(0.47489694),
 np.float32(0.49938738),
 np.float32(0.45838055),
 np.float32(0.6622501),
 np.float32(0.7678153),
 np.float32(0.5696365),
 np.float32(0.73180085),
 np.float32(0.67971927),
 np.float32(0.6272745),
 np.float32(0.5302333),
 np.float32(0.5125046),
 np.float32(0.48839337),
 np.float32(0.42139658),
 np.float32(0.45473224),
 np.float32(0.5198604),
 np.float32(0.4392945),
 np.float32(0.51502156),
 np.float32(0.5927299),
 np.float32(0.61433893),
 np.float32(0.59259903),
 np.float32(0.42723718),
 np.float32(0.53011),
 np.float32(0.42107236),
 np.float32(0.6025764),
 np.float32(0.75373137),
 np.float32(0.40664852),
 np.float32(0.5030193),
 np.float32(0.46019152),
 np.float32(0.4185531),
 np.float32(0.40155166),
 np.float32(0.88447493),
 np.float32(0.8723721),
 np.float32(0.475

In [12]:
boxes

[array([874, 423, 103, 106]),
 array([1763,  532,   62,   97]),
 array([1615,  710,   97,   80]),
 array([762, 352, 129,  98]),
 array([626, 413, 135, 114]),
 array([643, 417, 115, 108]),
 array([871, 418, 114, 116]),
 array([872, 416, 116, 118]),
 array([869, 425, 119, 103]),
 array([661, 470, 146, 122]),
 array([1760,  529,   64,  101]),
 array([285, 620,  62, 190]),
 array([284, 624,  62, 183]),
 array([1880,  613,   41,  205]),
 array([175, 613,  82, 256]),
 array([180, 609,  88, 248]),
 array([203, 614,  69, 223]),
 array([284, 627,  61, 178]),
 array([373, 642,  75, 184]),
 array([1615,  709,   97,   82]),
 array([1712,  680,   83,  111]),
 array([1706,  678,  126,  117]),
 array([1730,  677,  107,  123]),
 array([118, 660,  73, 217]),
 array([168, 623,  84, 241]),
 array([366, 641,  84, 222]),
 array([364, 640,  83, 222]),
 array([828, 669, 103, 208]),
 array([1617,  705,   92,   87]),
 array([1614,  706,   96,   80]),
 array([1716,  679,  129,  124]),
 array([118, 666,  74, 211

In [13]:
classes

[np.int64(1),
 np.int64(0),
 np.int64(0),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(1),
 np.int64(1),
 np.int64(19),
 np.int64(19),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(0),
 np.int64(1),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(1),
 np.int64(1),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(1),
 np.int64(0),
 np.int64(1),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(0),
 np.int64(1),
 np.

In [14]:
boxes_np = np.array(boxes).tolist()
confidences_np = np.array(confidences).tolist()


#NMS
index = cv2.dnn.NMSBoxes(boxes_np, confidences_np,0.25,0.45).flatten()

In [16]:
len(index) # this are objects with good probability score

24

In [17]:
# Draw Bounding Box

for ind in index:
    x,y,w,h = boxes_np[ind]
    bb_conf = confidences_np[ind]
    classes_id = classes[ind]
    class_name = labels[classes_id]

    text = f'{class_name}: {bb_conf}%'
    print(text)


bus: 0.8844749331474304%
car: 0.8822808265686035%
car: 0.8250964879989624%
person: 0.8225795030593872%
car: 0.8217966556549072%
car: 0.802025318145752%
person: 0.7973363995552063%
car: 0.7738236784934998%
person: 0.7716341018676758%
car: 0.7705860137939453%
car: 0.7690921425819397%
car: 0.7572552561759949%
person: 0.7387762069702148%
car: 0.7386461496353149%
person: 0.7318008542060852%
car: 0.7315365672111511%
person: 0.7075316309928894%
person: 0.6955419182777405%
car: 0.6704357266426086%
person: 0.6613653898239136%
car: 0.6357400417327881%
person: 0.6228829622268677%
person: 0.5696365237236023%
person: 0.5396284461021423%


In [18]:
cv2.rectangle(image, (x,y),(x+w,y+w),(0,255,0),2)
cv2.rectangle(image,(x,y-30),(x+w,y),(0,255,255),-1)

cv2.putText(image,text,(x,y-10),cv2.FONT_HERSHEY_PLAIN,0.7,(0,0,0),1)

array([[[10, 17, 14],
        [11, 18, 15],
        [11, 18, 15],
        ...,
        [ 9, 13, 14],
        [ 9, 13, 14],
        [ 9, 13, 14]],

       [[10, 17, 14],
        [10, 17, 14],
        [11, 18, 15],
        ...,
        [ 9, 13, 14],
        [ 9, 13, 14],
        [ 9, 13, 14]],

       [[10, 17, 14],
        [10, 17, 14],
        [10, 17, 14],
        ...,
        [ 9, 13, 14],
        [ 9, 13, 14],
        [ 9, 13, 14]],

       ...,

       [[33, 42, 52],
        [34, 43, 52],
        [33, 42, 51],
        ...,
        [ 9, 13, 14],
        [ 9, 13, 14],
        [ 9, 13, 14]],

       [[51, 60, 73],
        [51, 61, 71],
        [49, 59, 69],
        ...,
        [ 9, 13, 14],
        [ 9, 13, 14],
        [ 9, 13, 14]],

       [[64, 76, 88],
        [63, 75, 87],
        [63, 73, 83],
        ...,
        [ 9, 13, 14],
        [ 9, 13, 14],
        [ 9, 13, 14]]], dtype=uint8)

In [19]:
cv2.imshow('original',img)
cv2.imshow('yolo_prediction',image)
cv2.waitKey(0)
cv2.destroyAllWindows()