In [1]:
import numpy as np
import cv2 as cv

In [2]:
net=cv.dnn.readNet('yolo_req/yolov3.weights','yolo_req/yolov3_config.cfg')

In [3]:
classes = open('yolo_req/coco_lable.txt').read().strip().split("\n")

In [4]:
classes[:10]

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light']

In [5]:
#input img
img=cv.imread('images/busy_street.jpg')

height,width,depth=img.shape
print(height,width,depth)

319 480 3


In [6]:
#image pre-proessing

#normalizing by dividing with 255
#converting BGR to RGB
blob=cv.dnn.blobFromImage(img,1/255, (416,416), (0,0,0), swapRB=True, crop=False)
blob.shape

(1, 3, 416, 416)

In [7]:
#giving input to yolo model
net.setInput(blob)

#get names of layers with unconnected outputs and run forward pass
op_layers_names=net.getUnconnectedOutLayersNames()
layer_outputs=net.forward(op_layers_names)

In [8]:
layer_outputs[0].shape

(507, 85)

In [9]:
boxes=[]
predictions=[]
class_ids=[]

In [10]:
for op in layer_outputs:
    #the first 5 elements of the detected objs are the 4 co-ordinates of boxes and 1 accuracy of bounding box 
    #rest of the 80 elements are the prediction score of different classes
    for detected in op:
        preds=detected[5:]
        class_id=np.argmax(preds)
        max_preds=preds[class_id]
        if max_preds>0.5:
            cx=int(detected[0]*width)
            cy=int(detected[1]*height)
            h=int(detected[2]*width)
            w=int(detected[3]*height)
            
            x=int(cx-w/2)
            y=int(cy-h/2)
            
            boxes.append([x,y,w,h])
            predictions.append(float(max_preds))
            class_ids.append(class_id)
            print(float(max_preds)*100)

        

96.95344567298889
73.20223450660706
84.99931693077087
98.61513376235962
92.51368641853333
88.63757848739624
67.39851236343384
53.73169183731079
99.1942048072815
99.7689962387085
88.18381428718567
97.73658514022827
72.576242685318
76.65911912918091
99.42471981048584
98.41717481613159
87.35307455062866
88.12482953071594
94.44882869720459
90.90522527694702
97.69986271858215
90.4704749584198
92.26205945014954
99.6333360671997
99.06739592552185
66.10188484191895
96.02367281913757
83.62448811531067
76.3612687587738
99.74853992462158
71.73553705215454
69.93042826652527
84.0991735458374
57.32426643371582
61.30307912826538
66.45187735557556
59.96044874191284
90.11235237121582
80.8457612991333
91.57910943031311
62.16574311256409
60.9795868396759
65.62705636024475
94.35933828353882
87.3913586139679
82.53733515739441
95.31622529029846
97.89945483207703
95.09103894233704
53.313493728637695
51.69926881790161
69.58565711975098
71.55404090881348
61.339592933654785
92.42700934410095
69.99593377113342
8

In [11]:
indexes=cv.dnn.NMSBoxes(boxes,predictions,0.5,0.4)
print(indexes.flatten())
boxes


[ 9 29 23 14 24 69  3 72 63 47 11  0 46 66 18 62 68 54 22 67 37 16 32 56
 57 58 35 42 53 41 33 71 49 50]


[[372, 72, 39, 23],
 [370, 73, 41, 25],
 [306, 139, 38, 37],
 [309, 140, 32, 41],
 [309, 139, 32, 44],
 [364, 157, 42, 21],
 [341, 170, 40, 48],
 [342, 168, 39, 52],
 [344, 176, 39, 48],
 [345, 174, 37, 52],
 [66, 192, 43, 51],
 [64, 188, 43, 57],
 [59, 192, 44, 62],
 [174, 231, 40, 23],
 [41, 238, 77, 35],
 [43, 233, 73, 44],
 [166, 238, 53, 22],
 [293, 243, 51, 26],
 [-21, 250, 71, 23],
 [-17, 246, 74, 28],
 [45, 238, 71, 39],
 [44, 235, 72, 45],
 [190, 250, 50, 24],
 [289, 248, 58, 24],
 [327, 246, 58, 42],
 [324, 240, 66, 55],
 [325, 250, 62, 42],
 [323, 245, 65, 53],
 [328, 255, 42, 67],
 [330, 254, 45, 71],
 [335, 258, 37, 74],
 [225, 68, 10, 16],
 [225, 67, 12, 17],
 [128, 81, 13, 6],
 [167, 74, 11, 16],
 [167, 71, 14, 19],
 [168, 71, 15, 19],
 [254, 73, 11, 19],
 [253, 72, 14, 18],
 [372, 74, 39, 19],
 [374, 77, 36, 18],
 [170, 93, 15, 6],
 [232, 84, 9, 22],
 [224, 90, 20, 24],
 [224, 88, 19, 27],
 [190, 102, 24, 11],
 [191, 103, 24, 10],
 [224, 92, 15, 24],
 [224, 92, 15, 23],

In [12]:
colors=np.random.uniform(0,255,size=(len(boxes),3))

In [13]:
for i in indexes.flatten():
    x,y,w,h=boxes[i]
    lable=str(classes[class_ids[i]])
    text="{}, {:.2f}%".format(lable,predictions[i]*100)
    color=colors[i]
    cv.rectangle(img,(x,y),(x+w,y+h),color,2)
    cv.putText(img,text, (x,y+20), cv.FONT_HERSHEY_PLAIN, 2, (255,255,255), 2)
    
cv.imshow('Image',img)
cv.waitKey(0)
cv.destroyAllWindows()