In [1]:
import cv2
import matplotlib.pyplot as plt

In [2]:
config_file = './yolov3-tiny.cfg'
trained_model = './yolov3-tiny.weights'

In [3]:
model = cv2.dnn.readNet(trained_model,config_file)

In [4]:
classLabels = []
with open("./coco.names", 'rt') as f:
    classLabels = f.read().splitlines()

In [5]:
classLabels

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [6]:
len(classLabels)

80

In [7]:
image=cv2.imread("./Image1.jpeg")
height=image.shape[0]
width=image.shape[1]

In [8]:
blob=cv2.dnn.blobFromImage(image,1/255,(320,320),(0,0,0),swapRB=True,crop=False)

In [9]:
model.setInput(blob)
output_layer_names=model.getUnconnectedOutLayersNames()
outputlayer=model.forward(output_layer_names)

In [10]:
import numpy as np
boxes=[]
confidences=[]
class_ids=[]
for output in outputlayer:
    for detection in output:
        score=detection[5:]
        classid=np.argmax(score)
        confidence=score[classid]
        if confidence>0.4:
            cent_x=int(detection[0]*width)
            cent_y=int(detection[1]*height)
            w=int(detection[2]*width)
            h=int(detection[3]*height)
            x=int(cent_x-w/2)
            y=int(cent_y-h/2)
            boxes.append([x,y,w,h])
            confidences.append(float(confidence))
            class_ids.append(classid)

In [11]:
len(boxes)


6

In [12]:
indexe=cv2.dnn.NMSBoxes(boxes,confidences,0.4,0.5)
font=cv2.FONT_HERSHEY_PLAIN
colors=np.random.uniform(0,255,size=(len(boxes),3))
print(indexe)

[[3]
 [1]
 [5]
 [2]
 [0]
 [4]]


In [13]:
for i in indexe.flatten():
    x,y,w,h=boxes[i]
    label=str(classLabels[class_ids[i]])
    confi=str(round(confidences[i],2))
    color=colors[i]
    cv2.rectangle(image,(x,y),(x+w,y+h),color,5)
    cv2.putText(image,label+" "+confi,(x,y),font,2,(255,255,255),3)

In [14]:
cv2.imshow("Image detection",image)
cv2.waitKey(2)

-1

In [15]:
video=cv2.VideoCapture("./4K Road traffic video for object detection and tracking.mp4")
while True:
    _,image=video.read()
    height=image.shape[0]
    width=image.shape[1]
    blob=cv2.dnn.blobFromImage(image,1/255,(320,320),(0,0,0),swapRB=True,crop=False)
    model.setInput(blob)
    output_layer_names=model.getUnconnectedOutLayersNames()
    outputlayer=model.forward(output_layer_names)
    boxes=[]
    confidences=[]
    class_ids=[]
    for output in outputlayer:
        for detection in output:
            score=detection[5:]
            classid=np.argmax(score)
            confidence=score[classid]
            if confidence>0.1:
                cent_x=int(detection[0]*width)
                cent_y=int(detection[1]*height)
                w=int(detection[2]*width)
                h=int(detection[3]*height)
                x=int(cent_x-w/2)
                y=int(cent_y-h/2)
                boxes.append([x,y,w,h])
                confidences.append(float(confidence))
                class_ids.append(classid)
    indexe=cv2.dnn.NMSBoxes(boxes,confidences,0.1,0.1)
    font=cv2.FONT_HERSHEY_PLAIN
    colors=np.random.uniform(0,255,size=(len(boxes),3))
    if(len(indexe)>0):
        for i in indexe.flatten():
            x,y,w,h=boxes[i]
            label=str(classLabels[class_ids[i]])
            confi=str(round(confidences[i],2))
            color=colors[i] 
            cv2.rectangle(image,(x,y),(x+w,y+h),color,5)
            cv2.putText(image,label+" "+confi,(x,y),font,2,(255,255,255),3)
    cv2.imshow("Object Detection",image)
    if cv2.waitKey(2) & 0xFF == ord(' '):
        break

video.release()
cv2.destroyAllWindows()