In [1]:
import cv2 
import numpy as np
import time

In [2]:
# ---- Detection settings (tune these in one place) ----
PATH = 'coco.names'     # text file holding one class label per line
classes = []            # placeholder; replaced by the labels read from PATH
image_width = 416       # side length in pixels of the square network input
threshold = 0.6         # minimum class confidence for a detection to be kept
overlap_thresh = 0.3    # overlap threshold handed to non-max suppression

In [3]:
# Load the class labels: one label per line, trailing newlines dropped first
# so the final list has no empty last entry.
with open(PATH, 'rt') as names_file:
    raw_names = names_file.read()
classes = raw_names.rstrip('\n').split('\n')

# Darknet network definition and trained weights.
# NOTE(review): the config is named for full YOLOv4 while the weights are the
# "tiny" variant -- confirm that 'yolov4.cfg' really holds the tiny config.
model_config = 'yolov4.cfg'
model_weights = 'yolov4-tiny.weights'

# Build the network and request the CUDA backend/target for inference.
net = cv2.dnn.readNetFromDarknet(model_config, model_weights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [4]:
def findObject(output,img):
    """Draw the detections on ``img`` and overlay per-frame vehicle counts.

    Parameters
    ----------
    output : iterable of 2-D arrays
        Raw network outputs, one array per output layer. Each row is read as
        ``[cx, cy, w, h, <index 4, not used here>, score_0, score_1, ...]``
        where the first four values are fractions of the frame size.
    img : numpy.ndarray
        Frame to annotate; it is modified in place.

    Returns
    -------
    dict
        Maps class label -> number of detections kept after non-max
        suppression in this frame.

    Notes
    -----
    Reads the module-level ``classes``, ``threshold`` and ``overlap_thresh``.
    """
    counts = {}  # class label -> number of kept detections in this frame
    height, width = img.shape[:2]  # only the first two dims are needed
    bbox = []
    classId_list = []
    confidence_list = []

    for out in output:  # one array per output layer
        for det in out:  # one candidate detection per row
            class_scores = det[5:]
            classId = np.argmax(class_scores)  # best-scoring class for this row
            confidence = class_scores[classId]

            if confidence > threshold:
                # Box values are fractions of the frame, so scale them to
                # pixels and convert the centre point to a top-left corner.
                w, h = int(det[2]*width), int(det[3]*height)
                x, y = int((det[0]*width)-w/2), int((det[1]*height)-h/2)

                bbox.append([x, y, w, h])
                classId_list.append(int(classId))
                confidence_list.append(float(confidence))

    # Non-max suppression removes overlapping boxes for the same object.
    index = cv2.dnn.NMSBoxes(bbox, confidence_list, threshold, overlap_thresh)

    # Depending on the OpenCV release the result is an (N, 1) array, a flat
    # array, or an empty tuple when nothing survives; flatten to handle all.
    for i in np.asarray(index, dtype=int).reshape(-1):
        i = int(i)
        x, y, w, h = bbox[i]
        label = classes[classId_list[i]]
        cv2.rectangle(img, (x, y), (x+w, y+h), (255, 255, 0), 5)
        cv2.putText(img, f'{label.upper()} {int(confidence_list[i]*100)}%',
                    (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
        counts[label] = counts.get(label, 0) + 1

    # Look the vehicle classes up by label rather than by position: in the
    # standard COCO label order index 5 is "bus" and index 7 is "truck", so
    # positional lookups are easy to get swapped.
    car_cnt = counts.get('car', 0)
    truck_cnt = counts.get('truck', 0)
    bus_cnt = counts.get('bus', 0)

    cv2.putText(img, "Car count:"+str(car_cnt), (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
    cv2.putText(img, "Truck count:"+str(truck_cnt), (0, 45), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
    cv2.putText(img, "Bus count:"+str(bus_cnt), (0, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

    return counts
    

In [6]:
vid = cv2.VideoCapture('traffic.mkv')

# The output layer names depend only on the network, so resolve them once
# instead of on every frame. getUnconnectedOutLayersNames() also avoids
# indexing the result of getUnconnectedOutLayers(), whose shape differs
# between OpenCV releases. The tiny YOLOv4 model has two output layers.
outputNames = net.getUnconnectedOutLayersNames()

# Timestamp of the previous frame, used to calculate fps
prev_frame_time = 0

try:
    while vid.isOpened():
        success, img = vid.read()
        if not success:
            # End of the video (or a read error): there is no frame to process.
            break
        new_frame_time = time.time()

        # blob is a 4d numpy array laid out as (images, channels, height, width).
        # Pixels are scaled by 1/255, no mean is subtracted, and the B/R
        # channels are swapped.
        blob = cv2.dnn.blobFromImage(img, 1/255, (image_width, image_width), [0, 0, 0], swapRB=True, crop=False)

        # setting input of the network
        net.setInput(blob)

        # One array per output layer; each row is one candidate box holding the
        # box values followed by the per-class scores. With the two-output tiny
        # model and a 416x416 input the shapes are (507, 85) and (2028, 85).
        output = net.forward(outputNames)

        # findObject draws the bounding boxes, the confidence of each
        # prediction and the per-frame object counts onto img.
        findObject(output, img)

        # Guard against two timestamps being identical (coarse clocks).
        elapsed = new_frame_time - prev_frame_time
        fps = int(1/elapsed) if elapsed > 0 else 0
        prev_frame_time = new_frame_time

        # putting the FPS count on the frame
        cv2.putText(img, "FPS:"+str(fps), (0, 95), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

        cv2.imshow('Image', img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the capture and close the window, even after an error.
    vid.release()
    cv2.destroyAllWindows()