In [35]:
import cv2 
import numpy as np
import os
import time
import sys

In [36]:
# Detection confidence threshold.
# NOTE(review): not referenced by any of the notebook cells visible here.
CONFIDENCE = 0.7
#SCORE_THRESHOLD = 0.5
#IOU_THRESHOLD = 0.5

# the neural network configuration
config_path = "Files/yolov4-custom-test.cfg"

# the YOLO net weights file
weights_path = "yolov4-custom_best.weights"


# class names, one per line of the names file; the context manager closes the
# file handle immediately instead of leaving it to the garbage collector
with open("Files/obj.names") as names_file:
    labels = names_file.read().strip().split("\n")
# generating colors for each object for later plotting
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

# running tally per class name, incremented by draw_labels()
class_count = { cls : 0 for cls in labels }

In [37]:
def load_yolo():
    """Load the YOLO network together with the metadata needed to use it.

    Reads the module-level ``config_path``, ``weights_path`` and ``labels``.

    Returns:
        tuple: ``(net, classes, colors, output_layers)`` where ``net`` is the
        object returned by ``cv2.dnn.readNet``, ``classes`` is ``labels``,
        ``colors`` is a ``(len(classes), 3)`` array of random floats drawn
        from ``[0, 255)`` and ``output_layers`` is a list with the names of
        the network's unconnected output layers.
    """
    net = cv2.dnn.readNet(config_path, weights_path)
    classes = labels
    # Ask OpenCV for the output-layer names directly. Indexing getLayerNames()
    # with the values of getUnconnectedOutLayers() raises TypeError on OpenCV
    # builds that return those indices as an (N, 1) array rather than a flat one.
    output_layers = list(net.getUnconnectedOutLayersNames())
    colors = np.random.uniform(0, 255, size=(len(classes), 3))
    return net, classes, colors, output_layers

In [38]:
def detect_objects(img, net, outputLayers):
    """Run a single forward pass of ``net`` over ``img``.

    The image is converted to a 320x320 blob (pixel values scaled by 0.00392,
    red/blue channels swapped, no cropping), fed to the network, and the
    requested output layers are evaluated.

    Args:
        img: image passed to ``cv2.dnn.blobFromImage``.
        net: network object exposing ``setInput`` and ``forward``.
        outputLayers: names of the layers whose outputs are wanted.

    Returns:
        tuple: ``(blob, outputs)`` - the input blob and the result of
        ``net.forward(outputLayers)``.
    """
    input_blob = cv2.dnn.blobFromImage(
        img,
        scalefactor=0.00392,
        size=(320, 320),
        mean=(0, 0, 0),
        swapRB=True,
        crop=False,
    )
    net.setInput(input_blob)
    return input_blob, net.forward(outputLayers)

In [43]:
def get_box_dimensions(outputs, height, width, conf_threshold=0.5):
    """Turn raw network outputs into pixel-space boxes.

    Each row of every array in ``outputs`` is read as: indices 0-3 are the
    box centre x, centre y, width and height as fractions of the frame size,
    and indices 5 onward are the per-class scores (index 4 is not used here).
    The best-scoring class of a row is kept when its score exceeds
    ``conf_threshold``.

    Args:
        outputs: iterable of 2-D arrays of detection rows.
        height: frame height in pixels, used to scale y and box height.
        width: frame width in pixels, used to scale x and box width.
        conf_threshold: minimum class score (exclusive) for a detection to be
            kept. Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        tuple: ``(boxes, confs, class_ids)`` - parallel lists holding
        ``[x, y, w, h]`` (top-left corner plus size, as ints), the score as a
        Python float, and the class index as a Python int.
    """
    boxes = []
    confs = []
    class_ids = []
    for output in outputs:
        for detect in output:
            scores = detect[5:]
            class_id = np.argmax(scores)
            conf = scores[class_id]
            if conf > conf_threshold:
                center_x = int(detect[0] * width)
                center_y = int(detect[1] * height)
                w = int(detect[2] * width)
                h = int(detect[3] * height)
                # convert centre coordinates to the top-left corner
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confs.append(float(conf))
                # plain int rather than a NumPy scalar
                class_ids.append(int(class_id))
    return boxes, confs, class_ids

In [40]:
def draw_labels(boxes, confs, colors, class_ids, classes, img, line_y=700):
    """Draw the detections kept by NMS on ``img`` and update ``class_count``.

    ``cv2.dnn.NMSBoxes`` (score threshold 0.5, NMS threshold 0.4) selects the
    boxes to keep. Each kept box gets a rectangle and a class label, and the
    module-level ``class_count`` entry for its label is incremented when the
    box's top edge lies below the counting line. A green horizontal counting
    line is drawn once per call across the full frame width, including on
    frames without detections.

    NOTE(review): the tally is incremented on every call in which a box lies
    below the line, so an object that stays there across several frames is
    counted once per frame. Counting unique objects would need tracking.

    Args:
        boxes: list of ``[x, y, w, h]`` boxes (top-left corner plus size).
        confs: list of scores, parallel to ``boxes``.
        colors: per-class colours, indexable by class id.
        class_ids: list of class indices, parallel to ``boxes``.
        classes: class names, indexable by class id.
        img: frame to draw on; expected to be a NumPy image array.
        line_y: y coordinate of the counting line. Defaults to 700.

    Returns:
        The annotated image.
    """
    kept = []
    if boxes:
        indexes = cv2.dnn.NMSBoxes(boxes, confs, 0.5, 0.4)
        # NMSBoxes may hand back an empty tuple, a flat array or an (N, 1)
        # array depending on the OpenCV build; flatten to plain ints. Sorting
        # keeps the original drawing order (ascending box index).
        kept = sorted(int(i) for i in np.asarray(indexes).reshape(-1))

    font = cv2.FONT_HERSHEY_PLAIN
    for i in kept:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        # Pass the colour as a tuple of Python floats; some OpenCV builds
        # reject NumPy rows / NumPy scalar types as a colour argument.
        color = tuple(float(c) for c in colors[class_ids[i]])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        img = cv2.putText(img, label, (x, y - 5), font, 1, color, 1)
        if y > line_y:
            class_count[label] = class_count[label] + 1

    # Drawn last so the line stays on top of the boxes, and outside the loop
    # so it appears exactly once per frame regardless of detections.
    img = cv2.line(img, (0, line_y), (img.shape[1] - 1, line_y), (0, 255, 0), thickness=1)
    return img

In [44]:
def start_video(video_path, output_path='output.avi', fps=10):
    """Run detection on every frame of a video and write the annotated result.

    Frames are read from ``video_path``, passed through ``detect_objects``,
    ``get_box_dimensions`` and ``draw_labels``, and written to an MJPG-encoded
    file. Processing stops when the capture has no more frames.

    Args:
        video_path: path of the input video.
        output_path: path of the annotated output video. Defaults to
            ``'output.avi'``.
        fps: frame rate written to the output file. Defaults to 10.

    Raises:
        IOError: if the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError("Could not open video: {}".format(video_path))

    out = None
    try:
        model, classes, colors, output_layers = load_yolo()
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
            fps,
            (frame_width, frame_height),
        )

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or read failure). Without this break the loop
                # never terminates: no window is ever shown, so a
                # cv2.waitKey()-based 'q' check can never receive a key press.
                break
            height, width = frame.shape[:2]
            blob, outputs = detect_objects(frame, model, output_layers)
            boxes, confs, class_ids = get_box_dimensions(outputs, height, width)
            frame = draw_labels(boxes, confs, colors, class_ids, classes, frame)
            out.write(frame)
    finally:
        # Runs on errors and on a manual interrupt too, so the capture and
        # the writer are always released.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows() # destroy all opened windows

In [45]:
video_path = 'Test_shot.mp4'

I had to manually stop the call below because it had been running for more than 5 hours on my laptop.

In [46]:
start_video(video_path)

KeyboardInterrupt: 

The output of this run is saved in the output.avi file, and below are the counts of the detected objects per class.

In [47]:
print(class_count)

{'Bicycle': 0, 'Bus': 43, 'Car': 18, 'Motorcycle': 0, 'Truck': 0}
