In [1]:
import numpy as np
import cv2

In [2]:
# Minimum class score a detection must exceed; anything at or below
# this value is discarded.
confidenceThreshold = 0.5

# Overlap threshold handed to Non-Maximum Suppression: when several boxes
# cover the same object and overlap by more than this amount, only the
# box with the highest confidence is kept.
NMSThreshold = 0.3

# YOLOv3 network definition (layer sizes and layer count).
modelConfiguration = 'cfg/yolov3.cfg'
# Pre-trained YOLOv3 weights.
modelWeights = 'yolov3.weights'

# Text file holding the class names, one per line.
labelsPath = 'coco.names'

In [3]:

# Read the class names (one per line) that the model can report.
# A context manager guarantees the file handle is closed again.
with open(labelsPath) as labelsFile:
    labels = labelsFile.read().strip().split('\n')

# Fixed seed so every label gets the same random colour on every run.
np.random.seed(10)
COLORS = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

# Load the Darknet YOLOv3 network from its configuration and weights files.
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)

# Names of the unconnected output layers, i.e. the layers whose results
# are requested from net.forward(). Asking OpenCV for the names directly
# avoids manual index arithmetic on getUnconnectedOutLayers(), whose return
# shape (flat vs. Nx1 array) differs between OpenCV versions.
outputLayer = list(net.getUnconnectedOutLayersNames())

video = cv2.VideoCapture('testvideo.mp4')
if not video.isOpened():
    # Fail early with a clear message instead of silently processing zero frames.
    raise IOError("Could not open video file 'testvideo.mp4'")

writer = None  # cv2.VideoWriter, created lazily once the frame size is known
(W, H) = (None, None)  # width and height of the video frames, filled on first read

## Detecting objects in the video frames

In [4]:
# Report how many frames the video container claims to hold.
# `total` is always defined afterwards; -1 means "unknown".
total = -1
try:
    prop = cv2.CAP_PROP_FRAME_COUNT
    total = int(video.get(prop))
except Exception:
    # Catch only ordinary errors; a bare `except:` would also swallow
    # KeyboardInterrupt / SystemExit.
    total = -1

# A non-positive value means the backend could not supply the count.
if total > 0:
    print("[INFO] {} total frames in video".format(total))
else:
    print("Could not determine no. of frames in video")


[INFO] 339 total frames in video


In [5]:

# Main loop: run YOLOv3 on every frame, draw the surviving boxes and write
# the annotated frame to 'testvideo_output.mp4'.

count = 0  # number of frames written so far

# Use the source frame rate so the output plays at the original speed;
# fall back to 30 FPS when the backend does not report a usable value.
fps = video.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    fps = 30

try:
    while True:
        (ret, frame) = video.read()
        if not ret:
            # No more frames (or the read failed): stop processing.
            break
        if W is None or H is None:
            # frame.shape is (height, width, channels); keep the first two.
            (H, W) = frame.shape[:2]

        # Scale pixels to [0, 1], resize to 416x416 and swap BGR -> RGB.
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        # Forward pass, collecting the results of the requested output layers.
        layersOutputs = net.forward(outputLayer)

        boxes = []
        confidences = []
        classIDs = []

        for output in layersOutputs:
            for detection in output:
                # Entries from index 5 onward are treated as per-class scores;
                # the best-scoring class is the prediction.
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                if confidence > confidenceThreshold:
                    # The first four values are scaled by the frame size and
                    # read as (centerX, centerY, width, height).
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype('int')
                    # Convert the centre point to the top-left corner.
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Apply Non-Maximum Suppression to drop overlapping boxes.
        detectionNMS = cv2.dnn.NMSBoxes(boxes, confidences, confidenceThreshold, NMSThreshold)
        if len(detectionNMS) > 0:
            # np.array(...) makes flatten() work whether OpenCV returned an
            # ndarray or a plain sequence of indices.
            for i in np.array(detectionNMS).flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])

                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                text = '{}: {:.4f}'.format(labels[classIDs[i]], confidences[i])
                cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Create the writer on the first frame regardless of whether anything
        # was detected, so frames without detections are not dropped.
        if writer is None:
            # 'mp4v' matches the .mp4 container of the output file.
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter('testvideo_output.mp4', fourcc, fps,
                                     (frame.shape[1], frame.shape[0]), True)

        writer.write(frame)
        count = count + 1
        print("Writing frame", count)
finally:
    # Always release the resources, even if processing was interrupted or
    # the video yielded no frames (in which case no writer was created).
    if writer is not None:
        writer.release()
    video.release()

Writing frame 1
Writing frame 2
Writing frame 3
Writing frame 4
Writing frame 5
Writing frame 6
Writing frame 7
Writing frame 8
Writing frame 9
Writing frame 10
Writing frame 11
Writing frame 12
Writing frame 13
Writing frame 14
Writing frame 15
Writing frame 16
Writing frame 17
Writing frame 18
Writing frame 19
Writing frame 20
Writing frame 21
Writing frame 22
Writing frame 23
Writing frame 24
Writing frame 25
Writing frame 26
Writing frame 27
Writing frame 28
Writing frame 29
Writing frame 30
Writing frame 31
Writing frame 32
Writing frame 33
Writing frame 34
Writing frame 35
Writing frame 36
Writing frame 37
Writing frame 38
Writing frame 39
Writing frame 40
Writing frame 41
Writing frame 42
Writing frame 43
Writing frame 44
Writing frame 45
Writing frame 46
Writing frame 47
Writing frame 48
Writing frame 49
Writing frame 50
Writing frame 51
Writing frame 52
Writing frame 53
Writing frame 54
Writing frame 55
Writing frame 56
Writing frame 57
Writing frame 58
Writing frame 59
Writin