In [32]:
# Import the necessary libraries
import numpy as np
import imutils
import time
import cv2
import os

In [33]:
# Specify necessary paths (all relative to the notebook's working directory)
configPath = 'bccd.cfg'                     # Darknet network definition
weightsPath = 'yolov4.bccd.best.weights'    # trained Darknet weights
# Build the labels path with os.path.join so the separator matches the host
# OS; a hard-coded backslash path is a single (non-existent) file name on
# Linux/macOS. The directory name (including its space) is kept verbatim.
labelsPath = os.path.join('prep_data _yolo_v4', 'train', '_darknet.labels')
video_path = 't-72.mp4'                     # input video
video_path_out = 't-72_nn.mp4'              # annotated output video

In [34]:
# Load the class labels from file, one label per line.
# The context manager closes the file handle as soon as it has been read
# instead of leaving it open until garbage collection.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

In [35]:
# Build one random colour triple per class label. The generator is seeded
# first so every run assigns the same colour to the same class.
np.random.seed(42)
color_table_shape = (len(LABELS), 3)
COLORS = np.random.randint(0, 255, size=color_table_shape, dtype="uint8")

In [36]:
# Load the Darknet network from its config file and trained weights
net = cv2.dnn.readNetFromDarknet(
    configPath,
    weightsPath,
)

In [37]:
# Determine only the *output* layer names that we need from YOLO
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV release the result is either an Nx1 array (each index wrapped in its
# own array) or a flat array of scalars; indexing `i[0]` fails on the flat
# form. Flattening first makes the lookup work for both shapes.
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
ln = [layer_names[i - 1] for i in out_layer_ids]

In [68]:
# Initialize the video stream, pointer to output video file, and
# frame dimensions
vs = cv2.VideoCapture(video_path)
# Fail fast with a clear message: an unopened capture would otherwise make
# the processing loop exit immediately without ever creating the writer.
if not vs.isOpened():
    raise IOError("could not open video file: {}".format(video_path))
writer = None
(W, H) = (None, None)
# Try to determine the total number of frames in the video file
try:
    # The property constant lives in a different place on OpenCV 2
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() \
        else cv2.CAP_PROP_FRAME_COUNT
    total = int(vs.get(prop))
    print("[INFO] {} total frames in video".format(total))
# An error occurred while trying to determine the total number of frames in
# the video file. Catch Exception (not a bare except) so KeyboardInterrupt
# and SystemExit still propagate.
except Exception:
    print("[INFO] could not determine # of frames in video")
    print("[INFO] no approx. completion time can be provided")
    total = -1

[INFO] 1500 total frames in video


In [71]:
# Detection and drawing parameters never change between frames, so they are
# defined once up front instead of being re-assigned on every iteration.

# Minimum probability to filter weak detections
tresh_conf = 0.3
# Non-maxima suppression threshold handed to cv2.dnn.NMSBoxes
non_max_supp_conf = 0.1

text_thick = 1 # text thickness
rect_thick = 2 # rectangle thickness
font_face = cv2.FONT_HERSHEY_SIMPLEX # font type
fontScale = 1

# Frame rate used for the output file when the source does not report one
fallback_fps = 30

# Loop over frames from the video file stream. The try/finally guarantees
# that the capture and the writer are released even when processing a frame
# raises, and that cleanup does not crash when no frame was ever written.
try:
    while True:
        # Read the next frame from the file
        (grabbed, frame) = vs.read()
        # If the frame was not grabbed, then we have reached the end
        # of the stream
        if not grabbed:
            break
        # If the frame dimensions are empty, grab them
        if W is None or H is None:
            (H, W) = frame.shape[:2]

        # Construct a blob from the input frame and then perform a forward
        # pass of the YOLO object detector, giving us our bounding boxes
        # and associated probabilities
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                     swapRB=True, crop=False)
        net.setInput(blob)
        start = time.time()
        layerOutputs = net.forward(ln)
        end = time.time()

        # Initialize our lists of detected bounding boxes, confidences,
        # and class IDs, respectively
        boxes = []
        confidences = []
        classIDs = []

        # loop over each of the layer outputs
        for output in layerOutputs:
            # loop over each of the detections
            for detection in output:
                # Extract the class ID and confidence (i.e., probability) of
                # the current object detection
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                # Filter out weak predictions by ensuring the detected
                # probability is greater than the minimum probability
                if confidence > tresh_conf:
                    # Scale the bounding box coordinates back relative to the
                    # size of the image, keeping in mind that YOLO actually
                    # returns the center (x, y)-coordinates of the bounding
                    # box followed by the boxes' width and height
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")
                    # Use the center (x, y)-coordinates to derive the top
                    # and left corner of the bounding box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    # Update our list of bounding box coordinates,
                    # confidences, and class IDs
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Apply non-maxima suppression to suppress weak, overlapping
        # bounding boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, tresh_conf,
                                non_max_supp_conf)

        # Ensure that at least one detection exists
        if len(idxs) > 0:
            # loop over the indexes we are keeping; np.asarray makes this
            # work whether NMSBoxes returned an ndarray or a plain sequence
            for i in np.asarray(idxs).flatten():
                # Extract the bounding box coordinates, clamping the top-left
                # corner so the rectangle border stays inside the frame
                x = max(boxes[i][0], rect_thick)
                y = max(boxes[i][1], rect_thick)
                (w, h) = (boxes[i][2], boxes[i][3])
                # Draw a bounding box rectangle and label on the image
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, rect_thick)
                text = "{}: {:.1f}".format(LABELS[classIDs[i]], confidences[i]*100)
                cv2.putText(frame, text, (x, y - 5), font_face,
                            fontScale, color, text_thick)

        # Check if the video writer is None
        if writer is None:
            # Match the output frame rate to the source so playback speed is
            # preserved; fall back when the property is missing/zero/NaN
            fps = vs.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = fallback_fps
            # Initialize our video writer
            # NOTE(review): MJPG fourcc with an .mp4 output name - confirm the
            # backend accepts this combination on the target machine.
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(video_path_out, fourcc, fps,
                                     (frame.shape[1], frame.shape[0]), True)
            # Some information on processing single frame
            if total > 0:
                elap = (end - start)
                print("[INFO] single frame took {:.4f} seconds".format(elap))
                print("[INFO] estimated total time to finish: {:.4f}".format(
                    elap * total))
        # Write the output frame to disk
        writer.write(frame)
finally:
    # Release the file pointers
    print("[INFO] cleaning up...")
    # writer stays None when no frame was read, so guard the release
    if writer is not None:
        writer.release()
    vs.release()

[INFO] single frame took 3.7352 seconds
[INFO] estimated total time to finish: 5602.8206
[INFO] cleaning up...
