In [8]:
import numpy as np
import cv2
import time


"""
Start of:
Reading input video
"""
# Path of the input video that is processed frame by frame further below
video_path = 'videos/Pexels Videos 2034115.mp4'
video = cv2.VideoCapture(video_path)

# Fail fast on a missing or unreadable file: otherwise no frame is ever read
# and the script only breaks at the very end with an unrelated error
if not video.isOpened():
    raise IOError('Could not open input video: {}'.format(video_path))

# The writer is created later, once the first processed frame is available
writer = None

# Frame height and width; filled in from the first frame that is read
h, w = None, None

"""
End of:
Reading input video
"""


"""
Start of:
Loading YOLO v3 network
"""

# Read the class names: one label per line, surrounding whitespace removed
with open('yolo-coco-data/coco.names') as names_file:
    labels = list(map(str.strip, names_file))

# Check point: show the labels that were loaded
print('List with labels names:', labels, sep='\n')

# Load the YOLO v3 network from its Darknet configuration and weights files
network = cv2.dnn.readNetFromDarknet('yolo-coco-data/yolov3.cfg',
                                     'yolo-coco-data/yolov3.weights')

# Names of all layers in the network
layers_names_all = network.getLayerNames()

# Names of the output layers only. getUnconnectedOutLayers() yields 1-based
# layer indices (hence the "- 1"). Depending on the OpenCV version they come
# back either as an N x 1 array or as a flat array, so the result is
# flattened first to support both shapes.
layers_names_output = [
    layers_names_all[i - 1]
    for i in np.array(network.getUnconnectedOutLayers()).flatten()
]


# Detections whose best class score does not exceed this value are dropped;
# the same value is also handed to non-maximum suppression
probability_minimum = 0.5

# Second threshold handed to non-maximum suppression
threshold = 0.3

# One random colour (three uint8 values) per class label
colours = np.random.randint(low=0, high=255,
                            size=(len(labels), 3),
                            dtype=np.uint8)



"""
End of:
Loading YOLO v3 network
"""


"""
Start of:
Reading frames in the loop
"""

# f counts the processed frames, t accumulates the forward-pass time (seconds);
# both are reported after the loop
f = 0
t = 0

while True:
    ret, frame = video.read()

    # ret is falsy once no further frame could be read
    if not ret:
        break

    # Frame dimensions are taken once, from the first frame
    if w is None or h is None:
        h, w = frame.shape[:2]

    # --- Getting blob from current frame ---
    # Pixel values are scaled by 1/255, the image is resized to 416 x 416
    # without cropping, and the R and B channels are swapped
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)

    # --- Implementing forward pass ---
    network.setInput(blob)  # setting blob as input to the network
    start = time.time()
    output_from_network = network.forward(layers_names_output)
    end = time.time()

    # Only the forward pass itself is timed
    f += 1
    t += end - start

    print('Frame number {0} took {1:.5f} seconds'.format(f, end - start))

    # --- Getting bounding boxes ---
    bounding_boxes = []
    confidences = []
    class_numbers = []

    # Factors that scale the first four values of a detection to the frame
    # size; built once per frame instead of once per detection
    box_scale = np.array([w, h, w, h])

    for result in output_from_network:
        for detected_objects in result:
            # Values from index 5 onwards are treated as per-class scores
            scores = detected_objects[5:]
            class_current = int(np.argmax(scores))
            confidence_current = scores[class_current]

            # Skip weak detections early
            if confidence_current <= probability_minimum:
                continue

            # The first four values are centre x, centre y, width and height
            x_center, y_center, box_width, box_height = \
                detected_objects[0:4] * box_scale

            # Convert the centre point to the top-left corner
            x_min = int(x_center - (box_width / 2))
            y_min = int(y_center - (box_height / 2))

            bounding_boxes.append([x_min, y_min,
                                   int(box_width), int(box_height)])
            confidences.append(float(confidence_current))
            class_numbers.append(class_current)

    # --- Non-maximum suppression ---
    results = cv2.dnn.NMSBoxes(bounding_boxes, confidences,
                               probability_minimum, threshold)

    # --- Drawing bounding boxes and labels ---
    # The container type returned by NMSBoxes differs between OpenCV versions
    # (N x 1 array, flat array, or an empty tuple when nothing is kept);
    # converting to a flat integer array handles all of them, and an empty
    # result simply means the loop body never runs
    for i in np.array(results, dtype=int).flatten():

        x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
        box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]

        # Colour assigned to the class of this detection
        colour_box_current = colours[class_numbers[i]].tolist()

        cv2.rectangle(frame, (x_min, y_min),
                      (x_min + box_width, y_min + box_height),
                      colour_box_current, 5)

        text_box_current = '{}: {:.4f}'.format(labels[int(class_numbers[i])],
                                               confidences[i])

        # The label is placed just above the top-left corner of the box
        cv2.putText(frame, text_box_current, (x_min, y_min - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour_box_current, 2)

    # --- Writing processed frame into the file ---
    if writer is None:

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        # Use the frame rate of the source video so the result plays at the
        # original speed; fall back to 30 when the capture cannot report one
        source_fps = video.get(cv2.CAP_PROP_FPS)
        output_fps = source_fps if source_fps > 0 else 30

        writer = cv2.VideoWriter('videos/result.mp4', fourcc, output_fps,
                                 (frame.shape[1], frame.shape[0]), True)

    writer.write(frame)

    """
    End of:
    Writing processed frame into the file
    """

"""
End of:
Reading frames in the loop
"""

# Summary of the run: number of frames and time spent in the forward pass
print()
print('Total number of frames', f)
print('Total amount of time {:.5f} seconds'.format(t))

# The rate is undefined when no time was accumulated (no frame was processed);
# avoid a ZeroDivisionError in that case
if t > 0:
    print('FPS:', round((f / t), 1))
else:
    print('FPS: n/a (no frames were processed)')


# Release the input video
video.release()

# The writer is only created once a frame has been processed, so it may
# still be None here
if writer is not None:
    writer.release()

List with labels names:
['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
Frame number 1 took 0.70961 seconds
Frame number 2 took 0.50166 seconds
Frame number 3 took 0.51061 seconds
Fr

Frame number 197 took 0.56305 seconds
Frame number 198 took 0.56941 seconds
Frame number 199 took 0.57845 seconds
Frame number 200 took 0.54953 seconds
Frame number 201 took 0.56699 seconds
Frame number 202 took 0.56201 seconds
Frame number 203 took 0.57045 seconds
Frame number 204 took 0.55827 seconds
Frame number 205 took 0.56950 seconds
Frame number 206 took 0.54605 seconds
Frame number 207 took 0.59438 seconds
Frame number 208 took 0.56747 seconds
Frame number 209 took 0.55950 seconds
Frame number 210 took 0.54351 seconds
Frame number 211 took 0.55259 seconds
Frame number 212 took 0.58578 seconds
Frame number 213 took 0.56249 seconds
Frame number 214 took 0.55246 seconds
Frame number 215 took 0.58735 seconds
Frame number 216 took 0.56702 seconds
Frame number 217 took 0.56549 seconds
Frame number 218 took 0.62233 seconds
Frame number 219 took 0.63231 seconds
Frame number 220 took 0.63433 seconds
Frame number 221 took 0.56950 seconds
Frame number 222 took 0.58045 seconds
Frame number

Frame number 413 took 0.57345 seconds
Frame number 414 took 0.55591 seconds
Frame number 415 took 0.54949 seconds
Frame number 416 took 0.54349 seconds
Frame number 417 took 0.54154 seconds
Frame number 418 took 0.54554 seconds
Frame number 419 took 0.53956 seconds
Frame number 420 took 0.54056 seconds
Frame number 421 took 0.53853 seconds
Frame number 422 took 0.54449 seconds
Frame number 423 took 0.53907 seconds
Frame number 424 took 0.55851 seconds
Frame number 425 took 0.52557 seconds
Frame number 426 took 0.54054 seconds
Frame number 427 took 0.54206 seconds
Frame number 428 took 0.53352 seconds
Frame number 429 took 0.54255 seconds
Frame number 430 took 0.56050 seconds
Frame number 431 took 0.57145 seconds
Frame number 432 took 0.54853 seconds
Frame number 433 took 0.58444 seconds
Frame number 434 took 0.58742 seconds
Frame number 435 took 0.55751 seconds
Frame number 436 took 0.56050 seconds
Frame number 437 took 0.61236 seconds
Frame number 438 took 0.55830 seconds
Frame number

Frame number 629 took 0.70112 seconds
Frame number 630 took 0.58344 seconds
Frame number 631 took 0.59940 seconds
Frame number 632 took 0.94801 seconds
Frame number 633 took 0.77497 seconds
Frame number 634 took 0.66074 seconds
Frame number 635 took 0.61535 seconds
Frame number 636 took 0.62034 seconds
Frame number 637 took 0.64977 seconds
Frame number 638 took 0.62334 seconds
Frame number 639 took 0.61735 seconds
Frame number 640 took 0.62033 seconds
Frame number 641 took 0.63581 seconds
Frame number 642 took 0.60437 seconds
Frame number 643 took 0.63581 seconds
Frame number 644 took 0.62333 seconds
Frame number 645 took 0.56050 seconds
Frame number 646 took 0.56124 seconds
Frame number 647 took 0.57001 seconds
Frame number 648 took 0.55751 seconds
Frame number 649 took 0.53058 seconds
Frame number 650 took 0.56301 seconds
Frame number 651 took 0.57347 seconds
Frame number 652 took 0.54155 seconds
Frame number 653 took 0.55402 seconds
Frame number 654 took 0.54554 seconds
Frame number

Frame number 845 took 0.52859 seconds
Frame number 846 took 0.52040 seconds
Frame number 847 took 0.50764 seconds
Frame number 848 took 0.51060 seconds
Frame number 849 took 0.52415 seconds
Frame number 850 took 0.52260 seconds
Frame number 851 took 0.51718 seconds
Frame number 852 took 0.52859 seconds
Frame number 853 took 0.54057 seconds
Frame number 854 took 0.54156 seconds
Frame number 855 took 0.51965 seconds
Frame number 856 took 0.51114 seconds

Total number of frames 856
Total amount of time 502.76818 seconds
FPS: 1.7


'\nSome comments\n\nWhat is a FOURCC?\n    FOURCC is short for "four character code" - an identifier for a video codec,\n    compression format, colour or pixel format used in media files.\n    http://www.fourcc.org\n\n\nParameters for cv2.VideoWriter():\n    filename - Name of the output video file.\n    fourcc - 4-character code of codec used to compress the frames.\n    fps\t- Frame rate of the created video.\n    frameSize - Size of the video frames.\n    isColor\t- If it is True, the encoder will expect and encode colour frames.\n'