In [1]:
import numpy as np
import argparse
import cv2 as cv
import subprocess
import time
import os
from yolo_utils import infer_image, show_image

In [2]:
# Parsed command-line options. Stays an empty list unless the block below runs
# (i.e. when this code is executed as the main module / inside the notebook).
FLAGS = []


def parse_bool_flag(value):
    """Convert a command-line token into a real ``bool``.

    Without a converter argparse stores the raw string, and every non-empty
    string (including ``'False'`` and ``'0'``) is truthy, so the option could
    never be switched off explicitly.

    Args:
        value: A ``bool`` (returned unchanged) or a string such as
            ``'true'``, ``'False'``, ``'1'``, ``'no'`` (case-insensitive).

    Returns:
        The boolean the token represents. An empty string maps to ``False``.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value

    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False

    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(value))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # NOTE(review): not read anywhere in the visible cells; presumably the
    # directory holding the model files -- confirm against yolo_utils.
    parser.add_argument('-m', '--model',
        default='./weights/')

    parser.add_argument('-w', '--weights',
        default='./weights/yolov3.weights',
        help='Path to the Darknet weights file.')

    parser.add_argument('-cfg', '--config',
        default='./cfg/yolov3.cfg',
        help='Path to the Darknet network configuration file.')

    parser.add_argument('-v', '--video-path',
        default='./videos/airport.mp4',
        help='Path to the input video.')

    parser.add_argument('-vo', '--video-output-path',
        default='./output/airport.avi',
        help='Path the annotated output video is written to.')

    parser.add_argument('-l', '--labels',
        default='./data/coco.names',
        help='Path to the file listing one class label per line.')

    # NOTE(review): confidence / threshold are only forwarded to infer_image
    # through FLAGS; their exact meaning is defined in yolo_utils.
    parser.add_argument('-c', '--confidence',
        type=float,
        default=0.5)

    parser.add_argument('-th', '--threshold',
        type=float,
        default=0.3)

    # Parsed through parse_bool_flag so that "-t False" really yields False.
    parser.add_argument('-t', '--show-time',
        type=parse_bool_flag,
        default=False)

    # parse_known_args tolerates extra arguments (e.g. the ones a Jupyter
    # kernel is launched with) instead of aborting on them.
    FLAGS, unparsed = parser.parse_known_args()


In [3]:


# Get the labels: one class name per line of the labels file.
# The context manager closes the file handle as soon as it has been read.
with open(FLAGS.labels) as labels_file:
    labels = labels_file.read().strip().split('\n')
print(labels)


['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [4]:

# Initialize one random 3-channel uint8 colour per label.
# The upper bound of randint is exclusive, so 256 is needed for channel values
# to span the full 0..255 range. Colours are drawn at random and are therefore
# not guaranteed to be distinct from one another.
colors = np.random.randint(0, 256, size=(len(labels), 3), dtype='uint8')


In [5]:

# Load the weights and configuration to form the pretrained YOLOv3 model.
# The configuration (.cfg) path is passed first, the weights path second.
net = cv.dnn.readNetFromDarknet(FLAGS.config, FLAGS.weights)


In [6]:

    # Get the names of ALL layers of the model (not only the output layers;
    # the output layers are selected from this sequence in the next cell).
layer_names = net.getLayerNames()

# Inspection only: show the full sequence of names and its first entry.
print(layer_names)
print(layer_names[0])

('conv_0', 'bn_0', 'leaky_1', 'conv_1', 'bn_1', 'leaky_2', 'conv_2', 'bn_2', 'leaky_3', 'conv_3', 'bn_3', 'leaky_4', 'shortcut_4', 'conv_5', 'bn_5', 'leaky_6', 'conv_6', 'bn_6', 'leaky_7', 'conv_7', 'bn_7', 'leaky_8', 'shortcut_8', 'conv_9', 'bn_9', 'leaky_10', 'conv_10', 'bn_10', 'leaky_11', 'shortcut_11', 'conv_12', 'bn_12', 'leaky_13', 'conv_13', 'bn_13', 'leaky_14', 'conv_14', 'bn_14', 'leaky_15', 'shortcut_15', 'conv_16', 'bn_16', 'leaky_17', 'conv_17', 'bn_17', 'leaky_18', 'shortcut_18', 'conv_19', 'bn_19', 'leaky_20', 'conv_20', 'bn_20', 'leaky_21', 'shortcut_21', 'conv_22', 'bn_22', 'leaky_23', 'conv_23', 'bn_23', 'leaky_24', 'shortcut_24', 'conv_25', 'bn_25', 'leaky_26', 'conv_26', 'bn_26', 'leaky_27', 'shortcut_27', 'conv_28', 'bn_28', 'leaky_29', 'conv_29', 'bn_29', 'leaky_30', 'shortcut_30', 'conv_31', 'bn_31', 'leaky_32', 'conv_32', 'bn_32', 'leaky_33', 'shortcut_33', 'conv_34', 'bn_34', 'leaky_35', 'conv_35', 'bn_35', 'leaky_36', 'shortcut_36', 'conv_37', 'bn_37', 'leaky_

In [7]:
# Keep only the names of the network's output (unconnected) layers.
# Asking the network for the names directly has two advantages over indexing
# the full name list with getUnconnectedOutLayers():
#   * the cell is idempotent -- it no longer reads the variable it overwrites,
#     so running it twice cannot pick wrong layers or raise an IndexError;
#   * it does not depend on whether getUnconnectedOutLayers() returns a flat
#     or a nested index array, which differs between OpenCV versions.
layer_names = list(net.getUnconnectedOutLayersNames())

In [8]:
# Run detection on every frame of the input video and write the annotated
# frames to FLAGS.video_output_path.
if FLAGS.video_path:
    # Read the video. cv.VideoCapture does not raise when the source cannot be
    # opened, so the failure has to be detected explicitly via isOpened().
    vid = cv.VideoCapture(FLAGS.video_path)
    if not vid.isOpened():
        vid.release()
        raise IOError('Video cannot be loaded!\n'
                      'Please check the path provided!')

    height, width = None, None
    writer = None

    try:
        while True:
            grabbed, frame = vid.read()

            # Checking if the complete video is read
            if not grabbed:
                break

            # The frame size is taken from the first frame and then reused.
            if width is None or height is None:
                height, width = frame.shape[:2]

            frame, _, _, _, _ = infer_image(net, layer_names, height, width,
                                            frame, colors, labels, FLAGS)

            if writer is None:
                # Initialize the video writer lazily, once the size of the
                # processed frame is known. The output directory is created
                # first because VideoWriter does not raise if it is missing.
                output_dir = os.path.dirname(FLAGS.video_output_path)
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)

                fourcc = cv.VideoWriter_fourcc(*"MJPG")
                # NOTE: the output frame rate is fixed at 30 regardless of the
                # frame rate of the source video.
                writer = cv.VideoWriter(FLAGS.video_output_path, fourcc, 30,
                                        (frame.shape[1], frame.shape[0]), True)

            writer.write(frame)
    finally:
        # Always release the handles, even if inference fails mid-way.
        print("[INFO] Cleaning up...")
        # writer is still None when the video yielded no frames at all.
        if writer is not None:
            writer.release()
        vid.release()


[INFO] Cleaning up...
