In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import glob
import os
import time

### Loading YOLO weights and cfg

In [2]:
# Locations of the Darknet network definition and the pretrained weights,
# both expected inside the local 'yolo' directory.
cfg_path = os.path.join('yolo', 'yolov3.cfg')
weights_path = os.path.join('yolo', 'yolov3.weights')


### Load neural net in cv2

In [3]:
# Build the network from the Darknet config file and the weights file.
net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path)

### Get layer names

In [4]:
# Names of all layers in the loaded network.
names = net.getLayerNames()

In [5]:
# Show the layer names.
names

('conv_0',
 'bn_0',
 'leaky_1',
 'conv_1',
 'bn_1',
 'leaky_2',
 'conv_2',
 'bn_2',
 'leaky_3',
 'conv_3',
 'bn_3',
 'leaky_4',
 'shortcut_4',
 'conv_5',
 'bn_5',
 'leaky_6',
 'conv_6',
 'bn_6',
 'leaky_7',
 'conv_7',
 'bn_7',
 'leaky_8',
 'shortcut_8',
 'conv_9',
 'bn_9',
 'leaky_10',
 'conv_10',
 'bn_10',
 'leaky_11',
 'shortcut_11',
 'conv_12',
 'bn_12',
 'leaky_13',
 'conv_13',
 'bn_13',
 'leaky_14',
 'conv_14',
 'bn_14',
 'leaky_15',
 'shortcut_15',
 'conv_16',
 'bn_16',
 'leaky_17',
 'conv_17',
 'bn_17',
 'leaky_18',
 'shortcut_18',
 'conv_19',
 'bn_19',
 'leaky_20',
 'conv_20',
 'bn_20',
 'leaky_21',
 'shortcut_21',
 'conv_22',
 'bn_22',
 'leaky_23',
 'conv_23',
 'bn_23',
 'leaky_24',
 'shortcut_24',
 'conv_25',
 'bn_25',
 'leaky_26',
 'conv_26',
 'bn_26',
 'leaky_27',
 'shortcut_27',
 'conv_28',
 'bn_28',
 'leaky_29',
 'conv_29',
 'bn_29',
 'leaky_30',
 'shortcut_30',
 'conv_31',
 'bn_31',
 'leaky_32',
 'conv_32',
 'bn_32',
 'leaky_33',
 'shortcut_33',
 'conv_34',
 'bn_34',
 'l

### Read labels

In [6]:
# Class labels, one per line in yolo/coco.names; the list index is the class id
# used to look up a label when drawing detections.
label_path = os.path.join('yolo', 'coco.names')
# Use a context manager so the file handle is closed deterministically.
with open(label_path) as label_file:
    labels = label_file.read().strip().split('\n')

In [7]:
# Show the class labels read from coco.names.
labels

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [8]:
# Names of the network's unconnected output layers; these are the layers
# requested from net.forward() during detection.
# NOTE(review): presumably equivalent to the manual lookup
# [names[i - 1] for i in net.getUnconnectedOutLayers()] - confirm for the
# installed OpenCV version.
layer_names = net.getUnconnectedOutLayersNames()

In [9]:
# Show the names of the network's output layers.
layer_names

('yolo_82', 'yolo_94', 'yolo_106')

### Run the inference on test image


In [59]:
def runInterference(img, conf_threshold=0.80, nms_threshold=0.8):
    """Run the loaded network on one image and return NMS-filtered detections.

    Parameters
    ----------
    img : array whose first two dimensions are (height, width)
        Image passed to cv2.dnn.blobFromImage with swapRB=False, i.e. the
        network sees the channels in exactly the order they have in `img`.
    conf_threshold : float, optional
        Minimum best-class score a detection needs to be kept. The same value
        is used as the score threshold of cv2.dnn.NMSBoxes.
    nms_threshold : float, optional
        Overlap threshold handed to cv2.dnn.NMSBoxes.

    Returns
    -------
    list
        [idx, confidences, classIds, boxes] where
        * idx is the value returned by cv2.dnn.NMSBoxes (indices into the
          three lists below),
        * confidences holds the best-class score of every kept detection,
        * classIds holds the index of that best class,
        * boxes holds [x, y, width, height] in pixels, (x, y) being the
          top-left corner.
    """
    (H, W) = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), crop=False, swapRB=False)
    net.setInput(blob)
    layers_output = net.forward(layer_names)

    boxes = []
    confidences = []
    classIds = []
    # The first four values of a detection are scaled by the image size below.
    scale = np.array([W, H, W, H])

    for output in layers_output:
        for detection in output:
            # Values from index 5 onwards are the per-class scores.
            scores = detection[5:]
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])

            if confidence > conf_threshold:
                # Centre x, centre y, width, height in pixels, as plain Python
                # ints so downstream cv2 calls never receive NumPy scalars.
                bx, by, bw, bh = (int(v) for v in (detection[:4] * scale).astype('int'))

                # Convert the box centre to its top-left corner.
                x = int(bx - (bw / 2))
                y = int(by - (bh / 2))
                boxes.append([x, y, bw, bh])
                confidences.append(confidence)
                classIds.append(classId)

    idx = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    return [idx, confidences, classIds, boxes]
            

### Draw box

In [60]:
def drawBoxes(img, idx, confidences, classIds, boxes):
    """Draw the selected detections onto `img` and return it.

    Parameters
    ----------
    img : image array; it is modified in place.
    idx : indices of the detections to draw (e.g. the result of
        cv2.dnn.NMSBoxes). None or an empty container means nothing is drawn.
    confidences, classIds, boxes : parallel sequences indexed by `idx`;
        each box is [x, y, width, height] with (x, y) the top-left corner.

    Returns
    -------
    The same `img` object, with a rectangle and a "label: score" caption
    drawn for every selected detection.
    """
    if idx is None or len(idx) == 0:
        return img

    color = (255, 210, 76)
    # np.asarray lets a list/tuple of indices work as well as an ndarray.
    for i in np.asarray(idx).flatten():
        x, y, w, h = (int(v) for v in boxes[i])
        caption = "{}: {:.3f}".format(labels[classIds[i]], confidences[i])
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        # Keep the caption baseline at least 15 px below the top edge so the
        # text stays visible for boxes that touch the top of the image.
        cv2.putText(img, caption, (x, max(y - 5, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return img

### Detect objects

In [61]:
def detectObjects(img):
    """Detect objects in a BGR image and return an annotated BGR image.

    The input is converted from BGR to RGB, passed through runInterference,
    the resulting boxes are drawn on a copy of the RGB image, and that copy
    is converted back to BGR before being returned.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    idx, confidences, classIds, boxes = runInterference(rgb)
    annotated = drawBoxes(np.copy(rgb), idx, confidences, classIds, boxes)
    bgr_result = cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
    return bgr_result

### Load test image

In [62]:
image_path = 'test.jpg'
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface later as an obscure cvtColor error.
if img is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))
detect_img = detectObjects(img)
cv2.imshow("image", detect_img)
# Block until any key is pressed, then close the preview window.
cv2.waitKey(0)
cv2.destroyAllWindows()

### Write video

In [67]:
def writeVideo(inVideoPath, outVideoPath, fps=15.0):
    """Run object detection on every frame of a video and save the result.

    Parameters
    ----------
    inVideoPath : str
        Path of the video to read.
    outVideoPath : str
        Directory that receives the output file 'outpy.avi' (MJPG codec).
    fps : float, optional
        Frame rate written into the output file.

    Raises
    ------
    IOError
        If the input video cannot be opened.

    The capture and the writer are released even when processing is
    interrupted (e.g. by KeyboardInterrupt), so the frames written so far
    are not left in an unfinalised file.
    """
    inVideo = cv2.VideoCapture(inVideoPath)
    if not inVideo.isOpened():
        inVideo.release()
        raise IOError("Could not open video: {}".format(inVideoPath))

    out = None
    try:
        length = int(inVideo.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Number of frames: ", length)
        curFrame = 0
        while True:
            retVal, frame = inVideo.read()
            if not retVal:
                break
            if out is None:
                # Create the writer lazily: the frame size is only known once
                # the first frame has been read.
                out = cv2.VideoWriter(
                    os.path.join(outVideoPath, 'outpy.avi'),
                    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                    fps,
                    (frame.shape[1], frame.shape[0]),
                )
            out.write(detectObjects(frame))
            curFrame += 1
            if curFrame % 10 == 0:
                print("Current frame: ", curFrame)

        if out is None:
            print("No frames were read; nothing was written")
        else:
            print("Video saved successfully")
    finally:
        inVideo.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

### Display video

In [68]:
def displayVideo(path):
    """Play a video in a window with detections drawn on every frame.

    Frames are read from `path`, annotated with detectObjects and shown in a
    window named 'detect'. Playback stops at the end of the video or when
    'q' is pressed. The capture and the window are released even if an
    error or interrupt occurs while processing.
    """
    video = cv2.VideoCapture(path)
    try:
        while True:
            retVal, frame = video.read()
            if not retVal:
                break
            cv2.imshow('detect', detectObjects(frame))
            # Mask to the low byte: on some platforms waitKey sets higher
            # bits in the returned key code.
            if (cv2.waitKey(30) & 0xFF) == ord('q'):
                break
    finally:
        video.release()
        cv2.destroyAllWindows()

In [69]:
# Directory that receives the rendered video, and the input clip to annotate.
outVideo_path = './'
video_path = os.path.join('test_data', 'challenge_video.mp4')


In [70]:
# Annotate the clip and write the result into outVideo_path.
# Long-running: every frame is passed through the detector.
writeVideo(video_path, outVideo_path)

Number of frames:  484
Current frame:  10
Current frame:  20
Current frame:  30
Current frame:  40
Current frame:  50
Current frame:  60
Current frame:  70
Current frame:  80
Current frame:  90
Current frame:  100
Current frame:  110


KeyboardInterrupt: 