# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video. There is no code-along here: since we can't reasonably train the YOLOv3 network ourselves, we will instead use a pre-trained version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={J Redmon and A Farhadi},
              year={2018}
        }
--------
----------
## YOU MUST WATCH THE VIDEO LECTURE TO PROPERLY SET UP THE MODEL AND WEIGHTS. THIS NOTEBOOK WON'T WORK UNLESS YOU FOLLOW THE EXACT SETUP SHOWN IN THE VIDEO LECTURE.
-------
-------

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.


In [4]:
def process_image(img):
    """Resize, rescale and batch an image for the network.

    # Argument:
        img: original image, in a form accepted by `cv2.resize`.

    # Returns
        image: float32 ndarray with a leading batch axis of length 1.
            The image is resized to 416x416 and every value is divided
            by 255, so a 3-channel input yields shape (1, 416, 416, 3).
    """
    resized = cv2.resize(img, (416, 416),
                         interpolation=cv2.INTER_CUBIC)
    # Convert to float32 first so the division yields float32 values.
    scaled = resized.astype('float32') / 255.

    # Prepend the batch axis.
    return np.expand_dims(scaled, axis=0)

In [5]:
def get_classes(file):
    """Get classes name.

    # Argument:
        file: path of a text file holding one class name per line.

    # Returns
        class_names: List, classes name, in file order, with leading and
            trailing whitespace removed from every line.

    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file, encoding='utf-8') as f:
        return [line.strip() for line in f]

In [6]:
def draw(image, boxes, scores, classes, all_classes):
    """Draw the boxes on the image.

    Every detection is outlined with a rectangle and labelled with its
    class name and score; the same information is printed to stdout.
    The drawing calls operate directly on `image`.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects, each unpacked as (x, y, w, h).
        classes: ndarray, classes of objects (used as indices into
            all_classes).
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    font_scale = 0.6
    font_thickness = 1
    img_h, img_w = image.shape[:2]

    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round to the nearest pixel, clip to the image bounds and convert
        # to plain Python ints before handing the points to OpenCV.
        x1 = max(0, int(np.floor(x + 0.5)))
        y1 = max(0, int(np.floor(y + 0.5)))
        x2 = min(img_w, int(np.floor(x + w + 0.5)))
        y2 = min(img_h, int(np.floor(y + h + 0.5)))

        label = '{0} {1:.2f}'.format(all_classes[cl], score)

        # The text origin is its bottom-left corner. Keep the baseline at
        # least one text height below the top edge so the label stays
        # visible when the box touches the top of the image.
        (_, text_h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX,
                                         font_scale, font_thickness)
        label_y = max(y1 - 6, text_h)

        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(image, label,
                    (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    font_scale, (0, 0, 255), font_thickness,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [7]:
def detect_image(image, yolo, all_classes):
    """Use yolo v3 to detect images.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object that was passed in, with the
            detected boxes drawn onto it when there are any.

    # Raises:
        ValueError: if image is None (for example, a failed image load).
    """
    # Fail early with a clear message rather than deep inside the resize.
    if image is None:
        raise ValueError('image is None; check that the input could be read')

    pimage = process_image(image)

    # perf_counter is a monotonic clock intended for measuring intervals.
    start = time.perf_counter()
    boxes, classes, scores = yolo.predict(pimage, image.shape)
    elapsed = time.perf_counter() - start

    print('time: {0:.2f}s'.format(elapsed))

    # boxes may be None, in which case there is nothing to draw.
    if boxes is not None:
        draw(image, boxes, scores, classes, all_classes)

    return image

In [8]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    Reads videos/test/<video> frame by frame, runs detection on each
    frame, shows it in a window named "detection" and writes the
    annotated frames to videos/res/<video>. Pressing Esc stops early.

    # Argument:
        video: video file name, relative to the videos/test folder.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    vout = cv2.VideoWriter()

    try:
        if not camera.isOpened():
            # Nothing to process; report it instead of silently returning.
            print('could not open video: {0}'.format(video_path))
            return

        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # 27 is the Esc key code
            if (cv2.waitKey(110) & 0xff) == 27:
                break
    finally:
        # Release the handles and close the window even if detection fails
        # part-way through the video.
        vout.release()
        camera.release()
        cv2.destroyAllWindows()
    

In [9]:
# Build the detector. NOTE(review): the two positional arguments are
# presumably the object-confidence and NMS thresholds; confirm against
# model/yolo_model.py.
yolo = YOLO(0.6, 0.5)
# Class names, one per line; draw() looks names up in this list by index.
file = 'data/coco_classes.txt'
all_classes = get_classes(file)



### Detecting Images

In [12]:
# Run detection on one image from images/test and save the annotated
# copy under images/res with the same file name.
f = 'jingxiang-gao-489454-unsplash.jpg'
path = 'images/test/'+f
# NOTE(review): cv2.imread is documented to return None when the file
# cannot be read; that case is not checked here.
image = cv2.imread(path)
image = detect_image(image, yolo, all_classes)
# The value returned by imwrite is shown as the cell's output.
cv2.imwrite('images/res/' + f, image)

time: 1.25s
class: person, score: 0.64
box coordinate x,y,w,h: [2523.8918066  1482.56186676  619.408831   1302.60197306]
class: bicycle, score: 0.84
box coordinate x,y,w,h: [2877.69520283 2008.98068333 1303.69895697  717.74808955]



True

In [14]:
# Same steps as the previous cell, for a second test image.
f = 'person.jpg'
path = 'images/test/'+f
# NOTE(review): cv2.imread is documented to return None when the file
# cannot be read; that case is not checked here.
image = cv2.imread(path)
image = detect_image(image, yolo, all_classes)
# The value returned by imwrite is shown as the cell's output.
cv2.imwrite('images/res/' + f, image)

time: 1.22s
class: person, score: 1.00
box coordinate x,y,w,h: [187.67944336  83.12436104  91.77330017 306.58684635]
class: horse, score: 1.00
box coordinate x,y,w,h: [396.46743774 137.31078506 215.66379547 208.48716593]
class: dog, score: 1.00
box coordinate x,y,w,h: [ 61.278615   263.36980581 145.181036    88.42913878]



True

### Detecting on Video

In [16]:
# Detect videos one at a time. The file name is resolved inside the
# videos/test folder, and the annotated video is written to videos/res
# under the same name.
video = 'funny_dog.mp4'
detect_video(video, yolo, all_classes)

time: 1.14s
class: person, score: 0.96
box coordinate x,y,w,h: [ -8.99477005 -13.95520091 374.6545887  876.1280179 ]
class: person, score: 0.92
box coordinate x,y,w,h: [1087.33394623   75.553177    238.97091866  451.40287042]

time: 1.13s
class: person, score: 0.99
box coordinate x,y,w,h: [ -7.41085052 -15.15665889 387.78422356 889.95598555]
class: person, score: 0.94
box coordinate x,y,w,h: [1088.34182739   77.16451406  237.61413574  448.92492771]

time: 1.23s
class: person, score: 0.97
box coordinate x,y,w,h: [ -3.46245289 -29.72510934 405.80552101 921.72921896]
class: person, score: 0.92
box coordinate x,y,w,h: [1086.72134399   72.97814369  240.02180099  456.73878193]

time: 1.22s
class: person, score: 0.97
box coordinate x,y,w,h: [ -3.08001995 -29.86306071 405.60693741 922.40352631]
class: person, score: 0.92
box coordinate x,y,w,h: [1086.77970886   73.01918149  239.92964745  456.56800032]

time: 1.20s
class: person, score: 0.97
box coordinate x,y,w,h: [  3.805089   -32.25989342 38

time: 1.08s
class: person, score: 0.87
box coordinate x,y,w,h: [ 20.21490097 -11.28819466 423.48466873 714.65442181]
class: person, score: 0.79
box coordinate x,y,w,h: [1100.21072388   58.17117512  226.48255348  427.54603744]

time: 1.09s
class: person, score: 0.89
box coordinate x,y,w,h: [ 26.91691875  -7.67638564 409.44834709 706.44280672]
class: person, score: 0.78
box coordinate x,y,w,h: [1096.78791046   56.65335596  230.86931705  429.66178536]
class: dog, score: 0.62
box coordinate x,y,w,h: [948.64431381 516.44298434 360.58487892 333.69021177]

time: 1.08s
class: person, score: 0.74
box coordinate x,y,w,h: [1097.86514282   54.9110359   225.94711304  427.59222507]

time: 1.09s
class: person, score: 0.80
box coordinate x,y,w,h: [1097.97328949   54.1931802   225.28123856  429.696064  ]
class: person, score: 0.71
box coordinate x,y,w,h: [ 65.48413754   6.4514637  378.4750843  618.83435011]

time: 1.09s
class: person, score: 0.81
box coordinate x,y,w,h: [1097.98130035   54.49570119  22