# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video; there is no code-along here, since we can't reasonably train the YOLOv3 network ourselves. Instead, we will use a pretrained version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={J Redmon and A Farhadi},
              year={2018}
        }

For the pretrained model, download it from: https://drive.google.com/uc?id=1yT2-zmNFymMgY42Z72LIuqMaiWvYEUQR&export=download

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.


In [2]:
def process_image(img):
    """Resize, scale and batch an image for the network.

    # Argument:
        img: original image, as an array accepted by `cv2.resize`.

    # Returns
        image: float32 ndarray with a leading batch axis of size 1. The
            image is resized to 416x416 and its values are divided by
            255, so a 3-channel input yields shape (1, 416, 416, 3).

    # Raises
        ValueError: if `img` is None (for example when `cv2.imread`
            could not read the file).
    """
    if img is None:
        # Fail with a clear message instead of an opaque OpenCV error.
        raise ValueError('process_image received None instead of an image; '
                         'check that the image file exists and is readable.')

    image = cv2.resize(img, (416, 416),
                       interpolation=cv2.INTER_CUBIC)
    image = np.array(image, dtype='float32')
    image /= 255.  # scale the pixel values by 1/255
    image = np.expand_dims(image, axis=0)  # add the batch axis

    return image

In [3]:
def get_classes(file):
    """Read class names from a text file, one name per line.

    # Argument:
        file: path of the file listing the class names.

    # Returns
        class_names: List, the lines of the file in order, each with
            leading and trailing whitespace removed.

    """
    with open(file) as handle:
        # Iterating the handle yields the same lines as readlines().
        return [line.strip() for line in handle]

In [4]:
def draw(image, boxes, scores, classes, all_classes):
    """Draw every detection on the image and print a summary of it.

    Each box is unpacked as (x, y, w, h), rounded to whole pixels and
    clipped to the image bounds before the rectangle and its
    "<class name> <score>" label are rendered onto `image`.

    # Argument:
        image: original image; it is drawn on directly.
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.
        classes: ndarray, classes of objects (indices into all_classes).
        all_classes: all classes name.
    """
    for box, score, class_id in zip(boxes, scores, classes):
        x, y, w, h = box

        # floor(v + 0.5) rounds to the nearest pixel; max/min clip the
        # corners to the image (shape[1] is width, shape[0] is height).
        x_min = max(0, np.floor(x + 0.5).astype(int))
        y_min = max(0, np.floor(y + 0.5).astype(int))
        x_max = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        y_max = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        # The label is placed 6 pixels above the box's upper-left corner.
        name = all_classes[class_id]
        caption = '{0} {1:.2f}'.format(name, score)
        cv2.putText(image, caption, (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(name, score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Use yolo v3 to detect objects in one image and draw them on it.

    Prints the time spent in `yolo.predict`, then the per-detection
    details emitted by `draw`.

    # Argument:
        image: original image; detections are drawn directly onto it.
        yolo: YOLO, yolo model. Its `predict(pimage, image.shape)` must
            return a (boxes, classes, scores) triple, with `boxes` set
            to None when nothing was detected.
        all_classes: all classes name.

    # Returns:
        image: the same object that was passed in, annotated when any
            boxes were returned.
    """
    pimage = process_image(image)

    # perf_counter is monotonic, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    boxes, classes, scores = yolo.predict(pimage, image.shape)
    elapsed = time.perf_counter() - start

    print('time: {0:.2f}s'.format(elapsed))

    if boxes is not None:
        draw(image, boxes, scores, classes, all_classes)

    return image

In [6]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    Reads videos/test/<video> frame by frame, runs `detect_image` on each
    frame, shows the annotated frame in a window named "detection" and
    appends it to videos/res/<video>. Processing stops when no more
    frames can be read or when ESC is pressed in the window.

    # Argument:
        video: video file name, relative to videos/test.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    vout = cv2.VideoWriter()

    # Release the capture and the writer even when detection raises or the
    # run is interrupted, so the handles are not left open.
    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # Key code 27 is ESC; the mask keeps only the low byte.
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        vout.release()
        camera.release()
    

In [7]:
# Build the detector used by the cells below.
# NOTE(review): the two positional arguments look like detection
# thresholds — confirm their meaning against model/yolo_model.py.
yolo = YOLO(0.4, 0.5)
# Class names, one per line; draw() indexes this list by predicted class.
file = 'data/coco_classes.txt'
all_classes = get_classes(file)



### Detecting Images

In [8]:
# Run detection on one image from images/ and save the annotated copy
# under images/res/ with the same file name.
f = 'look_up.jpg'
path = 'images/'+f
image = cv2.imread(path)
# cv2.imread reports a missing or unreadable file by returning None
# rather than raising, so fail here with the offending path.
if image is None:
    raise OSError('could not read image: ' + path)
image = detect_image(image, yolo, all_classes)
cv2.imwrite('images/res/' + f, image)

time: 6.08s
class: person, score: 1.00
box coordinate x,y,w,h: [128.44468868 137.330553    59.24983081 193.59691849]
class: car, score: 0.96
box coordinate x,y,w,h: [165.85658681 264.37301517  93.75170904  40.36370981]
class: car, score: 0.50
box coordinate x,y,w,h: [ 40.09348176 269.95847958  82.55194852  33.95508271]



True

### Detecting on Video

In [None]:
# Detect videos one at a time; the file name is looked up inside the
# videos/test folder.
video = 'library1.mp4'
detect_video(video, yolo, all_classes)

time: 4.54s
time: 4.59s
time: 4.95s
time: 4.99s
time: 5.03s
time: 5.24s
time: 5.47s
time: 5.26s
time: 5.38s
time: 5.50s
time: 5.60s
time: 5.78s
time: 5.89s
time: 6.03s
time: 6.17s
time: 6.26s
time: 6.44s
time: 6.64s
time: 6.74s
time: 6.84s
time: 7.04s
time: 7.07s
time: 7.15s
time: 7.28s
time: 7.42s
time: 7.56s
time: 7.62s
time: 7.83s
time: 8.14s
time: 7.95s
time: 8.08s
time: 8.20s
time: 8.43s
time: 8.53s
time: 8.74s
time: 8.80s
time: 8.82s
time: 8.90s
time: 9.07s
time: 9.19s
time: 9.26s
time: 9.58s
time: 9.57s
time: 9.75s
time: 9.81s
time: 9.89s
class: person, score: 0.98
box coordinate x,y,w,h: [ 77.50088722  82.5893569  110.25932953 229.4584322 ]

time: 10.02s
class: person, score: 0.97
box coordinate x,y,w,h: [ 76.32193565  83.76358509 106.28978088 226.4747858 ]

time: 10.20s
class: person, score: 0.99
box coordinate x,y,w,h: [ 78.97354066  84.50096726 101.56925395 225.77258348]

time: 10.63s
class: person, score: 1.00
box coordinate x,y,w,h: [ 86.03263199  86.44786477  96.28368616 

----------
----------