# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video: there is no code-along here, because we can't reasonably train the YOLOv3 network ourselves. Instead, we will use a pre-trained version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={Redmon, Joseph and Farhadi, Ali},
              journal={arXiv preprint arXiv:1804.02767},
              year={2018}
        }
--------
----------

-------
-------

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.


In [2]:
def process_image(img):
    """Prepare a raw image for the network: resize, rescale, add batch axis.

    # Argument:
        img: original image (as returned by cv2.imread or a video frame).

    # Returns
        image: float32 ndarray with a leading batch axis of length 1,
            spatial size 416x416 and values divided by 255
            (i.e. shape (1, 416, 416, 3) for a 3-channel input).
    """
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)

    # Work in float32 and bring 8-bit pixel values down by a factor of 255.
    scaled = np.array(resized, dtype='float32')
    scaled /= 255.

    # Prepend the batch dimension expected by the model's predict call.
    return scaled[np.newaxis, ...]

In [None]:
def process_image(img):
    """Resize, rescale and expand an image into a network input batch.

    # Argument:
        img: original image.

    # Returns
        image: float32 ndarray with a leading batch axis of length 1,
            spatial size 416x416 and values divided by 255
            (i.e. shape (1, 416, 416, 3) for a 3-channel input).
    """
    image = cv2.resize(img, (416, 416),
                       interpolation=cv2.INTER_CUBIC)
    image = np.array(image, dtype='float32')
    image /= 255.
    image = np.expand_dims(image, axis=0)

    # Return the array that was actually processed above; the previous
    # version built `image_org` but returned the unbound name `image`.
    return image

In [3]:
def get_classes(file):
    """Read the class names from a text file, one name per line.

    # Argument:
        file: path of the text file holding the class names.

    # Returns
        class_names: List, every line of the file with surrounding
            whitespace stripped (blank lines become empty strings).

    """
    with open(file) as handle:
        # Iterating the handle yields the same lines as readlines().
        return [line.strip() for line in handle]

In [4]:
def draw(image, boxes, scores, classes, all_classes):
    """Draw every detected box and its label onto the image, in place.

    Also prints the class, score and raw box of each detection, followed
    by one blank line after all detections.

    # Argument:
        image: original image; it is drawn on directly.
        boxes: ndarray, boxes of objects, each as (x, y, w, h) where
            (x, y) is the top-left corner.
        scores: ndarray, scores of objects.
        classes: ndarray, classes of objects (indices into all_classes).
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round each edge to the nearest pixel and clamp it to the image.
        x_min = max(0, np.floor(x + 0.5).astype(int))
        y_min = max(0, np.floor(y + 0.5).astype(int))
        x_max = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        y_max = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        # The caption sits 6 pixels above the box's top-left corner.
        caption = '{0} {1:.2f}'.format(all_classes[cl], score)
        cv2.putText(image, caption, (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Run yolo v3 on one image and draw the detections onto it.

    Prints how long the prediction call took.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object that was passed in, with boxes
            drawn on it when something was detected.
    """
    network_input = process_image(image)

    # Time only the prediction call, not the pre-processing or drawing.
    started = time.time()
    boxes, classes, scores = yolo.predict(network_input, image.shape)
    elapsed = time.time() - started

    print('time: {0:.2f}s'.format(elapsed))

    # predict yields None for boxes when there is nothing to draw.
    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image

In [6]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    Reads ``videos/test/<video>`` frame by frame, runs detection on each
    frame, shows the result in a window named "detection" and writes it
    to ``videos/res/<video>``. Pressing ESC in the window stops early.

    The capture, the writer and the window are released even when the
    loop is interrupted (e.g. by KeyboardInterrupt), so the frames
    written so far end up in a properly closed output file.

    # Argument:
        video: video file name, relative to videos/test.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    vout = cv2.VideoWriter()

    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video: same frame size as the
        # input, fixed output rate of 20 frames per second.
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            # read() reports failure once no more frames are available.
            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # Wait up to 110 ms for a key press; 27 is the ESC key code.
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        # Always finalize the output file and free the capture/window,
        # including when detection is interrupted part-way through.
        vout.release()
        camera.release()
        cv2.destroyAllWindows()
    

In [7]:
# Build the detector once; it is reused by every detection cell below.
# NOTE(review): the two arguments are presumably the object-confidence and
# NMS thresholds -- confirm against model/yolo_model.py.
yolo = YOLO(0.6, 0.5)
# Text file with the class names, one per line (parsed by get_classes).
file = 'data/coco_classes.txt'
all_classes = get_classes(file)

Instructions for updating:
Colocations handled automatically by placer.




### Detecting Images

In [8]:
# Run detection on a single image from images/ and save the annotated
# result under images/res/ with the same file name.
f = '20.jpg'
path = 'images/'+f
image = cv2.imread(path)
# cv2.imread does not raise on a missing or unreadable file, it returns
# None; fail here with a clear message instead of deep inside detect_image.
if image is None:
    raise FileNotFoundError('could not read image: ' + path)
image = detect_image(image, yolo, all_classes)
cv2.imwrite('images/res/' + f, image)

time: 12.23s
class: person, score: 0.84
box coordinate x,y,w,h: [137.53900623  66.8247689  205.28562737 285.87599194]
class: car, score: 0.96
box coordinate x,y,w,h: [ 23.78177941 138.33699417 456.48375928 213.40966523]



True

### Detecting on Video

In [11]:
# Detect one video at a time; the file name is resolved relative to the
# videos/test folder by detect_video.
video = 'videoavi1.avi'
detect_video(video, yolo, all_classes)

time: 25.62s
time: 25.64s
time: 25.28s
time: 25.52s
time: 25.73s
time: 26.13s
time: 26.17s
time: 26.08s
time: 26.71s
time: 29.56s
class: car, score: 0.85
box coordinate x,y,w,h: [118.82181168 199.72412109  76.99911118  39.55555201]

time: 32.50s
class: truck, score: 0.62
box coordinate x,y,w,h: [119.7254467  197.56444931  80.5957222   41.81505561]

time: 32.84s
class: truck, score: 0.75
box coordinate x,y,w,h: [125.99826813 192.67278671  78.5406065   42.86000848]

time: 31.77s
time: 33.63s
class: car, score: 0.67
box coordinate x,y,w,h: [148.47999573 180.37888527  61.34636402  42.87616253]

time: 32.54s
time: 32.48s
time: 32.78s
time: 33.23s
time: 33.38s
time: 33.95s
time: 35.62s
time: 34.54s
time: 34.39s
time: 34.35s
time: 35.10s
time: 35.54s
time: 35.78s
time: 35.82s
time: 40.02s
time: 36.26s
time: 42.88s
time: 45.02s
time: 38.38s
time: 42.33s
time: 38.53s
time: 41.94s
time: 42.00s
time: 44.58s
time: 45.71s
time: 43.77s
time: 50.52s
time: 43.97s
time: 44.96s
time: 40.41s
time: 39.14s

KeyboardInterrupt: 

In [12]:
# Detect one video at a time; the file name is resolved relative to the
# videos/test folder by detect_video.
video = 'video.mp4'
detect_video(video, yolo, all_classes)