# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video: there is no code-along here, because we can't reasonably train the YOLOv3 network ourselves. Instead, we will use a pre-trained version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={Redmon, Joseph and Farhadi, Ali},
              year={2018}
        }
--------
----------

-------
-------

In [14]:
import os
import time
import cv2
import numpy as np
import matplotlib.pyplot as plt
from model.yolo_model import YOLO

In [2]:
def process_image(img):
    """Prepare a raw image for the YOLO network.

    The image is resized to 416x416 with bicubic interpolation, converted
    to float32, divided by 255 and given a leading axis of length 1.

    # Argument:
        img: original image, as accepted by cv2.resize.

    # Returns
        ndarray of dtype float32; shape (1, 416, 416, 3) for a
        3-channel input image.
    """
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)

    # Dividing by 255 maps 8-bit pixel values into the range [0, 1].
    scaled = np.array(resized, dtype='float32') / 255.

    # Add the leading axis so the result is a batch holding one image.
    return scaled[np.newaxis, ...]

In [3]:
def get_classes(file):
    """Load class names from a text file, one name per line.

    # Argument:
        file: path of the text file listing the class names.

    # Returns
        List of str: every line of the file, in file order, with
        leading and trailing whitespace stripped.
    """
    with open(file) as handle:
        return [line.strip() for line in handle]

In [4]:
def box_draw(image, boxes, scores, classes, all_classes):
    """Annotate the image in place with one labelled rectangle per detection.

    For every detection the class name, score and raw box are also
    printed to stdout; a single blank line is printed at the end.

    # Argument:
        image: original image; it is drawn on directly.
        boxes: ndarray, boxes of objects, each unpacked as (x, y, w, h).
        scores: ndarray, scores of objects.
        classes: ndarray, classes of objects, used to index all_classes.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round each edge to the nearest pixel and clamp it to the image.
        x_min = max(0, np.floor(x + 0.5).astype(int))
        y_min = max(0, np.floor(y + 0.5).astype(int))
        x_max = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        y_max = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        label = all_classes[cl]

        # The caption sits 6 px above the box's upper-left corner.
        caption = '{0} {1:.2f}'.format(label, score)
        cv2.putText(
            image,
            caption,
            (x_min, y_min - 6),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 255),
            1,
            cv2.LINE_AA,
        )

        print('class: {0}, score: {1:.2f}'.format(label, score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Run YOLO v3 on one image and draw the detections onto it.

    The time spent in yolo.predict is printed to stdout.

    # Argument:
        image: original image; annotated in place when boxes are returned.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object that was passed in.
    """
    net_input = process_image(image)

    # Time only the predict call, not the pre-processing or the drawing.
    t0 = time.time()
    found_boxes, found_classes, found_scores = yolo.predict(net_input, image.shape)
    elapsed = time.time() - t0

    print('time: {0:.2f}s'.format(elapsed))

    # Boxes may be None (presumably when nothing was detected); in that
    # case the image is returned untouched.
    if found_boxes is None:
        return image

    # box_draw takes scores before classes, unlike predict's return order.
    box_draw(image, found_boxes, found_scores, found_classes, all_classes)
    return image

In [59]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    Reads videos/test/<video> frame by frame, runs detection on every
    frame, shows a half-size preview in a window named "detection" and
    writes the full-size annotated frames to videos/res/<video> at a
    fixed 20 frames per second. Pressing Esc in the preview window stops
    processing early.

    # Argument:
        video: video file.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)

    # Report an unreadable source instead of silently doing nothing.
    if not camera.isOpened():
        camera.release()
        print('could not open video: {0}'.format(video_path))
        return

    vout = cv2.VideoWriter()
    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            if not res:
                break

            image = detect_image(frame, yolo, all_classes)

            # Save the video frame by frame. The writer was opened with the
            # source frame size, so it must be given the full-size frame;
            # only the on-screen preview is shrunk.
            vout.write(image)

            preview = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
            cv2.imshow("detection", preview)

            # 27 is the key code for Esc.
            if (cv2.waitKey(110) & 0xff) == 27:
                break
    finally:
        # Always release the capture, the writer and the window, even if
        # detection raises part-way through the video.
        vout.release()
        camera.release()
        cv2.destroyAllWindows()
    

In [8]:
# Build the detector and load the class names used to label detections.
# NOTE(review): 0.6 and 0.5 are presumably the object-confidence and
# non-max-suppression thresholds — confirm against model.yolo_model.YOLO.
yolo = YOLO(0.6, 0.5)
file = 'data/coco_classes.txt'
all_classes = get_classes(file)



### Detecting Images

In [9]:
# Display the installed NumPy version as the cell output.
import numpy
numpy.version.version

'1.19.2'

In [10]:
# Load the test image. `f` is reused later as the output file name, so the
# input path is derived from it to keep the two in step.
f = 'bike2.jpg'
path = 'images/test/' + f
image = cv2.imread(path)

# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with the offending path rather than later inside cv2.resize.
if image is None:
    raise FileNotFoundError(path)


In [14]:
#image = cv2.imread('images/test/person.jpg')

In [11]:
# image

In [36]:
# Run detection on the loaded image and save the annotated result under
# images/res/ using the same file name. The value returned by cv2.imwrite
# is the cell output (shown as True below).
image = detect_image(image, yolo, all_classes)
cv2.imwrite('images/res/' + f, image)

time: 0.65s
class: motorbike, score: 1.00
box coordinate x,y,w,h: [ 40.6100688  116.55529404 412.78901625 376.64705729]



True

In [39]:
# Show the annotated image in a window named "image", wait for a key press,
# then close all OpenCV windows.
cv2.namedWindow("image", cv2.WINDOW_NORMAL)
cv2.imshow('image',image)
cv2.waitKey()
cv2.destroyAllWindows()

# Detecting on Video

In [60]:
# Detect videos one at a time; the file name is looked up in the
# videos/test folder by detect_video.
video = 'library1.mp4'
detect_video(video, yolo, all_classes)

time: 0.52s
class: person, score: 1.00
box coordinate x,y,w,h: [422.23278522 476.16264343 173.45147252 374.28131104]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [369.07299042 635.68237305 295.8633399  255.98875046]

time: 0.32s
class: person, score: 1.00
box coordinate x,y,w,h: [401.12045288 476.81549072 175.63258052 369.44412231]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [333.17790985 629.88769531 318.90304327 263.77016068]

time: 0.33s
class: person, score: 1.00
box coordinate x,y,w,h: [387.98526764 483.2711792  185.45423985 369.34688568]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [319.77171421 671.82182312 346.407938   246.46852493]

time: 0.33s
class: person, score: 1.00
box coordinate x,y,w,h: [387.64138699 482.35279083 184.81748343 358.74885559]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [320.5562067  649.5803833  330.62790155 247.7507782 ]

time: 0.32s
class: person, score: 1.00
box coordinate x,y,w,h: [380.89543819 481.66240692 191.67005539 3

In [None]:
# Detect videos one at a time; the file name is looked up in the
# videos/test folder by detect_video.
video = 'video.mp4'
detect_video(video, yolo, all_classes)

In [57]:
# Close any OpenCV windows that are still open.
cv2.destroyAllWindows()