# YOLO v3 Object Detection

Let's see how to use a state-of-the-art object detector! Please make sure to watch the video: there is no code-along here. Since we can't reasonably train the YOLOv3 network ourselves, we will use a pre-trained version instead.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={Redmon, Joseph and Farhadi, Ali},
              year={2018}
        }
--------
----------

-------
-------

In [2]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# def process_image(img):
#     """Resize, reduce and expand image.

#     # Argument:
#         img: original image.

#     # Returns
#         image: ndarray(64, 64, 3), processed image.
#     """
#     image = cv2.resize(img, (416, 416),
#                        interpolation=cv2.INTER_CUBIC)
#     image = np.array(image, dtype='float32')
#     image /= 255.
#     image = np.expand_dims(image, axis=0)

#     return image

In [3]:
def process_image(img):
    """Prepare a raw image for the network: resize, rescale, add batch axis.

    # Argument:
        img: original image, as accepted by `cv2.resize`.

    # Returns
        float32 ndarray: the image resized to 416x416 with bicubic
        interpolation, divided by 255, with a new leading axis of
        length 1 prepended.
    """
    target_size = (416, 416)
    resized = cv2.resize(img, target_size, interpolation=cv2.INTER_CUBIC)

    # Convert to float32 first so the division keeps that dtype.
    scaled = resized.astype('float32') / 255.

    # Prepend the batch axis.
    return scaled[np.newaxis, ...]

In [4]:
# def get_classes(file):
#     """Get classes name.

#     # Argument:
#         file: classes name for database.

#     # Returns
#         class_names: List, classes name.

#     """
#     with open(file) as f:
#         class_names = f.readlines()
#     class_names = [c.strip() for c in class_names]

#     return class_names

In [5]:
def get_classes(file):
    """Read the class names from a text file, one name per line.

    # Argument:
        file: path of the file listing the class names.

    # Returns
        List of str: every line of the file, in order, with leading and
        trailing whitespace stripped.

    """
    with open(file) as handle:
        return [line.strip() for line in handle]

In [6]:
# def draw_box(images, boxes, scores, classes, all_classes):
#     """Draw the boxes on the image.

#     # Argument:
#         images: original image.
#         boxes: ndarray, boxes of objects.
#         classes: ndarray, classes of objects.
#         scores: ndarray, scores of objects.
#         all_classes: all classes name.
#     """
#     for box, score, cl in zip(boxes, scores, classes):
#         x, y, w, h = box

#         top = max(0, np.floor(x + 0.5).astype(int))
#         left = max(0, np.floor(y + 0.5).astype(int))
#         right = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
#         bottom = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

#         cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
#         cv2.putText(image, '{0} {1:.2f}'.format(all_classes[cl], score),
#                     (top, left - 6),
#                     cv2.FONT_HERSHEY_SIMPLEX,
#                     0.6, (0, 0, 255), 1,
#                     cv2.LINE_AA)

#         print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
#         print('box coordinate x,y,w,h: {0}'.format(box))

#     print()

In [7]:
def box_draw(image, boxes, scores, classes, all_classes):
    """Draw the detected boxes and their labels on the image, in place.

    For every detection a rectangle and a "<class> <score>" label are
    drawn on `image`, and the class, score and raw box are printed.

    # Argument:
        image: original image; it is modified in place.
        boxes: ndarray, boxes of objects, each unpacked as (x, y, w, h).
        classes: ndarray, classes of objects (used to index all_classes).
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box
        img_h, img_w = image.shape[:2]

        # Round to the nearest pixel and clip the box to the image bounds.
        # x maps to left/right and y to top/bottom; plain ints are handed
        # to OpenCV instead of numpy scalars.
        left = max(0, int(np.floor(x + 0.5)))
        top = max(0, int(np.floor(y + 0.5)))
        right = min(img_w, int(np.floor(x + w + 0.5)))
        bottom = min(img_h, int(np.floor(y + h + 0.5)))

        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.6
        thickness = 1
        label = '{0} {1:.2f}'.format(all_classes[cl], score)

        # The label normally sits 6 px above the box. For a box touching
        # the top edge that baseline would be negative and the text would
        # be drawn outside the image, so keep it at least one text height
        # below the top border.
        (_, text_h), _ = cv2.getTextSize(label, font, font_scale, thickness)
        label_y = max(top - 6, text_h)

        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, label,
                    (left, label_y),
                    font,
                    font_scale, (0, 0, 255), thickness,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [8]:
# def detect_image(image, yolo, all_classes):
#     """Use yolo v3 to detect images.

#     # Argument:
#         image: original image.
#         yolo: YOLO, yolo model.
#         all_classes: all classes name.

#     # Returns:
#         image: processed image.
#     """
#     pimage = process_image(image)

#     start = time.time()
#     boxes, classes, scores = yolo.predict(pimage, image.shape)
#     end = time.time()

#     print('time: {0:.2f}s'.format(end - start))

#     if boxes is not None:
#         draw_boxes(image, boxes, scores, classes, all_classes)

#     return image

In [9]:
def detect_image(image, yolo, all_classes):
    """Run the YOLO v3 model on one image and draw what it finds.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the object that was passed in, after `box_draw` has been
            called on it (only when the model returned boxes).
    """
    net_input = process_image(image)

    # Time the prediction call and report it.
    t0 = time.time()
    found_boxes, found_classes, found_scores = yolo.predict(net_input, image.shape)
    t1 = time.time()
    print('time: {0:.2f}s'.format(t1 - t0))

    # Nothing detected: hand the image back untouched.
    if found_boxes is None:
        return image

    box_draw(image, found_boxes, found_scores, found_classes, all_classes)
    return image

In [10]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    Reads videos/test/<video> frame by frame, runs `detect_image` on each
    frame, shows the result in a window named "detection" and writes it
    to videos/res/<video>. Pressing Esc in the window stops early.

    # Argument:
        video: video file name, looked up under videos/test.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    out_dir = os.path.join("videos", "res")

    camera = cv2.VideoCapture(video_path)
    if not camera.isOpened():
        # Nothing to process; say so instead of silently doing nothing.
        camera.release()
        print('could not open video: {0}'.format(video_path))
        return

    # VideoWriter does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    vout = cv2.VideoWriter()
    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')

        # The output is written at a fixed 20 frames per second.
        vout.open(os.path.join(out_dir, video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # Esc (key code 27) stops processing early.
            if (cv2.waitKey(110) & 0xff) == 27:
                break
    finally:
        # Always release the writer, the capture and the preview window,
        # even when detection raises part-way through the video.
        vout.release()
        camera.release()
        cv2.destroyAllWindows()
    

In [11]:
# Build the detector.
# NOTE(review): the two positional arguments look like score / overlap
# thresholds — confirm against model/yolo_model.py.
yolo = YOLO(0.6, 0.5)
# Text file with the class names, read by get_classes.
file = 'data/coco_classes.txt'
all_classes = get_classes(file)

Instructions for updating:
Colocations handled automatically by placer.




### Detecting Images

In [12]:
# Show the installed NumPy version.
import numpy
numpy.version.version

'1.18.1'

In [13]:
# Name of the test image; the result is later saved under the same name.
f = 'bike2.jpg'
# Build the input path from `f` so the loaded file and the saved file
# cannot drift apart when the name is changed.
path = 'images/test/' + f
image = cv2.imread(path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise FileNotFoundError('could not read image: {0}'.format(path))


In [14]:
#image = cv2.imread('images/test/person.jpg')

In [15]:
# Display the loaded pixel array.
image

array([[[141, 100,  31],
        [140,  99,  30],
        [139,  98,  29],
        ...,
        [185, 160, 120],
        [185, 160, 120],
        [185, 160, 120]],

       [[140,  99,  30],
        [140,  99,  30],
        [140,  99,  30],
        ...,
        [185, 160, 120],
        [185, 160, 120],
        [185, 160, 120]],

       [[140,  99,  30],
        [140,  99,  30],
        [141, 100,  31],
        ...,
        [185, 160, 120],
        [185, 160, 120],
        [185, 160, 120]],

       ...,

       [[135, 150, 153],
        [136, 151, 154],
        [137, 152, 155],
        ...,
        [122, 134, 128],
        [119, 131, 125],
        [114, 126, 120]],

       [[135, 150, 152],
        [135, 150, 152],
        [135, 150, 152],
        ...,
        [122, 134, 128],
        [119, 131, 125],
        [114, 126, 120]],

       [[136, 151, 153],
        [135, 150, 152],
        [134, 149, 151],
        ...,
        [122, 134, 128],
        [120, 132, 126],
        [114, 126, 120]]

In [16]:
# Run detection on the loaded image, then save the returned image
# under images/res/ with the file name held in `f`.
image = detect_image(image, yolo, all_classes)
cv2.imwrite('images/res/' + f, image)

time: 7.36s
class: person, score: 0.95
box coordinate x,y,w,h: [315.10120487  40.2404087  158.28243494 194.26340738]
class: motorbike, score: 1.00
box coordinate x,y,w,h: [ 33.4812361  137.98993751 345.40128613 353.58710378]
class: motorbike, score: 0.77
box coordinate x,y,w,h: [306.42306995  75.18163888 168.99043286 227.85311642]



True

### Detecting on Video

In [17]:
# Detect videos one at a time; detect_video looks the name up under videos/test.
video = 'library1.mp4'
detect_video(video, yolo, all_classes)

time: 6.74s
class: person, score: 1.00
box coordinate x,y,w,h: [422.23278522 476.16264343 173.45146179 374.28131104]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [369.07290459 635.68229675 295.8634901  255.98888397]

time: 11.45s
class: person, score: 1.00
box coordinate x,y,w,h: [401.12045288 476.81560516 175.63254833 369.44404602]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [333.17782402 629.88777161 318.9031291  263.7701416 ]

time: 11.78s
class: person, score: 1.00
box coordinate x,y,w,h: [387.98526764 483.2711792  185.45417547 369.34688568]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [319.77162838 671.82174683 346.40808821 246.46860123]

time: 6.56s
class: person, score: 1.00
box coordinate x,y,w,h: [387.6414299  482.35267639 184.81741905 358.74912262]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [320.55616379 649.5803833  330.62790155 247.7507782 ]

time: 10.46s
class: person, score: 1.00
box coordinate x,y,w,h: [380.89543819 481.66233063 191.6700553

time: 25.91s
class: person, score: 0.99
box coordinate x,y,w,h: [115.5973506  504.60845947 189.30554867 320.02216339]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [ 19.47182894 674.86991882 355.25643826 257.07550049]

time: 27.22s
class: person, score: 1.00
box coordinate x,y,w,h: [102.07807302 496.13487244 196.69977665 345.53043365]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [ 11.51035666 673.45565796 337.63284445 257.35544205]

time: 26.35s
class: person, score: 1.00
box coordinate x,y,w,h: [ 89.69544053 493.44749451 199.58658457 345.91110229]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [  3.77275229 681.42944336 317.88180828 247.25730896]

time: 25.23s
class: person, score: 0.99
box coordinate x,y,w,h: [ 63.77949715 504.38163757 181.92932367 316.12703323]
class: bicycle, score: 0.99
box coordinate x,y,w,h: [-14.15315866 681.24809265 331.01677895 234.24030304]

time: 27.79s
class: person, score: 1.00
box coordinate x,y,w,h: [ 54.52458858 501.09573364 177.74516

In [None]:
# Detect videos one at a time; detect_video looks the name up under videos/test.
video = 'video.mp4'
detect_video(video, yolo, all_classes)