# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video, there is no code along here, since we can't reasonably train the YOLOv3 network ourself, instead we will use a pre-established version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,  
              title={YOLOv3: An Incremental Improvement},  
              author={J Redmon, A Farhadi },
              year={2018} 
--------
----------
## YOU MUST WATCH THE VIDEO LECTURE TO PROPERLY SET UP THE MODEL AND WEIGHTS. THIS NOTEBOOK WON'T WORK UNLESS YOU FOLLOW THE EXACT SET UP SHOWN IN THE VIDEO LECTURE.
-------
-------

In [3]:
import tensorflow 

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

In [2]:
def process_image(img):
    """Resize, reduce and expand image.

    # Argument:
        img: original image.

    # Returns
        image: ndarray(64, 64, 3), processed image.
    """
    image = cv2.resize(img, (416, 416),
                       interpolation=cv2.INTER_CUBIC)
    image = np.array(image, dtype='float32')
    image /= 255.
    image = np.expand_dims(image, axis=0)

    return image

In [3]:
def get_classes(file):
    """Get classes name.

    # Argument:
        file: classes name for database.

    # Returns
        class_names: List, classes name.

    """
    with open(file) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    return class_names

In [4]:
def draw(image, boxes, scores, classes, all_classes):
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        top = max(0, np.floor(x + 0.5).astype(int))
        left = max(0, np.floor(y + 0.5).astype(int))
        right = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        bottom = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(all_classes[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Use yolo v3 to detect images.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: processed image.
    """
    pimage = process_image(image)

    start = time.time()
    boxes, classes, scores = yolo.predict(pimage, image.shape)
    end = time.time()

    print('time: {0:.2f}s'.format(end - start))

    if boxes is not None:
        draw(image, boxes, scores, classes, all_classes)

    return image

In [6]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    # Argument:
        video: video file.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

    # Prepare for saving the detected video
    sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*'mpeg')

    
    vout = cv2.VideoWriter()
    vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

    while True:
        res, frame = camera.read()

        if not res:
            break

        image = detect_image(frame, yolo, all_classes)
        cv2.imshow("detection", image)

        # Save the video frame by frame
        vout.write(image)

        if cv2.waitKey(110) & 0xff == 27:
                break

    vout.release()
    camera.release()
    

In [5]:
yolo = YOLO(0.6, 0.5)
file = 'data/coco_classes.txt'
# all_classes = get_classes(file)



### Detecting Images

In [23]:
print(image)
process_image(image)

None


error: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4052: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [8]:
f = 'formals.jpeg'
path = 'images/'+f
image = cv2.imread(path)
image = detect_image(image, yolo, all_classes)
cv2.imwrite('images/res/' + f, image)

time: 1.71s
class: person, score: 1.00
box coordinate x,y,w,h: [ 97.60655165  73.66957426 561.11924171 952.34444618]
class: tie, score: 0.95
box coordinate x,y,w,h: [390.5273366  372.70505548 117.67774433 476.9596982 ]



True

# Detecting on Video

In [9]:
# detect videos one at a time in videos/test folder    
video = 'college.mp4'
path='videos/'+video
detect_video(video, yolo, all_classes)

time: 0.49s
time: 0.51s
time: 0.52s
time: 0.50s
time: 0.49s
time: 0.52s
time: 0.50s
time: 0.50s
time: 0.48s
time: 0.48s
time: 0.50s
time: 0.49s
time: 0.50s
time: 0.51s
time: 0.50s
time: 0.52s
time: 0.55s
time: 0.51s
time: 0.52s
time: 0.57s
time: 0.52s
time: 0.53s
time: 0.54s
time: 0.50s
time: 0.64s
time: 0.53s
time: 0.50s
time: 0.50s
time: 0.51s
time: 0.51s
time: 0.52s
time: 0.52s
time: 0.49s
time: 0.54s


KeyboardInterrupt: 

In [1]:
def frontcam( yolo, all_classes):
    camera = cv2.VideoCapture(1)
    cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

    # sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
    #     int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    # fourcc = cv2.VideoWriter_fourcc(*'mpeg')
    # 
    # 
    # vout = cv2.VideoWriter()
    # vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

    while True:
        res, frame = camera.read()
        if not res:
            break
        image = detect_image(frame, yolo, all_classes)
        cv2.imshow("detection", image)

        # Save the video frame by frame
        # vout.write(image)

        if cv2.waitKey(110) & 0xff == 27:
                break

    # vout.release()
    camera.release()


In [9]:
frontcam(yolo,all_classes)

time: 2.58s
time: 0.66s
class: person, score: 0.86
box coordinate x,y,w,h: [ 18.63950729  12.53445625 617.2220993  466.78905487]

time: 0.65s
class: person, score: 0.76
box coordinate x,y,w,h: [ 11.32688522  31.45726204 623.41594696 430.32714844]

time: 0.64s
class: person, score: 0.91
box coordinate x,y,w,h: [ 16.12371445  21.23063564 621.53560638 440.96240044]

time: 0.66s
time: 0.67s
time: 0.66s
class: person, score: 0.80
box coordinate x,y,w,h: [2.91617203e+01 4.57534790e-01 5.75727615e+02 4.80981674e+02]

time: 0.69s
class: person, score: 0.99
box coordinate x,y,w,h: [ 78.81141663  85.38249493 499.64576721 377.46920586]

time: 0.66s
class: person, score: 0.99
box coordinate x,y,w,h: [ 62.99034119 203.61835957 518.10348511 272.33422279]

time: 0.66s
class: person, score: 1.00
box coordinate x,y,w,h: [ 55.32852173 137.2679615  543.43070984 348.27260971]

time: 0.67s
class: person, score: 1.00
box coordinate x,y,w,h: [ 56.59015656  22.94091225 546.25183105 433.00186157]

time: 0.68s


time: 0.69s
class: person, score: 0.91
box coordinate x,y,w,h: [122.26465225  28.02685261 414.46109772 361.59427643]

time: 0.73s
class: person, score: 1.00
box coordinate x,y,w,h: [ 44.19525146  30.15562534 534.27005768 359.68008041]

time: 0.68s
class: person, score: 1.00
box coordinate x,y,w,h: [ 43.17163467   6.17026806 522.29640961 405.63735008]

time: 0.67s
class: person, score: 1.00
box coordinate x,y,w,h: [ 39.5158577   12.18315125 531.47163391 392.87916183]

time: 0.66s
class: person, score: 1.00
box coordinate x,y,w,h: [164.49100494   4.50309277 438.84162903 386.04457855]

time: 0.72s
class: person, score: 1.00
box coordinate x,y,w,h: [179.56176758   3.38434696 446.03096008 391.13505363]

time: 0.68s
class: person, score: 1.00
box coordinate x,y,w,h: [182.03121185  10.79558372 448.51074219 376.80378914]

time: 0.69s
class: person, score: 0.87
box coordinate x,y,w,h: [173.49121094   9.27756786 460.09902954 393.17061424]

time: 0.69s
time: 0.67s
class: person, score: 1.00
box c

In [8]:
tensorflow.keras.utils.plot_model(yolo)

AttributeError: 'YOLO' object has no attribute 'built'