# Implement YOLOV3 from `YunYang94` code

[Source code](https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/f99fcef22caa2758b5eefce10ee789384345506d/4-Object_Detection/YOLOV3) that this project is based on.

----

## Load packages

In [1]:
import cv2
import time
import numpy as np
import pandas as pd
import core.utils as utils
import tensorflow as tf
# from core.yolov3 import YOLOv3, decode

from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import gc

In [2]:
print('tensorflow version:', tf.__version__)

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

tf.per_process_gpu_memory_fraction = 0.3

tensorflow version: 2.2.0-rc2


In [3]:
# tf.config.experimental.list_physical_devices('CPU')

## 1. Load video and predict on each frame

In [4]:
video_path      = "/data/archives/1.1.11.MP4"
# video_path      = 0
num_classes     = 80
input_size      = 416

Load YOLOV3 model

In [5]:
# Load from weight file

# input_layer  = tf.keras.layers.Input([input_size, input_size, 3])
# feature_maps = YOLOv3(input_layer)

# bbox_tensors = []
# for i, fm in enumerate(feature_maps):
#     bbox_tensor = decode(fm, i)
#     bbox_tensors.append(bbox_tensor)

# model = tf.keras.Model(input_layer, bbox_tensors)
# utils.load_weights(model, "/data/model/yolov3.weights")

In [7]:
# Load from h5 file
model_path = '/data/model/yolov3.h5'
model = tf.keras.models.load_model(model_path)



In [8]:
model.input

<tf.Tensor 'input_1_1:0' shape=(None, 416, 416, 3) dtype=float32>

In [7]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 416, 416, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 416, 416, 32) 128         conv2d[0][0]                     
__________________________________________________________________________________________________
tf_op_layer_LeakyRelu (TensorFl (None, 416, 416, 32) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

__________________________________________________________________________________________________
tf_op_layer_Tile/multiples (Ten (2,)                 0           tf_op_layer_strided_slice_1[0][0]
__________________________________________________________________________________________________
tf_op_layer_strided_slice_18 (T (1, None)            0           tf_op_layer_Range_3[0][0]        
__________________________________________________________________________________________________
tf_op_layer_Tile_4/multiples (T (2,)                 0           tf_op_layer_strided_slice_12[0][0
__________________________________________________________________________________________________
tf_op_layer_strided_slice_17 (T (None, 1)            0           tf_op_layer_Range_2[0][0]        
__________________________________________________________________________________________________
tf_op_layer_Tile_3/multiples (T (2,)                 0           tf_op_layer_strided_slice_12[0][0
__________

tf_op_layer_strided_slice_21 (T (1, None, None, 1, 2 0           tf_op_layer_concat_5[0][0]       
__________________________________________________________________________________________________
tf_op_layer_Tile_5/multiples (T (5,)                 0           tf_op_layer_strided_slice_11[0][0
__________________________________________________________________________________________________
tf_op_layer_Reshape_2 (TensorFl (None, None, None, N 0           conv2d_58[0][0]                  
                                                                 tf_op_layer_Reshape_2/shape[0][0]
__________________________________________________________________________________________________
tf_op_layer_strided_slice_32 (T (1, None, None, 1, 2 0           tf_op_layer_concat_8[0][0]       
__________________________________________________________________________________________________
tf_op_layer_Tile_8/multiples (T (5,)                 0           tf_op_layer_strided_slice_22[0][0
__________

Load video

In [21]:
def get_next_frame(vid):
    """
    """
    # Get next frame
    return_value, frame = vid.read()
    
    # If there is a frame then change color scale
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame
    else:
        return None
    
def predict_on_frame(frame):
    """
    """
    # Preprocess image (change size and convert to np array)
    frame_size = frame.shape[:2]
    image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    
    gc.collect()

    # Predict using keras model
    pred_bbox = model.predict_on_batch(image_data)
    
    del image_data
    gc.collect()

    # Post process boxes to so that they can be draw on image
    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    
    # Draw box on image
    image = utils.draw_bbox(frame, bboxes)
    
    del frame, pred_bbox
    gc.collect()

    # Convert image to np array
    result = np.asarray(image)
    
    # Add an information at the top of the image
    info = 'no object' if len(bboxes) < 1 else 'object detected'
    cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=2, color=(255, 255, 255), thickness=5)
    
    result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    del bboxes, image
    gc.collect()
    
    if cv2.waitKey(1) & 0xFF == ord('q'): return None
    return result



def main(video_path, break_at=60, ignore_rate=None):
    """
    """
    vid = cv2.VideoCapture(video_path)
    fps = round(vid.get(cv2.CAP_PROP_FPS),0)
    
    t0 = time.time()
    
    cnt = 0
    frame = get_next_frame(vid)
    height,width = frame.shape[:2]
    
    nb_frames_ignored = fps if ignore_rate is None else int(fps*ignore_rate)
    out_fps = fps / nb_frames_ignored
    print(out_fps)
    
    print('Original video: %ix%i and %i fps'%(width, height, fps))
    
    out = cv2.VideoWriter('test.mp4', cv2.VideoWriter_fourcc(*'DIVX'), out_fps, (width, height))
    
    while frame is not None:
        if (ignore_rate is not None) & (cnt != 0):
            if cnt%nb_frames_ignored != 0:
                cnt += 1
                frame = get_next_frame(vid)
                continue
        
        image = predict_on_frame(frame)
        
        out.write(image)
        
        del image
        gc.collect()
        frame = get_next_frame(vid)
        
        if cnt%nb_frames_ignored == 0:
            print('%i frames analysed in %.2fs'%(cnt, time.time()-t0))
        cnt += 1
            
        if break_at is not None:
            if cnt >= break_at:
                break
                
    vid.release()
    out.release()
    
gc.collect()
main(video_path, break_at=100, ignore_rate=0.5)

2.0
Original video: 3840x2160 and 30 fps
0 frames analysed in 1.15s
15 frames analysed in 2.43s
30 frames analysed in 3.72s
45 frames analysed in 4.97s
60 frames analysed in 6.22s
75 frames analysed in 7.48s
90 frames analysed in 8.74s
105 frames analysed in 10.00s
