# Implement YOLOV3 from `YunYang94` code

[Source code](https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/f99fcef22caa2758b5eefce10ee789384345506d/4-Object_Detection/YOLOV3) that this project is based on.

----

Todo :
1. Create a pipeline for a video :
    - Load video
    - Convert video to N frames
    - Predict on each frame if there is any object using YoloV3
        - if yes : draw boxes and label on top
    - Regroup all frames in the same video
    
2. Update YoloV3 model for drone use case :
    - Load and prepare data for train
        - csv labels (x_min, y_min, x_max, y_max) to bboxes `at first only on class`
        - check one one random img (5 ~ 6 times to be sure)
    - Change config (simplify it)
    - Check needed files / functions
    - Try to train
    - ...
  

## Load packages

In [None]:
import cv2
import time
import numpy as np
import core.utils as utils
import tensorflow as tf
from core.yolov3 import YOLOv3, decode

from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import gc

In [None]:
print('tensorflow version:', tf.__version__)

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

tf.per_process_gpu_memory_fraction = 0.3

## 1. Load video and predict on each frame

In [3]:
video_path      = "./docs/road.mp4" #"/data/archives/1.1.11.MP4"
# video_path      = 0
num_classes     = 80
input_size      = 416

Load YOLOV3 model

In [4]:
# Load from weight file

# input_layer  = tf.keras.layers.Input([input_size, input_size, 3])
# feature_maps = YOLOv3(input_layer)

# bbox_tensors = []
# for i, fm in enumerate(feature_maps):
#     bbox_tensor = decode(fm, i)
#     bbox_tensors.append(bbox_tensor)

# model = tf.keras.Model(input_layer, bbox_tensors)
# utils.load_weights(model, "/data/model/yolov3.weights")

In [9]:
# Load from h5 file
model_path = '/data/model/yolov3.h5'
model = tf.keras.models.load_model(model_path)



In [10]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 416, 416, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 416, 416, 32) 128         conv2d[0][0]                     
__________________________________________________________________________________________________
tf_op_layer_LeakyRelu (TensorFl (None, 416, 416, 32) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

Total params: 62,001,757
Trainable params: 61,949,149
Non-trainable params: 52,608
__________________________________________________________________________________________________


Load video

In [11]:
vid = cv2.VideoCapture(video_path)

In [13]:
def detect_on_next_frame(vid, out_dir='.', n_frame=0):
    """
    """
    return_value, frame = vid.read()
    if return_value:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    else:
        return False
        
    frame_size = frame.shape[:2]
    image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])
    image_data = image_data[np.newaxis, ...].astype(np.float32)
    
    gc.collect()

    pred_bbox = model.predict_on_batch(image_data)
    
    del image_data
    gc.collect()

    pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = tf.concat(pred_bbox, axis=0)
    bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)
    bboxes = utils.nms(bboxes, 0.45, method='nms')
    
    image = utils.draw_bbox(frame, bboxes)
    
    info = 'no object' if len(bboxes) < 1 else 'object detected'
    
    del bboxes, frame, pred_bbox
    gc.collect()

    result = np.asarray(image)
    
    cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=2, color=(255, 255, 255), thickness=5)
    
    result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
#     fig, ax = plt.subplots(figsize=(15,10))
#     plt.imshow(result)
    
    cv2.imwrite("%s/frame%s.jpg" % (out_dir, n_frame), result)
    
    del result
    gc.collect()
    
    if cv2.waitKey(1) & 0xFF == ord('q'): return False
    return True


def create_dir(dirname):
    if not os.path.exists(dirname):
        os.mkdir(dirname)
        print("Directory", dirname, "created.")
    else:
        print("Directory ", dirname, " already exists.")

out_dir = './test'
create_dir(dirname=out_dir)
i = 0
go_on = True

t0 = time.time()

while go_on:
    go_on = detect_on_next_frame(vid, out_dir, n_frame=i)
    i += 1
    if i%10 == 0:
        print('%i frames analysed in %.2fs'%(i, time.time()-t0))

Directory  ./test  already exists.
10 frames analysed in 14.36s
20 frames analysed in 20.19s
30 frames analysed in 25.88s


In [6]:
def convert_frames_to_video(frame_dir, out_file, fps=20.0, size=(2160, 3840)):
    """
    """
    files = [f for f in os.listdir(frame_dir) if os.path.isfile(os.path.join(frame_dir, f))]
    t0 = time.time()
    
    #for sorting the file names properly
    files.sort(key = lambda x: int(x[5:-4]))
    
#     fourcc = cv2.VideoWriter_fourcc(*'DIVX')
#     fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
#     out = cv2.VideoWriter(out_file, -1, fps, size)
#     out = cv2.VideoWriter('test.mp4', cv2.VideoWriter_fourcc(*'MJPG'), fps, (100,100))
    out = cv2.VideoWriter('test.avi', cv2.VideoWriter_fourcc(*'DIVX'), fps, (100,100))

    for i in tqdm(range(len(files))):
        filename = frame_dir + '/' + files[i]
        #reading each files
        img = cv2.imread(filename)
        
#         fig, ax = plt.subplots(figsize=(15,10))
#         plt.imshow(img)
#         break
        #inserting the frames into an image array
        out.write(img)

        del img
        gc.collect()
        
        if i == 30:
            break
            
    out.release()
#     cv2.destroyAllWindows()

In [7]:
%%time

out_dir = './test'
convert_frames_to_video(out_dir, './test.mp4')

  5%|▌         | 30/600 [00:02<00:53, 10.59it/s]

CPU times: user 2.77 s, sys: 66.5 ms, total: 2.84 s
Wall time: 2.86 s





In [10]:
help(cv2.VideoWriter)

Help on built-in function VideoWriter:

VideoWriter(...)
    VideoWriter([filename, fourcc, fps, frameSize[, isColor]]) -> <VideoWriter object>

