In [None]:
import colorsys
import os
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.keras.models as models
from PIL import Image, ImageFont, ImageDraw
from tensorflow.keras import backend as K
import model as yolo

In [None]:
# --- Configuration ---------------------------------------------------------
# Pretrained Keras model, plus the anchor and class-name text files that
# the loader helpers below read by default.
MODEL_PATH = './yolo.h5'
ANCHOR_PATH = './yolo_anchors.txt'
CLASS_PATH = './coco_classes.txt'

# Video that is read frame by frame, and the file the annotated frames go to.
INPUT_VIDEO = 'undistorted_cameradata/camera_3.mp4'
OUTPUT_PATH = 'object_detection_test_4.avi'

# Frame size (in pixels) every frame is resized to before detection/writing.
VIDEO_OUTPUT = dict(width=640, height=480)

# Create the TensorFlow session that the Keras backend will use.
# NOTE(review): the block pins '/gpu:1' while device_count exposes a single
# GPU; allow_soft_placement presumably lets TF pick an available device
# instead -- confirm this is the intended placement.
with tf.device('/gpu:1'):
    config = tf.ConfigProto(
        intra_op_parallelism_threads=4,
        inter_op_parallelism_threads=4,
        allow_soft_placement=True,
        device_count={'CPU': 1, 'GPU': 1},
    )
    sess = tf.Session(config=config)
    K.set_session(sess)

# Re-read the session from the backend so `sess` is whatever Keras holds.
sess = K.get_session()

# Load the network for inference only (no optimizer/loss is compiled).
yolo_model = models.load_model(MODEL_PATH, compile=False)

# Two-element placeholder; detect_image() feeds it [height, width] of the
# image being processed.
input_image_shape = K.placeholder(shape=(2,))


def _get_anchors(path=ANCHOR_PATH):
    """Read anchor values from the first line of a text file.

    The first line is split on commas, every piece is parsed as a float,
    and the resulting values are grouped into rows of two.

    Args:
        path: Location of the anchors file; a leading ``~`` is expanded.

    Returns:
        A numpy array with two columns (one row per pair of values).
    """
    with open(os.path.expanduser(path)) as anchor_file:
        first_line = anchor_file.readline()
    values = np.array([float(token) for token in first_line.split(',')])
    return values.reshape(-1, 2)


def _get_class(path=CLASS_PATH):
    """Read class names from a text file, one name per line.

    Args:
        path: Location of the class-name file; a leading ``~`` is expanded.

    Returns:
        A list with one whitespace-stripped string per line of the file.
    """
    with open(os.path.expanduser(path)) as class_file:
        return [line.strip() for line in class_file]


# Load the class labels and anchor values once, from the default paths
# (CLASS_PATH and ANCHOR_PATH); both are used when building the detection
# outputs, and class_names is also used for labelling drawn boxes.
class_names = _get_class()
anchors = _get_anchors()


def generate():
    """Build the detection outputs for the loaded model.

    Calls ``yolo.yolo_eval`` with the model output, the loaded anchors, the
    number of class names and the image-shape placeholder.

    Returns:
        A ``(boxes, scores, classes)`` tuple as produced by ``yolo_eval``.
    """
    num_classes = len(class_names)
    det_boxes, det_scores, det_classes = yolo.yolo_eval(
        yolo_model.output, anchors, num_classes, input_image_shape)
    return det_boxes, det_scores, det_classes


# Build the detection outputs once; detect_image() evaluates these three
# objects for every image through sess.run.
boxes, scores, classes = generate()


def letterbox_image(image, size):
    """Fit ``image`` inside ``size`` without distorting it, padding the rest.

    The image is scaled by the largest factor that keeps it within the
    target size, then pasted centred onto a grey (128, 128, 128) canvas.

    Args:
        image: Source image exposing ``size`` as (width, height) and
            ``resize``.
        size: Target (width, height) of the returned canvas.

    Returns:
        A new RGB image of exactly ``size`` containing the resized input.
    """
    src_w, src_h = image.size
    dst_w, dst_h = size

    # The smaller ratio is the one that makes both dimensions fit.
    ratio = min(dst_w / src_w, dst_h / src_h)
    fitted = (int(src_w * ratio), int(src_h * ratio))
    resized = image.resize(fitted, Image.BICUBIC)

    # Equal padding on both sides centres the resized image on the canvas.
    offset = ((dst_w - fitted[0]) // 2, (dst_h - fitted[1]) // 2)
    canvas = Image.new('RGB', size, (128, 128, 128))
    canvas.paste(resized, offset)
    return canvas


def _label_size(draw, text, font):
    """Return the (width, height) needed to draw ``text`` with ``font``.

    Uses ``draw.textsize`` when the installed Pillow still provides it and
    falls back to ``draw.textbbox`` otherwise (``textsize`` was removed in
    Pillow 10).
    """
    if hasattr(draw, 'textsize'):
        return draw.textsize(text, font)
    # Measured from the origin, the right/bottom edges of the bounding box
    # give the extent the label background has to cover.
    _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
    return (right, bottom)


def detect_image(image):
    """Run detection on one PIL image and draw the results onto it.

    The image is letterboxed to dimensions rounded down to multiples of 32,
    scaled to the [0, 1] range and evaluated through the module-level
    session. Every returned box is drawn on ``image`` with a
    ``'<class> <score>'`` label.

    Args:
        image: PIL image to annotate. It is modified in place.

    Returns:
        The same ``image`` object with boxes and labels drawn on it.
    """
    # Round both dimensions down to a multiple of 32 before letterboxing
    # (presumably required by the network's input stride -- TODO confirm).
    new_image_size = (image.width - (image.width % 32), image.height - (image.height % 32))
    boxed_image = letterbox_image(image, new_image_size)
    image_data = np.array(boxed_image, dtype='float32')

    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    print('start running in session')

    out_boxes, out_scores, out_classes = sess.run(
        [boxes, scores, classes],
        feed_dict={
            yolo_model.input: image_data,
            # PIL's size is (width, height); the placeholder gets (height, width).
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                              size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    # One fully saturated colour per class, evenly spaced around the hue wheel.
    num_classes = len(class_names)
    colors = []
    for class_index in range(num_classes):
        red, green, blue = colorsys.hsv_to_rgb(class_index / num_classes, 1., 1.)
        colors.append((int(red * 255), int(green * 255), int(blue * 255)))

    # A single drawing context is enough for all boxes of this image.
    draw = ImageDraw.Draw(image)

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)
        label_size = _label_size(draw, label, font)

        # Round to the nearest pixel and clamp the box to the image bounds.
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when it fits, otherwise just inside it.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Emulate a thick outline with concentric 1-pixel rectangles. The
        # inset uses its own name so it cannot clobber the box index `i`.
        for inset in range(thickness):
            draw.rectangle(
                [left + inset, top + inset, right - inset, bottom - inset],
                outline=colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colors[c])
        draw.text(tuple(text_origin), label, fill=(0, 0, 0), font=font)

    del draw

    return image


# Upper bound on the number of frames to annotate in one run.
MAX_FRAMES = 10

cap = cv2.VideoCapture(INPUT_VIDEO)
if not cap.isOpened():
    # Fail loudly instead of silently writing an empty output file.
    raise IOError('Could not open input video: {}'.format(INPUT_VIDEO))
# cap.set(1, 3120)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
fps = cap.get(cv2.CAP_PROP_FPS)
frame_size = (VIDEO_OUTPUT['width'], VIDEO_OUTPUT['height'])
out = cv2.VideoWriter(OUTPUT_PATH, fourcc, fps, frame_size)

counter = 0
try:
    # Stop after MAX_FRAMES frames; the limit is checked before reading so
    # no frame is decoded and then thrown away.
    while counter < MAX_FRAMES:
        hasNext, frame = cap.read()

        # The input ran out of frames before the limit was reached.
        if not hasNext:
            break

        # OpenCV decodes to BGR; PIL and detect_image() work on RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, frame_size)
        frame = Image.fromarray(frame)
        frame = detect_image(frame)

        # VideoWriter expects BGR, so convert back before writing; writing
        # the RGB array directly would swap red and blue in the output.
        out.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        counter = counter + 1
        print('count {}'.format(counter))
finally:
    # Always release the reader and writer so the output file is finalised,
    # even when detection raises part-way through.
    cap.release()
    out.release()

sess.close()
