In [1]:
import os
import sys
import random
import time
import numpy as np
import cv2
import json
import matplotlib.pyplot as plt
import imutils
import colorsys
import tensorflow as tf

from macacnn_improvement import MACA
from macacnn_improvement.config import Config
from macacnn_improvement.scripts.cocovg import CocoVGConfig

# Filesystem locations used by the notebook.
# The defaults are the paths the notebook was originally written with; each one
# can be overridden through an environment variable so the notebook can run on
# another machine without editing the code.
DATA_ROOT = os.environ.get(
    "MACA_DATA_ROOT",
    "/Users/liyiming/Desktop/Birmingham Life/project/DATASET")
# JSON file holding the serialized Keras tokenizer.
TOKENIZER = os.environ.get(
    "MACA_TOKENIZER", os.path.join(DATA_ROOT, "COCOVG", "tokenizer.json"))
# Directory the input videos are read from.
VIDEO_INPUT_ROOT = os.environ.get(
    "MACA_VIDEO_INPUT_ROOT", os.path.join(DATA_ROOT, "source_material"))
# Directory the annotated videos are written to.
VIDEO_OUTPUT_ROOT = os.environ.get(
    "MACA_VIDEO_OUTPUT_ROOT", os.path.join(DATA_ROOT, "outputs"))
# Trained model weights (HDF5).
WEIGHT_PATH = os.environ.get(
    "MACA_WEIGHT_PATH", "/Users/liyiming/Downloads/macacnn_cocovg.hdf5")
# Passed to the model constructor as ``model_dir``.
MODEL_DIR = "../"

In [2]:
# Rebuild the Keras tokenizer from its serialized form on disk.
# NOTE(review): tokenizer_from_json is given the decoded JSON value, so the file
# presumably stores the tokenizer's JSON as a JSON-encoded string -- confirm
# against the code that wrote the file.
with open(TOKENIZER, mode='r') as load_f:
    js_tok = json.loads(load_f.read())

tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(js_tok)

In [3]:
class InferenceConfig(CocoVGConfig):
    """CocoVG configuration with the overrides used for inference in this notebook."""
    # One GPU, one image per GPU.
    # NOTE(review): presumably chosen so a single frame can be passed per
    # detect() call -- confirm against the Config base class.
    GPU_COUNT, IMAGES_PER_GPU = 1, 1
    BACKBONE = 'efficientnet-b3'


inference_config = InferenceConfig()

# Build the network in inference mode, wired to the tokenizer loaded above.
model = MACA.MACA(
    mode="inference",
    config=inference_config,
    model_dir=MODEL_DIR,
    tokenizer=tokenizer,
)

# Restore the trained parameters; by_name=True is forwarded to load_weights.
print("[INFO] Loading weights from ", WEIGHT_PATH)
model.load_weights(WEIGHT_PATH, by_name=True)

W0809 17:33:51.163639 4445736384 deprecation.py:506] From /Users/liyiming/.local/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py:1666: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


[INFO] Loading weights from  /Users/liyiming/Downloads/macacnn_cocovg.hdf5
Start loading weights from /Users/liyiming/Downloads/macacnn_cocovg.hdf5
Load weights successfully.
Set log dir successfully.


In [4]:
def apply_mask(image, mask, color, alpha=0.5):
    """Tint the masked pixels of ``image`` with ``color`` and return the image.

    For each of the first three channels, pixels where ``mask == 1`` are
    replaced by ``pixel * (1 - alpha) + alpha * color[channel] * 255``; all
    other pixels are left untouched. ``image`` is modified in place and the
    same array is returned.

    Args:
        image: Array indexed as ``image[:, :, channel]``.
        mask: Array compared against 1 to select the pixels to tint.
        color: Sequence with at least three components; each is scaled by 255.
        alpha: Blend weight given to the color (0 keeps the original pixel).

    Returns:
        The ``image`` array that was passed in.
    """
    inside = mask == 1
    keep = 1 - alpha
    for channel in range(3):
        plane = image[:, :, channel]
        tinted = plane * keep + alpha * color[channel] * 255
        # np.where builds a new array, so reading the view before writing is safe.
        image[:, :, channel] = np.where(inside, tinted, plane)
    return image


def visualize(image, boxes, masks, ids, captions, scores=None, colors=None,
              N=None, attentions=None, show_attentions=False, show_boxes=True, 
              show_captions=True, show_masks=True):
    """Overlay instance masks, boxes and captions on ``image`` and return it.

    Args:
        image: Array indexed as ``image[:, :, channel]``; it is drawn on
            directly rather than copied.
        boxes: Array with one row per instance, unpacked as
            ``(y1, x1, y2, x2)``. All-zero rows are skipped.
        masks: Array whose third axis indexes instances; each slice is passed
            to ``apply_mask``.
        ids: Per-mask ids; mask ``i`` is tinted with ``palette[ids[i] - 1]``.
        captions: Sequence of caption strings indexed like ``boxes``.
        scores: Accepted for call compatibility; not used here.
        colors: Optional palette. Falls back to ``Config.COLORS`` when empty
            or ``None``.
        N: Optional number of instances to draw boxes for. When given, that
            many distinct instances are picked at random (capped at the number
            of boxes); otherwise every instance is used.
        attentions: Accepted for call compatibility; not used here.
        show_attentions: Accepted for call compatibility; not used here.
        show_boxes: Draw a rectangle for each selected instance.
        show_captions: Draw the caption next to each box (only when
            ``show_boxes`` is true).
        show_masks: Tint every mask onto the image.

    Returns:
        The annotated image.
    """
    total_boxes = boxes.shape[0]
    if N:
        # Cap the request at the number of available boxes and sample without
        # replacement so the same instance is never drawn twice.
        count = int(min(N, total_boxes))
        if count:
            N = np.random.choice(total_boxes, count, replace=False)
        else:
            N = np.arange(0)
    else:
        N = np.arange(total_boxes)

    if not N.shape[0]:
        print("No caption instances to display!\n")

    caption_colors = colors or Config.COLORS

    if show_masks:
        if masks.shape[2] == 0:
            print("No mask instances to display!\n")
        mask_colors = colors or Config.COLORS
        for i in range(masks.shape[2]):
            color = mask_colors[ids[i]-1]
            mask = masks[:, :, i]
            image = apply_mask(image, mask, color)

    if show_boxes:
        # NOTE(review): apply_mask scales palette entries by 255, while the
        # same entries are handed to OpenCV unscaled here -- confirm the value
        # range of the palette.
        for i in N:
            if not np.any(boxes[i]):
                continue
            # Wrap around the palette so more instances than colors is not an error.
            color = caption_colors[i % len(caption_colors)]
            # Plain Python ints are accepted by every OpenCV build.
            y1, x1, y2, x2 = (int(v) for v in boxes[i])
            image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            if show_captions:
                image = cv2.putText(image, captions[i], (x1, y1 + 4), cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2)

    return image

In [5]:
video_name = 'videoplayback.mp4'
# splitext only strips the final extension, so names with several dots keep
# their full stem.
output_name = os.path.splitext(video_name)[0] + ".mp4"
input_path = os.path.join(VIDEO_INPUT_ROOT, video_name)
output_path = os.path.join(VIDEO_OUTPUT_ROOT, output_name)

cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
    # Fail here with a clear message instead of silently producing an empty output.
    raise IOError("Could not open video: {}".format(input_path))

# The frame-count property lives under a different name in OpenCV 2.
prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
total = int(cap.get(prop))
# Capture property ids: 5 = frames per second, 3 = frame width, 4 = frame height.
source_fps = cap.get(5)
frame_rate = int(source_fps)
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
fourcc = cv2.VideoWriter_fourcc(*'MP4V')

print("[INFO] Read video from: {}".format(input_path))
print("[INFO] Video output to: {}".format(output_path))
print("[INFO] Total frames in video: {}".format(total))
print("[INFO] Video frame rates: {}".format(frame_rate))
print("[INFO] Video frame width: {}".format(frame_width))
print("[INFO] Video frame height: {}".format(frame_height))

# Creates missing parent directories too and is a no-op when the folder exists.
os.makedirs(VIDEO_OUTPUT_ROOT, exist_ok=True)
# Write with the untruncated source frame rate so fractional rates such as
# 29.97 fps keep the original duration.
out = cv2.VideoWriter(output_path,
                      fourcc, source_fps, (frame_width, frame_height), True)

[INFO] Read video from: /Users/liyiming/Desktop/Birmingham Life/project/DATASET/source_material/videoplayback.mp4
[INFO] Video output to: /Users/liyiming/Desktop/Birmingham Life/project/DATASET/outputs/videoplayback.mp4
[INFO] Total frames in video: 5425
[INFO] Video frame rates: 30
[INFO] Video frame width: 640
[INFO] Video frame height: 360


In [None]:
# Run the model on every frame of the capture, write the annotated frame to the
# output video and preview it in a window. Press "q" in the preview to stop early.
number_of_frame = 1
# Frame rate used to turn the frame index into a timestamp in the progress log;
# 25 is kept as a fallback for sources that report no frame rate.
video_fps = frame_rate if frame_rate else 25
try:
    while cap.isOpened():
        s = time.time()
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (end of stream or read failure), so there
            # is nothing left to process or display.
            break

        results = model.detect([frame], verbose=0)
        r = results[0]

        frame = visualize(frame, r['rois'], r['masks'], r['class_ids'], r['captions'], r['scores'], show_boxes=False)
        out.write(frame)

        elapsed = time.time() - s
        print("Currently frame {}, {:.2f} s, fps: {:.2f}".format(
            number_of_frame, number_of_frame / video_fps,
            1 / elapsed if elapsed > 0 else float("inf")))
        number_of_frame += 1

        cv2.imshow("frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and the writer; the writer must be released
    # for the output file to be finalized.
    cap.release()
    out.release()
    cv2.destroyAllWindows()