# Neural Style Transfer (NST) for videos

Based on the TensorFlow Hub tutorial [Fast Style Transfer for Arbitrary Styles](https://www.tensorflow.org/hub/tutorials/tf2_arbitrary_image_stylization).

In [5]:
import functools
import os

from matplotlib import gridspec
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from google.colab import files
import cv2

# Report library versions and runtime capabilities of the current kernel.
runtime_info = (
    ("TF Version: ", tf.__version__),
    ("TF Hub version: ", hub.__version__),
    ("Eager mode enabled: ", tf.executing_eagerly()),
    ("GPU available: ", tf.config.list_physical_devices('GPU')),
)
for label, value in runtime_info:
  print(label, value)

TF Version:  2.9.2
TF Hub version:  0.12.0
Eager mode enabled:  True
GPU available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# @title Define image loading and visualization functions  { display-mode: "form" }
def crop_center(image):
  """Crops a batched image to its largest centered square.

  Args:
    image: Image tensor whose axes 1 and 2 are height and width.

  Returns:
    The input cropped along axes 1 and 2 to `min(height, width)`.
  """
  height, width = image.shape[1], image.shape[2]
  side = min(height, width)
  # Only the longer axis gets a non-zero offset, which centers the crop.
  top = max(height - width, 0) // 2
  left = max(width - height, 0) // 2
  return tf.image.crop_to_bounding_box(image, top, left, side, side)

@functools.lru_cache(maxsize=None)
def load_image(image_url, image_size=(256, 256), preserve_aspect_ratio=True):
  """Downloads, decodes and preprocesses an image.

  Results are memoized per argument combination via `functools.lru_cache`,
  so `image_size` must be hashable (e.g. a tuple).

  Args:
    image_url: URL of the image; the file is cached locally by
      `tf.keras.utils.get_file`.
    image_size: Target `(height, width)` passed to `tf.image.resize`.
    preserve_aspect_ratio: Forwarded to `tf.image.resize`.

  Returns:
    A float32 tensor with a leading batch axis, center-cropped to a square
    and resized.
  """
  # Cache image file locally (file name limited to the last 128 characters).
  image_path = tf.keras.utils.get_file(os.path.basename(image_url)[-128:], image_url)
  # Decode as float32 in [0, 1] and add the batch dimension.
  img = tf.io.decode_image(
      tf.io.read_file(image_path),
      channels=3, dtype=tf.float32)[tf.newaxis, ...]
  img = crop_center(img)
  # Honor the caller's choice instead of always forcing True.
  img = tf.image.resize(
      img, image_size, preserve_aspect_ratio=preserve_aspect_ratio)
  return img

def load_image_p(image_path, image_size=(256, 256), preserve_aspect_ratio=True):
  """Loads and preprocesses an image from a local file path.

  Args:
    image_path: Path of the image file to read.
    image_size: Target `(height, width)` passed to `tf.image.resize`.
    preserve_aspect_ratio: Forwarded to `tf.image.resize`.

  Returns:
    A float32 tensor with a leading batch axis, center-cropped to a square
    and resized.
  """
  # Decode as float32 in [0, 1] and add the batch dimension.
  img = tf.io.decode_image(
      tf.io.read_file(image_path),
      channels=3, dtype=tf.float32)[tf.newaxis, ...]
  img = crop_center(img)
  # Honor the caller's choice instead of always forcing True.
  img = tf.image.resize(
      img, image_size, preserve_aspect_ratio=preserve_aspect_ratio)
  return img


def show_n(images, titles=('',)):
  """Displays image batches side by side in a single row.

  Args:
    images: Sequence of batched images; element 0 of each batch is drawn.
    titles: Captions by position; panels without a caption get an empty title.
  """
  n = len(images)
  sizes = [img.shape[1] for img in images]
  # The first image sets the panel size: 6 figure units per 320 pixels.
  unit = (sizes[0] * 6) // 320
  plt.figure(figsize=(unit * n, unit))
  grid = gridspec.GridSpec(1, n, width_ratios=sizes)
  for idx, batch in enumerate(images):
    plt.subplot(grid[idx])
    plt.imshow(batch[0], aspect='equal')
    plt.axis('off')
    caption = titles[idx] if idx < len(titles) else ''
    plt.title(caption)
  plt.show()

In [3]:
def style_video(video, style):
  """Stylizes every frame of a video and writes two result videos.

  Each frame is stylized independently with the TF Hub arbitrary image
  stylization module and saved to 'result_std.mp4'. A second version is saved
  to 'result_opt_flow.mp4': for each pair of consecutive original frames the
  dense optical flow magnitude is used as a per-pixel blend weight, so pixels
  with little motion are mostly kept from the previous output frame and
  moving pixels are taken from the newly stylized frame.

  Args:
    video: Mapping whose first key is the path of the input video (the
      notebook passes the value returned by `files.upload()`).
    style: Mapping whose first key is the path of the style image.

  Raises:
    ValueError: If the video cannot be opened or contains no frames.
  """
  # Frames are resized so that max(width, height) == 512.
  frame_size = (512, 512)
  # increase batch size if you have better performance capability
  BATCH_SIZE = 1
  # Normalized flow magnitude above which a pixel counts as "moving".
  motion_threshold = 0.06
  # 'mp4v' is the codec tag that matches the .mp4 container used below.
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')

  def np2tf(frame):
    """Converts a uint8 RGB frame to a float32 [0, 1] tensor with a batch axis."""
    frametf = tf.constant(frame / 255, dtype=tf.float32)
    frametf = tf.image.resize(frametf, frame_size, preserve_aspect_ratio=True)
    return frametf[tf.newaxis, ...]

  def tensor2images(batch):
    """Converts a (batch, H, W, 3) float tensor to a list of uint8 RGB frames."""
    # Clip before casting: 1.0 must map to 255 instead of wrapping around to 0.
    pixels = np.clip(batch.numpy() * 255, 0, 255).astype(np.uint8)
    return list(pixels)

  def write_video(path, rgb_frames, fps):
    """Writes RGB frames to `path`; OpenCV expects BGR, so channels are swapped."""
    height, width = rgb_frames[0].shape[:2]
    writer = cv2.VideoWriter(path, fourcc, fps, (width, height))
    try:
      for rgb_frame in rgb_frames:
        writer.write(cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR))
    finally:
      writer.release()

  # get video file
  video_filename = next(iter(video))
  capture = cv2.VideoCapture(video_filename)
  if not capture.isOpened():
    capture.release()
    raise ValueError(f'Could not open video file: {video_filename}')

  try:
    # copy video fps
    video_fps = capture.get(cv2.CAP_PROP_FPS)
    # get frames from the video (stored as RGB)
    frames = []
    while True:
      success, frame = capture.read()
      if not success:
        # End of stream (or read error): stop instead of looping forever.
        break
      frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
  finally:
    capture.release()

  if not frames:
    raise ValueError(f'No frames could be read from: {video_filename}')

  # Load TF Hub module.
  hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'
  hub_module = hub.load(hub_handle)

  # load style image
  style_filename = next(iter(style))
  style_image = tf.constant(load_image_p(style_filename, image_size=frame_size))

  # Stylize the frames batch by batch and collect the results as RGB images.
  results = []
  for start in range(0, len(frames), BATCH_SIZE):
    batch = tf.concat(
        [np2tf(frame) for frame in frames[start:start + BATCH_SIZE]], 0)
    outputs = hub_module(tf.constant(batch), style_image)
    # The first output is the stylized batch; split it along the batch axis
    # so that any BATCH_SIZE yields one image per input frame.
    results.extend(tensor2images(outputs[0]))

  # create resulting video from the independently stylized frames
  write_video('result_std.mp4', results, video_fps)

  # Optical flow part:
  # 1) Get original and styled images
  # 2) Calculate flow between original consecutive frames
  # 3) Use the flow magnitude to blend the previous output frame with the
  #    current styled frame
  # 4) Save new frames in a video
  height, width = results[0].shape[:2]
  # Original frames resized to the styled frame size (cv2 takes (width, height)).
  frames_r = [cv2.resize(frame, (width, height)) for frame in frames]

  # All frames in `smoothed` stay in RGB; conversion happens only on write.
  smoothed = [results[0]]
  prev_gray = cv2.cvtColor(frames_r[0], cv2.COLOR_RGB2GRAY)
  for i in range(1, len(results)):
    current_gray = cv2.cvtColor(frames_r[i], cv2.COLOR_RGB2GRAY)

    # Calculates dense optical flow by Farneback method
    flow = cv2.calcOpticalFlowFarneback(prev_gray, current_gray,
            None,
            0.5, 3, 13, 3, 5, 1.2, 0)

    # Computes the magnitude of the 2D flow vectors (the angle is not needed)
    magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    # magnitude normalized to [0, 1]
    magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX) / 255

    # Low-motion pixels keep their small weight; everything else is fully
    # replaced by the current styled frame. The trailing axis broadcasts the
    # weight over the three colour channels.
    weight = np.where(magnitude < motion_threshold, magnitude, 1)[..., np.newaxis]

    # leave unchanged regions from previous frame and only update changed ones from current frame
    blended = smoothed[-1] * (1 - weight) + results[i] * weight
    smoothed.append(blended.astype(np.uint8))
    prev_gray = current_gray

  write_video('result_opt_flow.mp4', smoothed, video_fps)

In [6]:
# Upload the input video through the Colab file picker.
# style_video() later reads the first key of the returned object as the file name.
video = files.upload()

Saving scene_3.mp4 to scene_3.mp4


In [7]:
# Upload the style image through the Colab file picker.
# style_video() later reads the first key of the returned object as the file name.
style = files.upload()

Saving style (1).png to style (1).png


In [None]:
# Run the full pipeline on the uploaded files; style_video writes
# 'result_std.mp4' and 'result_opt_flow.mp4' (relative paths).
style_video(video, style)