# Implementation of MCNet in TensorFlow (TF1-style graph code via tensorflow.compat.v1)
### Uses UCF-101 for training and prediction

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# This only works when the notebook runs on Google Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.compat.v1.disable_eager_execution()

import cv2
import numpy as np
import os
from os import listdir, makedirs, system
from os.path import exists
from argparse import ArgumentParser
from joblib import Parallel, delayed
import sys
import time
import imageio

import scipy.io as sio
import scipy.misc as sm

import random

! pip3 install imageio
import imageio

# BasicConvLSTMCell




In [None]:
class BasicConvLSTMCell(object):
    """Basic convolutional LSTM recurrent network cell.

    All four gates are produced by one 2-D convolution applied to the
    channel-wise concatenation of the input and the previous hidden state
    (see ``_conv_linear``).
    """

    def __init__(self, shape, filter_size, num_features, forget_bias=1.0,
                 input_size=None, state_is_tuple=False, activation=tf.nn.tanh):
        """Initialize the basic Conv LSTM cell.

        Args:
          shape: int tuple, the height and width of the cell.
          filter_size: int tuple, the height and width of the filter.
          num_features: int, the depth (channel count) of the cell.
          forget_bias: float, the bias added to the forget gate.
          input_size: Deprecated and unused.
          state_is_tuple: If True, accepted and returned states are 2-tuples
            ``(c_state, h_state)``.  If False, they are concatenated along
            the channel axis (axis 3).
          activation: Activation function of the inner states.
        """
        if input_size is not None:
            # Imported locally: the module never imports ``logging`` itself.
            import logging
            logging.warning(
                "%s: The input_size parameter is deprecated.", self)
        self.shape = shape
        self.filter_size = filter_size
        self.num_features = num_features
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation

    @property
    def state_size(self):
        """Channel depth of the state.

        A ``(c, h)`` pair of depths when ``state_is_tuple`` is set, otherwise
        the depth of the concatenated state (``2 * num_features``).
        """
        if self._state_is_tuple:
            return (self.num_features, self.num_features)
        return 2 * self.num_features

    @property
    def output_size(self):
        """Channel depth of the cell output (``num_features``)."""
        return self.num_features

    def __call__(self, inputs, state, scope=None, reuse=False):
        """Run one step of the convolutional LSTM.

        Args:
          inputs: 4D input tensor (channels on axis 3).
          state: previous state; a ``(c, h)`` 2-tuple when ``state_is_tuple``
            is set, otherwise ``c`` and ``h`` concatenated on axis 3.
          scope: optional variable scope name; defaults to the class name.
          reuse: whether to reuse the variables of an earlier call.

        Returns:
          ``(new_h, new_state)`` where ``new_state`` has the same layout as
          ``state``.
        """
        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
            # Parameters of gates are concatenated into one convolution for
            # efficiency.
            concat = _conv_linear([inputs, h], self.filter_size,
                                  self.num_features * 4, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=concat)

            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *
                     self._activation(j))
            new_h = self._activation(new_c) * tf.nn.sigmoid(o)

            if self._state_is_tuple:
                # A plain 2-tuple is enough: the state is only ever unpacked
                # positionally as ``c, h = state``.
                new_state = (new_c, new_h)
            else:
                new_state = tf.concat(axis=3, values=[new_c, new_h])
            return new_h, new_state


In [None]:
def _conv_linear(args, filter_size, num_features, bias,
                 bias_start=0.0, scope=None, reuse=False):
    """Convolve the channel-wise concatenation of ``args`` with one kernel.

    Args:
      args: a 4D Tensor or a list/tuple of 4D Tensors; they are concatenated
        along axis 3 before the convolution.
      filter_size: int tuple of filter height and width.
      num_features: int, number of output features.
      bias: if truthy, a learned bias of size ``num_features`` is added.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Conv".
      reuse: For reusing already existing weights
    Returns:
      A 4D Tensor with shape [batch h w num_features]
    Raises:
      ValueError: if some of the arguments has unspecified or wrong shape.
    """
    # Honour the documented "single tensor" form: wrap it so the loops below
    # see a sequence of tensors rather than iterating over the tensor itself.
    if not isinstance(args, (list, tuple)):
        args = [args]

    # Calculate the total depth of the arguments (dimension 3).
    total_arg_size_depth = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 4:
            raise ValueError(
                "Linear is expecting 4D arguments: %s" % str(shapes))
        if not shape[3]:
            raise ValueError(
                "Linear expects shape[3] of arguments: %s" % str(shapes))
        total_arg_size_depth += shape[3]

    # The kernel and bias take the dtype of the first argument.
    dtype = args[0].dtype

    # Now the computation.
    with tf.variable_scope(scope or "Conv", reuse=reuse):
        matrix = tf.get_variable(
            "Matrix", [filter_size[0], filter_size[1],
                       total_arg_size_depth, num_features], dtype=dtype)
        if len(args) == 1:
            conv_input = args[0]
        else:
            conv_input = tf.concat(axis=3, values=args)
        res = tf.nn.conv2d(conv_input, matrix,
                           strides=[1, 1, 1, 1], padding='SAME')
        if not bias:
            return res
        bias_term = tf.get_variable(
            "Bias", [num_features],
            dtype=dtype, initializer=tf.constant_initializer(bias_start,
                                                             dtype=dtype)
        )
    return res + bias_term


# Utils


In [None]:
def transform(image):
    """Linearly rescale pixel values so that 0 maps to -1 and 255 maps to 1."""
    scaled = image / 127.5
    return scaled - 1.


def inverse_transform(images):
    """Linearly rescale values so that -1 maps to 0 and 1 maps to 1."""
    shifted = images + 1.
    return shifted / 2.


def save_images(images, size, image_path):
    """Rescale ``images`` from [-1, 1] to [0, 255] and save them as one grid.

    ``size`` is the ``(rows, cols)`` layout forwarded to ``imsave``.
    """
    pixels = inverse_transform(images) * 255.
    return imsave(pixels, size, image_path)


def merge(images, size):
    """Tile a batch of images into one 3-channel grid, filled row by row.

    Args:
      images: array indexed as ``[n, height, width, ...]``.
      size: ``(rows, cols)`` of the grid.
    Returns:
      An array of shape ``(height * rows, width * cols, 3)``.
    """
    tile_h, tile_w = images.shape[1], images.shape[2]
    n_cols = size[1]
    canvas = np.zeros((tile_h * size[0], tile_w * n_cols, 3))

    for idx, tile in enumerate(images):
        row, col = divmod(idx, n_cols)
        top = row * tile_h
        left = col * tile_w
        canvas[top:top + tile_h, left:left + tile_w, :] = tile

    return canvas


def imsave(images, size, path):
    """Tile ``images`` into a ``size`` grid and write it to ``path``."""
    grid = merge(images, size)
    return imageio.imwrite(path, grid)

In [None]:
def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """Split the indices ``0..n-1`` into consecutive minibatches.

    Args:
      n: number of samples.
      minibatch_size: number of indices per minibatch.
      shuffle: when True the indices are shuffled before being split.
    Returns:
      An iterator of ``(batch_number, index_array)`` pairs.  The last
      batch holds the leftover indices and may be shorter than the others.
    """
    indices = np.arange(n, dtype="int32")
    if shuffle:
        random.shuffle(indices)

    # Full-sized batches first ...
    batches = [indices[k * minibatch_size:(k + 1) * minibatch_size]
               for k in range(n // minibatch_size)]

    # ... then one extra batch made out of what is left, if anything.
    consumed = len(batches) * minibatch_size
    if consumed != n:
        batches.append(indices[consumed:])

    return zip(range(len(batches)), batches)


def draw_frame(img, is_input):
    """Draw a 2-pixel-wide solid border around an image.

    The border is written into channel 1 (value 255) for input frames and
    into channel 2 for all other frames; the remaining two of the first three
    channels are set to 0 along the border.

    Args:
      img: array indexed as ``[height, width, channels]``.  A single-channel
        image is first expanded to three channels (which makes a copy); a
        multi-channel image is modified in place.
      is_input: selects the border channel (1 if truthy, 2 otherwise).
    Returns:
      The framed image.
    """
    if img.shape[2] == 1:
        img = np.repeat(img, [3], axis=2)

    border_channel = 1 if is_input else 2
    cleared_channels = [c for c in range(3) if c != border_channel]

    # Top rows, left columns, bottom rows, right columns.
    edges = (np.s_[:2, :], np.s_[:, :2], np.s_[-2:, :], np.s_[:, -2:])
    for edge in edges:
        # Clear the same two channels on every edge so the whole border has
        # one uniform colour.
        for channel in cleared_channels:
            img[edge + (channel,)] = 0
        img[edge + (border_channel,)] = 255

    return img


In [None]:
def load_kth_data(f_name, data_path, image_size, K, T):
    """Load one random grayscale clip of ``K + T`` frames from a KTH video.

    Args:
      f_name: whitespace-separated record; token 0 is the video name (without
        the ``_uncomp.avi`` suffix), tokens 1 and 2 are integer frame bounds
        used to pick the clip start.
      data_path: prefix prepended to the video name.
      image_size: frames are resized to ``image_size x image_size``.
      K: number of frames for which frame differences are computed.
      T: number of additional frames loaded after the first ``K``.
    Returns:
      ``(seq, diff)``: ``seq`` is float32 with shape
      ``(image_size, image_size, K+T, 1)`` holding frames passed through
      ``transform``; ``diff`` is float32 with shape
      ``(image_size, image_size, K-1, 1)`` holding differences of consecutive
      frames after ``inverse_transform``.
    """
    flip = np.random.binomial(1, .5, 1)[0]
    tokens = f_name.split()
    vid_path = data_path + tokens[0] + "_uncomp.avi"
    seq = np.zeros((image_size, image_size, K+T, 1), dtype="float32")

    vid = imageio.get_reader(vid_path, "ffmpeg")
    try:
        low = int(tokens[1])
        high = np.min([int(tokens[2]), vid.get_length()])-K-T+1
        if low == high:
            stidx = 0
        else:
            if low >= high:
                # Print the offending path before randint raises on the
                # empty range.
                print(vid_path)
            stidx = np.random.randint(low=low, high=high)
        for t in range(K+T):
            img = cv2.cvtColor(cv2.resize(vid.get_data(stidx+t),
                               (image_size, image_size)),
                               cv2.COLOR_RGB2GRAY)
            seq[:, :, t] = transform(img[:, :, None])
    finally:
        # Always release the reader, even when decoding a frame fails.
        vid.close()

    if flip == 1:
        # Random horizontal flip (reverse the width axis).
        seq = seq[:, ::-1]

    diff = np.zeros((image_size, image_size, K-1, 1), dtype="float32")
    for t in range(1, K):
        prev_frame = inverse_transform(seq[:, :, t-1])
        next_frame = inverse_transform(seq[:, :, t])
        diff[:, :, t-1] = (next_frame.astype("float32") -
                           prev_frame.astype("float32"))

    return seq, diff


In [None]:
def _read_ucf_clip(vid_path, img_size, K, T, flip):
    """Read one random ``K + T`` frame clip from ``vid_path``; close the reader."""
    vid = imageio.get_reader(vid_path, "ffmpeg")
    try:
        low = 1
        high = vid.get_length()-K-T+1
        if low == high:
            stidx = 0
        else:
            stidx = np.random.randint(low=low, high=high)
        seq = np.zeros((img_size[0], img_size[1], K+T, 3),
                       dtype="float32")
        for t in range(K+T):
            # cv2.resize takes (width, height); the trailing [::-1] reverses
            # the channel order of the decoded frame.
            img = cv2.resize(vid.get_data(stidx+t),
                             (img_size[1], img_size[0]))[:, :, ::-1]
            seq[:, :, t] = transform(img)
    finally:
        # Always release the reader, including when this video turns out to
        # be unreadable and the caller retries with another one.
        vid.close()

    if flip == 1:
        # Random horizontal flip (reverse the width axis).
        seq = seq[:, ::-1]

    diff = np.zeros((img_size[0], img_size[1], K-1, 1),
                    dtype="float32")
    for t in range(1, K):
        prev_frame = inverse_transform(seq[:, :, t-1])*255
        prev_frame = cv2.cvtColor(prev_frame.astype("uint8"),
                                  cv2.COLOR_BGR2GRAY)
        next_frame = inverse_transform(seq[:, :, t])*255
        next_frame = cv2.cvtColor(next_frame.astype("uint8"),
                                  cv2.COLOR_BGR2GRAY)
        diff[:, :, t-1, 0] = (next_frame.astype("float32") -
                              prev_frame.astype("float32"))/255.
    return seq, diff


def load_ucf_data(f_name, data_path, trainlist, K, T):
    """Load one random clip of ``K + T`` frames from a UCF-101 video.

    Frames are resized to 240x320.  If the requested video cannot be read,
    a random replacement is drawn from ``trainlist`` and the load is retried
    until one succeeds (a ``.`` is printed for every failed attempt).

    Args:
      f_name: video file name, appended to ``data_path``.
      data_path: prefix prepended to the file name.
      trainlist: file names to fall back to when a video is bad.
      K: number of frames for which frame differences are computed.
      T: number of additional frames loaded after the first ``K``.
    Returns:
      ``(seq, diff)``: ``seq`` is float32 with shape ``(240, 320, K+T, 3)``
      holding frames passed through ``transform``; ``diff`` is float32 with
      shape ``(240, 320, K-1, 1)`` holding grayscale differences of
      consecutive frames divided by 255.
    """
    flip = np.random.binomial(1, .5, 1)[0]
    vid_path = data_path + f_name
    img_size = [240, 320]

    while True:
        try:
            return _read_ucf_clip(vid_path, img_size, K, T, flip)
        except Exception:
            # In case the current video is bad load a random one
            rep_idx = np.random.randint(low=0, high=len(trainlist))
            f_name = trainlist[rep_idx]
            vid_path = data_path + f_name
            print('.', end='')


# Ops

In [None]:
def batch_norm(inputs, name, train=True, reuse=False):
    """Apply ``tf.layers.batch_normalization`` with a learned scale."""
    bn_options = dict(training=train, reuse=reuse, name=name, scale=True)
    return tf.layers.batch_normalization(inputs=inputs, **bn_options)


In [None]:
def conv2d(input_, output_dim,
           k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
           name="conv2d", reuse=False, padding='SAME'):
    """2-D convolution followed by a learned bias.

    Args:
      input_: 4D input tensor (channels last).
      output_dim: number of output channels.
      k_h, k_w: kernel height and width.
      d_h, d_w: stride along height and width.
      stddev: accepted but not used by this implementation.
      name: variable scope holding the ``w`` and ``biases`` variables.
      reuse: whether to reuse existing variables in that scope.
      padding: padding mode passed to ``tf.nn.conv2d``.
    Returns:
      The biased convolution output.
    """
    with tf.variable_scope(name, reuse=reuse):
        in_channels = input_.get_shape()[-1]
        kernel = tf.get_variable(
            'w', [k_h, k_w, in_channels, output_dim],
            initializer=tf.initializers.glorot_uniform())
        convolved = tf.nn.conv2d(input_, kernel,
                                 strides=[1, d_h, d_w, 1], padding=padding)

        offsets = tf.get_variable('biases', [output_dim],
                                  initializer=tf.constant_initializer(0.0))
        biased = tf.nn.bias_add(convolved, offsets)
        # Reshape to the static shape of the un-biased convolution output.
        return tf.reshape(biased, convolved.get_shape())


In [None]:
def deconv2d(input_, output_shape,
             k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
             name="deconv2d", reuse=False, with_w=False, padding='SAME'):
    """Transposed 2-D convolution followed by a learned bias.

    Args:
      input_: 4D input tensor (channels last).
      output_shape: full 4-element shape of the output; its last entry is
        the number of output channels.
      k_h, k_w: kernel height and width.
      d_h, d_w: stride along height and width.
      stddev: accepted but not used by this implementation.
      name: variable scope holding the ``w`` and ``biases`` variables.
      reuse: whether to reuse existing variables in that scope.
      with_w: when True, also return the kernel and bias variables.
      padding: padding mode passed to ``tf.nn.conv2d_transpose``.
    Returns:
      The output tensor, or ``(output, w, biases)`` when ``with_w`` is True.
    """
    with tf.variable_scope(name, reuse=reuse):
        # filter : [height, width, output_channels, in_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1],
                                  input_.get_shape()[-1]],
                            initializer=tf.initializers.glorot_uniform())
        try:
            deconv = tf.nn.conv2d_transpose(input_, w,
                                            output_shape=output_shape,
                                            strides=[1, d_h, d_w, 1],
                                            padding=padding)

        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape,
                                    strides=[1, d_h, d_w, 1])

        biases = tf.get_variable('biases', [output_shape[-1]],
                                 initializer=tf.constant_initializer(0.0))
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())

        if with_w:
            return deconv, w, biases
        return deconv


In [None]:
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU written without a branch.

    Computes ``0.5 * (1 + leak) * x + 0.5 * (1 - leak) * |x|``, which equals
    ``x`` for non-negative ``x`` and ``leak * x`` for negative ``x``.
    """
    with tf.variable_scope(name):
        linear_coeff = 0.5 * (1 + leak)
        abs_coeff = 0.5 * (1 - leak)
        linear_part = linear_coeff * x
        abs_part = abs_coeff * abs(x)
        return linear_part + abs_part


def relu(x):
    """Thin wrapper around ``tf.nn.relu``."""
    rectified = tf.nn.relu(x)
    return rectified


def tanh(x):
    """Thin wrapper around ``tf.nn.tanh``."""
    squashed = tf.nn.tanh(x)
    return squashed


In [None]:
def shape2d(a):
    """Normalise ``a`` to a two-element list.

    ``a`` is either a plain int (used for both entries) or a list/tuple of
    length 2.  Anything else raises ``RuntimeError``.
    """
    if type(a) is int:
        return [a] * 2
    if not isinstance(a, (list, tuple)):
        raise RuntimeError("Illegal shape: {}".format(a))
    assert len(a) == 2
    return list(a)


def shape4d(a):
    """Expand a 2-D shape to the 4-element ``[1, h, w, 1]`` form.

    For use with tensorflow ops such as pooling that take NHWC-style sizes.
    """
    return [1, *shape2d(a), 1]


In [None]:
def UnPooling2x2ZeroFilled(x):
    """Upsample a 4D tensor by 2 along axes 1 and 2, filling with zeros.

    The input is padded with zeros along axes 3 and 2 and then reshaped, so
    each input value lands at position ``(2h, 2w)`` of the output and the
    other three positions of every 2x2 cell are zero.
    """
    padded = tf.concat(axis=3, values=[x, tf.zeros_like(x)])
    padded = tf.concat(axis=2, values=[padded, tf.zeros_like(padded)])

    static_shape = x.get_shape().as_list()
    if None in static_shape[1:]:
        # Some non-batch dimension is unknown statically: use the runtime
        # shape instead.
        dynamic_shape = tf.shape(x)
        return tf.reshape(padded, [-1, dynamic_shape[1] * 2,
                                   dynamic_shape[2] * 2, dynamic_shape[3]])

    target = [-1, static_shape[1] * 2, static_shape[2] * 2, static_shape[3]]
    return tf.reshape(padded, target)


In [None]:
def MaxPooling(x, shape, stride=None, padding='VALID'):
    """
    Max-pool a batch of images.
    :param x: NHWC tensor.
    :param shape: pooling window, int or [h, w].
    :param stride: int or [h, w]; defaults to the pooling window.
    :param padding: 'valid' or 'same' (case-insensitive); defaults to 'valid'.
    :returns: NHWC tensor.
    """
    padding = padding.upper()
    window = shape4d(shape)
    step = window if stride is None else shape4d(stride)
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding)


In [None]:
def FixedUnPooling(x, shape):
    """
    Unpool the input by a fixed factor, filling the new positions with zeros.

    Only the 2x2 case is implemented (via ``UnPooling2x2ZeroFilled``).
    :param x: NHWC tensor
    :param shape: int or [h, w]; must describe a 2x2 unpooling
    :returns: NHWC tensor
    :raises NotImplementedError: if ``shape`` is not 2x2
    """
    shape = shape2d(shape)
    if shape != [2, 2]:
        # Fail loudly rather than silently returning a 2x2 unpooling for a
        # request this function cannot honour.
        raise NotImplementedError(
            "FixedUnPooling only supports a 2x2 shape, got {}".format(shape))

    # a faster implementation for this special case
    return UnPooling2x2ZeroFilled(x)


In [None]:
def gdl(gen_frames, gt_frames, alpha):
    """
    Gradient difference loss (GDL) between predicted and ground-truth frames.

    The difference filters are built from a 3x3 identity, so both inputs are
    expected to have 3 channels.
    @param gen_frames: The predicted frames.
    @param gt_frames: The ground truth frames.
    @param alpha: The power to which each gradient term is raised.
    @return: The GDL loss (mean over all elements).
    """
    # Per-channel difference kernels: [-1, 1] along the width and
    # [[1], [-1]] along the height.
    eye = tf.constant(np.identity(3), dtype=tf.float32)
    neg_eye = -1 * eye
    kernel_x = tf.expand_dims(tf.stack([neg_eye, eye]), 0)
    kernel_y = tf.stack([tf.expand_dims(eye, 0), tf.expand_dims(neg_eye, 0)])

    def _abs_gradient(frames, kernel):
        # Stride (1, 1) with SAME padding keeps the spatial size.
        return tf.abs(tf.nn.conv2d(frames, kernel, [1, 1, 1, 1],
                                   padding='SAME'))

    gen_dx = _abs_gradient(gen_frames, kernel_x)
    gen_dy = _abs_gradient(gen_frames, kernel_y)
    gt_dx = _abs_gradient(gt_frames, kernel_x)
    gt_dy = _abs_gradient(gt_frames, kernel_y)

    diff_x = tf.abs(gt_dx - gen_dx)
    diff_y = tf.abs(gt_dy - gen_dy)

    return tf.reduce_mean((diff_x ** alpha + diff_y ** alpha))

In [None]:
def linear(input_, output_size, name, stddev=0.02, bias_start=0.0,
           reuse=False, with_w=False):
    """Fully connected layer: ``input_ @ Matrix + bias``.

    Args:
      input_: 2D tensor; its second dimension sets the number of input units.
      output_size: number of output units.
      name: variable scope holding the ``Matrix`` and ``bias`` variables.
      stddev: standard deviation of the normal initializer for ``Matrix``.
      bias_start: constant used to initialize ``bias``.
      reuse: whether to reuse existing variables in that scope.
      with_w: when True, also return the two variables.
    Returns:
      The projected tensor, or ``(projected, Matrix, bias)`` when ``with_w``
      is True.
    """
    in_dim = input_.get_shape().as_list()[1]

    with tf.variable_scope(name, reuse=reuse):
        weights = tf.get_variable("Matrix", [in_dim, output_size], tf.float32,
                                  tf.random_normal_initializer(stddev=stddev))
        offset = tf.get_variable(
            "bias", [output_size],
            initializer=tf.constant_initializer(bias_start))
        projected = tf.matmul(input_, weights) + offset
        if with_w:
            return projected, weights, offset
        return projected

# MCNet


In [None]:
class MCNET(object):
    """Motion-content network for predicting future video frames.

    The graph consists of a motion encoder over frame differences feeding a
    convolutional LSTM, a content encoder over the last observed frame,
    combination layers, residual connections and a decoder.  In training
    mode an image loss, a discriminator and the adversarial losses are built
    as well.
    """

    def __init__(self, image_size, batch_size=32, c_dim=3,
                 K=10, T=10, checkpoint_dir=None, is_train=True):
        """Store the hyper-parameters and build the graph.

        Args:
          image_size: ``[height, width]`` of the frames.
            NOTE(review): the three 2x2 poolings and the ``// 8`` state size
            suggest both must be divisible by 8 -- confirm.
          batch_size: fixed batch size baked into the placeholder shapes.
          c_dim: number of image channels (the code branches on 1 and 3).
          K: number of observed frames (``K - 1`` differences are encoded).
          T: number of frames to predict.
          checkpoint_dir: accepted but not used by this class.
          is_train: when True the losses, discriminator and summaries are
            built in addition to the generator.
        Raises:
          ValueError: if ``K < 2`` or ``T < 1``.
        """
        # The decoder needs the LSTM output of at least one encoded
        # difference, and at least one predicted frame to concatenate.
        if K < 2:
            raise ValueError("K must be at least 2, got %r" % (K,))
        if T < 1:
            raise ValueError("T must be at least 1, got %r" % (T,))

        self.batch_size = batch_size
        self.image_size = image_size
        self.is_train = is_train

        # Base channel counts of the generator and the discriminator.
        self.gf_dim = 64
        self.df_dim = 64

        self.c_dim = c_dim
        self.K = K
        self.T = T
        self.diff_shape = [batch_size, self.image_size[0],
                           self.image_size[1], K-1, 1]
        self.xt_shape = [batch_size, self.image_size[0],
                         self.image_size[1], c_dim]
        self.target_shape = [batch_size, self.image_size[0], self.image_size[1],
                             K+T, c_dim]

        self.build_model()

    def build_model(self):
        """Create placeholders, the generator and (if training) the losses.

        Sets ``self.diff_in``, ``self.xt``, ``self.target``, ``self.G`` and
        ``self.saver``.  In training mode it also sets the loss tensors
        (``L_p``, ``L_gdl``, ``L_img``, ``L_GAN``, ``d_loss`` ...), their
        summaries and the variable lists ``g_vars`` / ``d_vars``.
        """
        self.diff_in = tf.placeholder(
            tf.float32, self.diff_shape, name='diff_in')
        self.xt = tf.placeholder(tf.float32, self.xt_shape, name='xt')
        self.target = tf.placeholder(
            tf.float32, self.target_shape, name='target')

        cell = BasicConvLSTMCell([self.image_size[0]//8, self.image_size[1]//8],
                                 [3, 3], 256)
        pred = self.forward(self.diff_in, self.xt, cell)

        # Stack the T predicted frames along the time axis (axis 3).
        self.G = tf.concat(axis=3, values=pred)
        if self.is_train:
            # Ground-truth and generated future frames as 3-channel image
            # batches for the gradient difference loss.
            true_sim = inverse_transform(self.target[:, :, :, self.K:, :])
            if self.c_dim == 1:
                true_sim = tf.tile(true_sim, [1, 1, 1, 1, 3])
            true_sim = tf.reshape(tf.transpose(true_sim, [0, 3, 1, 2, 4]),
                                  [-1, self.image_size[0],
                                   self.image_size[1], 3])
            gen_sim = inverse_transform(self.G)
            if self.c_dim == 1:
                gen_sim = tf.tile(gen_sim, [1, 1, 1, 1, 3])
            gen_sim = tf.reshape(tf.transpose(gen_sim, [0, 3, 1, 2, 4]),
                                 [-1, self.image_size[0],
                                  self.image_size[1], 3])

            # Discriminator inputs: the time and channel axes are folded
            # into one channel axis.
            binput = tf.reshape(self.target[:, :, :, :self.K, :],
                                [self.batch_size, self.image_size[0],
                                 self.image_size[1], -1])
            btarget = tf.reshape(self.target[:, :, :, self.K:, :],
                                 [self.batch_size, self.image_size[0],
                                  self.image_size[1], -1])
            bgen = tf.reshape(self.G, [self.batch_size,
                                       self.image_size[0],
                                       self.image_size[1], -1])

            good_data = tf.concat(axis=3, values=[binput, btarget])
            gen_data = tf.concat(axis=3, values=[binput, bgen])

            with tf.variable_scope("DIS", reuse=False):
                self.D, self.D_logits = self.discriminator(good_data)

            # Second call shares the discriminator variables.
            with tf.variable_scope("DIS", reuse=True):
                self.D_, self.D_logits_ = self.discriminator(gen_data)

            # Image loss: mean squared error plus gradient difference loss.
            self.L_p = tf.reduce_mean(
                tf.square(self.G-self.target[:, :, :, self.K:, :])
            )
            self.L_gdl = gdl(gen_sim, true_sim, 1.)
            self.L_img = self.L_p + self.L_gdl

            self.d_loss_real = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.D_logits, labels=tf.ones_like(self.D)
                )
            )
            self.d_loss_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.D_logits_, labels=tf.zeros_like(self.D_)
                )
            )
            self.d_loss = self.d_loss_real + self.d_loss_fake
            # Generator adversarial loss: generated data labelled as real.
            self.L_GAN = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self.D_logits_, labels=tf.ones_like(self.D_)
                )
            )

            self.loss_sum = tf.summary.scalar("L_img", self.L_img)
            self.L_p_sum = tf.summary.scalar("L_p", self.L_p)
            self.L_gdl_sum = tf.summary.scalar("L_gdl", self.L_gdl)
            self.L_GAN_sum = tf.summary.scalar("L_GAN", self.L_GAN)
            self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
            self.d_loss_real_sum = tf.summary.scalar(
                "d_loss_real", self.d_loss_real)
            self.d_loss_fake_sum = tf.summary.scalar(
                "d_loss_fake", self.d_loss_fake)

            # Variables are assigned to the discriminator or the generator
            # by whether 'DIS' occurs in their name.
            self.t_vars = tf.trainable_variables()
            self.g_vars = [var for var in self.t_vars if 'DIS' not in var.name]
            self.d_vars = [var for var in self.t_vars if 'DIS' in var.name]
            num_param = sum(int(np.prod(var.get_shape()))
                            for var in self.g_vars)
            print("Number of parameters: %d" % num_param)
        self.saver = tf.train.Saver(max_to_keep=10)

    def forward(self, diff_in, xt, cell):
        """Build the generator and return the list of predicted frames.

        Args:
          diff_in: frame differences, indexed ``[batch, h, w, t, 1]``.
          xt: the last observed frame.
          cell: recurrent cell called as
            ``cell(inputs, state, scope=..., reuse=...)``.
        Returns:
          A list of ``T`` tensors, each reshaped to
          ``[batch_size, height, width, 1, c_dim]``.
        """
        # Initial state: 512 channels at 1/8 resolution, split in two halves
        # (c and h) by the cell.
        state = tf.zeros([self.batch_size, self.image_size[0]//8,
                          self.image_size[1]//8, 512])
        reuse = False
        # Encoder: run the K-1 observed differences through the LSTM.  Only
        # the last step's output and residual features are used afterwards.
        for t in range(self.K-1):
            enc_h, res_m = self.motion_enc(diff_in[:, :, :, t, :], reuse=reuse)
            h_dyn, state = cell(enc_h, state, scope='lstm', reuse=reuse)
            reuse = True

        pred = []
        # Decoder: predict T frames, feeding each prediction back in.
        for t in range(self.T):
            if t == 0:
                # First step creates the content/combination/decoder
                # variables.
                h_cont, res_c = self.content_enc(xt, reuse=False)
                h_tp1 = self.comb_layers(h_dyn, h_cont, reuse=False)
                res_connect = self.residual(res_m, res_c, reuse=False)
                x_hat = self.dec_cnn(h_tp1, res_connect, reuse=False)
            else:
                # ``diff_in`` is now the 4D difference between the last two
                # frames (set at the end of the previous iteration).
                enc_h, res_m = self.motion_enc(diff_in, reuse=True)
                h_dyn, state = cell(enc_h, state, scope='lstm', reuse=True)
                h_cont, res_c = self.content_enc(xt, reuse=True)
                h_tp1 = self.comb_layers(h_dyn, h_cont, reuse=True)
                res_connect = self.residual(res_m, res_c, reuse=True)
                x_hat = self.dec_cnn(h_tp1, res_connect, reuse=True)

            if self.c_dim == 3:
                # Reverse the channel order before the grayscale conversion
                # (presumably BGR -> RGB; verify against the data loader).
                x_hat_rgb = tf.concat(axis=3,
                                      values=[x_hat[:, :, :, 2:3], x_hat[:, :, :, 1:2],
                                              x_hat[:, :, :, 0:1]])
                xt_rgb = tf.concat(axis=3,
                                   values=[xt[:, :, :, 2:3], xt[:, :, :, 1:2],
                                           xt[:, :, :, 0:1]])

                x_hat_gray = 1./255.*tf.image.rgb_to_grayscale(
                    inverse_transform(x_hat_rgb)*255.
                )
                xt_gray = 1./255.*tf.image.rgb_to_grayscale(
                    inverse_transform(xt_rgb)*255.
                )
            else:
                x_hat_gray = inverse_transform(x_hat)
                xt_gray = inverse_transform(xt)

            # Next step's motion input and content frame.
            diff_in = x_hat_gray - xt_gray
            xt = x_hat
            pred.append(tf.reshape(x_hat, [self.batch_size, self.image_size[0],
                                           self.image_size[1], 1, self.c_dim]))

        return pred

    def motion_enc(self, diff_in, reuse):
        """Encode one frame difference with three conv + 2x2 max-pool stages.

        Returns:
          ``(pool3, res_in)``: the final pooled features and the list of the
          three pre-pooling activations used for residual connections.
        """
        res_in = []
        conv1 = relu(conv2d(diff_in, output_dim=self.gf_dim, k_h=5, k_w=5,
                            d_h=1, d_w=1, name='dyn_conv1',  reuse=reuse))
        res_in.append(conv1)
        pool1 = MaxPooling(conv1, [2, 2])

        conv2 = relu(conv2d(pool1, output_dim=self.gf_dim*2, k_h=5, k_w=5,
                            d_h=1, d_w=1, name='dyn_conv2', reuse=reuse))
        res_in.append(conv2)
        pool2 = MaxPooling(conv2, [2, 2])

        conv3 = relu(conv2d(pool2, output_dim=self.gf_dim*4, k_h=7, k_w=7,
                            d_h=1, d_w=1, name='dyn_conv3', reuse=reuse))
        res_in.append(conv3)
        pool3 = MaxPooling(conv3, [2, 2])
        return pool3, res_in

    def content_enc(self, xt, reuse):
        """Encode a frame with three blocks of 3x3 convs + 2x2 max-pool.

        Returns:
          ``(pool3, res_in)``: the final pooled features and the list of the
          last activation of each block, used for residual connections.
        """
        res_in = []
        conv1_1 = relu(conv2d(xt, output_dim=self.gf_dim, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv1_1', reuse=reuse))
        conv1_2 = relu(conv2d(conv1_1, output_dim=self.gf_dim, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv1_2', reuse=reuse))
        res_in.append(conv1_2)
        pool1 = MaxPooling(conv1_2, [2, 2])

        conv2_1 = relu(conv2d(pool1, output_dim=self.gf_dim*2, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv2_1', reuse=reuse))
        conv2_2 = relu(conv2d(conv2_1, output_dim=self.gf_dim*2, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv2_2', reuse=reuse))
        res_in.append(conv2_2)
        pool2 = MaxPooling(conv2_2, [2, 2])

        conv3_1 = relu(conv2d(pool2, output_dim=self.gf_dim*4, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv3_1', reuse=reuse))
        conv3_2 = relu(conv2d(conv3_1, output_dim=self.gf_dim*4, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv3_2', reuse=reuse))
        conv3_3 = relu(conv2d(conv3_2, output_dim=self.gf_dim*4, k_h=3, k_w=3,
                              d_h=1, d_w=1, name='cont_conv3_3', reuse=reuse))
        res_in.append(conv3_3)
        pool3 = MaxPooling(conv3_3, [2, 2])
        return pool3, res_in

    def comb_layers(self, h_dyn, h_cont, reuse=False):
        """Fuse motion and content features with three 3x3 convolutions."""
        comb1 = relu(conv2d(tf.concat(axis=3, values=[h_dyn, h_cont]),
                            output_dim=self.gf_dim*4, k_h=3, k_w=3,
                            d_h=1, d_w=1, name='comb1', reuse=reuse))
        comb2 = relu(conv2d(comb1, output_dim=self.gf_dim*2, k_h=3, k_w=3,
                            d_h=1, d_w=1, name='comb2', reuse=reuse))
        h_comb = relu(conv2d(comb2, output_dim=self.gf_dim*4, k_h=3, k_w=3,
                             d_h=1, d_w=1, name='h_comb', reuse=reuse))
        return h_comb

    def residual(self, input_dyn, input_cont, reuse=False):
        """Combine per-level motion and content features.

        For every level the two feature maps are concatenated on the channel
        axis and passed through two 3x3 convolutions whose output depth
        equals the depth of the content features.

        Returns:
          A list with one tensor per level.
        """
        n_layers = len(input_dyn)
        res_out = []
        for level in range(n_layers):
            input_ = tf.concat(axis=3, values=[input_dyn[level],
                                               input_cont[level]])
            out_dim = input_cont[level].get_shape()[3]
            res1 = relu(conv2d(input_, output_dim=out_dim,
                               k_h=3, k_w=3, d_h=1, d_w=1,
                               name='res'+str(level)+'_1', reuse=reuse))
            res2 = conv2d(res1, output_dim=out_dim, k_h=3, k_w=3,
                          d_h=1, d_w=1, name='res'+str(level)+'_2', reuse=reuse)
            res_out.append(res2)
        return res_out

    def dec_cnn(self, h_comb, res_connect, reuse=False):
        """Decode combined features back to a frame.

        Three stages of 2x2 zero-filled unpooling, addition of the matching
        residual connection and stride-1 transposed convolutions; the last
        layer uses ``tanh`` and outputs ``c_dim`` channels.
        """
        # Level 3: 1/8 -> 1/4 resolution.
        shapel3 = [self.batch_size, self.image_size[0]//4,
                   self.image_size[1]//4, self.gf_dim*4]
        shapeout3 = [self.batch_size, self.image_size[0]//4,
                     self.image_size[1]//4, self.gf_dim*2]
        depool3 = FixedUnPooling(h_comb, [2, 2])
        deconv3_3 = relu(deconv2d(relu(tf.add(depool3, res_connect[2])),
                                  output_shape=shapel3, k_h=3, k_w=3,
                                  d_h=1, d_w=1, name='dec_deconv3_3', reuse=reuse))
        deconv3_2 = relu(deconv2d(deconv3_3, output_shape=shapel3, k_h=3, k_w=3,
                                  d_h=1, d_w=1, name='dec_deconv3_2', reuse=reuse))
        deconv3_1 = relu(deconv2d(deconv3_2, output_shape=shapeout3, k_h=3, k_w=3,
                                  d_h=1, d_w=1, name='dec_deconv3_1', reuse=reuse))

        # Level 2: 1/4 -> 1/2 resolution.
        shapel2 = [self.batch_size, self.image_size[0]//2,
                   self.image_size[1]//2, self.gf_dim*2]
        shapeout2 = [self.batch_size, self.image_size[0]//2,
                     self.image_size[1]//2, self.gf_dim]
        depool2 = FixedUnPooling(deconv3_1, [2, 2])
        deconv2_2 = relu(deconv2d(relu(tf.add(depool2, res_connect[1])),
                                  output_shape=shapel2, k_h=3, k_w=3,
                                  d_h=1, d_w=1, name='dec_deconv2_2', reuse=reuse))
        deconv2_1 = relu(deconv2d(deconv2_2, output_shape=shapeout2, k_h=3, k_w=3,
                                  d_h=1, d_w=1, name='dec_deconv2_1', reuse=reuse))

        # Level 1: 1/2 -> full resolution.
        shapel1 = [self.batch_size, self.image_size[0],
                   self.image_size[1], self.gf_dim]
        shapeout1 = [self.batch_size, self.image_size[0],
                     self.image_size[1], self.c_dim]
        depool1 = FixedUnPooling(deconv2_1, [2, 2])
        deconv1_2 = relu(deconv2d(relu(tf.add(depool1, res_connect[0])),
                         output_shape=shapel1, k_h=3, k_w=3, d_h=1, d_w=1,
                         name='dec_deconv1_2', reuse=reuse))
        xtp1 = tanh(deconv2d(deconv1_2, output_shape=shapeout1, k_h=3, k_w=3,
                             d_h=1, d_w=1, name='dec_deconv1_1', reuse=reuse))
        return xtp1

    def discriminator(self, image):
        """Score a stack of frames.

        Four strided convolutions (batch norm on the last three) followed by
        a linear layer with one output.  Variable reuse is controlled by the
        enclosing variable scope.

        Returns:
          ``(probability, logits)``.
        """
        h0 = lrelu(conv2d(image, self.df_dim, name='dis_h0_conv'))
        h1 = lrelu(batch_norm(conv2d(h0, self.df_dim*2, name='dis_h1_conv'),
                              "bn1"))
        h2 = lrelu(batch_norm(conv2d(h1, self.df_dim*4, name='dis_h2_conv'),
                              "bn2"))
        h3 = lrelu(batch_norm(conv2d(h2, self.df_dim*8, name='dis_h3_conv'),
                              "bn3"))
        h = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'dis_h3_lin')

        return tf.nn.sigmoid(h), h

    def save(self, sess, checkpoint_dir, step):
        """Save a checkpoint named ``MCNET.model-<step>`` in ``checkpoint_dir``.

        The directory is created if it does not exist.
        """
        model_name = "MCNET.model"

        os.makedirs(checkpoint_dir, exist_ok=True)

        self.saver.save(sess,
                        os.path.join(checkpoint_dir, model_name),
                        global_step=step)

    def load(self, sess, checkpoint_dir, model_name=None):
        """Restore a checkpoint from ``checkpoint_dir`` into ``sess``.

        Args:
          sess: session to restore into.
          checkpoint_dir: directory holding the checkpoint state.
          model_name: checkpoint file name; defaults to the latest one
            recorded in the checkpoint state.
        Returns:
          ``(True, model_name)`` on success, ``(False, None)`` when no
          checkpoint state is found.  The result is a tuple and therefore
          always truthy; callers must test its first element.
        """
        print(" [*] Reading checkpoints...")

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            if model_name is None:
                model_name = ckpt_name
            self.saver.restore(sess, os.path.join(checkpoint_dir, model_name))
            print("     Loaded model: "+str(model_name))
            return True, model_name
        else:
            return False, None


# Main Function


In [None]:
def main(iters=0, lr=1e-4, batch_size=8, alpha=1.0, beta=0.001, K=4, T=1,
         num_iter=150000, gpu=0):
    """Train MCNET on UCF-101 videos with alternating D / G updates.

    Up to 1500 randomly chosen `.avi` files from the data directory are used.
    Checkpoints, sample images and TensorBoard summaries are written to
    `models-ucf/`, `samples-ucf/` and `logs-ucf/` sub-directories (relative to
    the current working directory) named after the hyper-parameters.

    Args:
        iters: Iteration count to start from (e.g. when resuming); the
            summary/checkpoint counter starts at `iters + 1`.
        lr: Learning rate of both Adam optimizers.
        batch_size: Number of videos per minibatch.
        alpha: Weight of `model.L_img` in the generator objective.
        beta: Weight of `model.L_GAN` in the generator objective.
        K: Number of observed frames. Each fed sequence holds K+T frames and
            K-1 difference frames; frame K-1 is fed as `model.xt`.
        T: Number of frames following the K observed ones in each sequence.
        num_iter: Training continues while `iters < num_iter`; the condition
            is only re-checked after a full pass over the training files.
        gpu: GPU index, or a sequence of indices of which the first is used.

    Raises:
        ValueError: If fewer than `batch_size` training videos are found.
    """
    data_path = "/content/drive/My Drive/VideoPrediction/mcnet/data/UCF101/UCF101_videos/"
    trainfiles = [f for f in listdir(data_path) if f.endswith(".avi")]
    np.random.shuffle(trainfiles)
    trainfiles = trainfiles[:1500]
    if len(trainfiles) < batch_size:
        # Without at least one full batch the training loop can never make
        # progress, so fail early with a clear message.
        raise ValueError("Need at least %d .avi files in %s, found %d"
                         % (batch_size, data_path, len(trainfiles)))

    margin = 0.3
    updateD = True
    updateG = True
    image_size = [240, 320]
    c_dim = 3
    # NOTE(review): 'test' is appended without a separator, and other cells of
    # this notebook build their prefix differently - confirm the directories
    # they point at are the ones written here.
    prefix = ("UCF_MCNET"
              + "_K="+str(K)
              + "_T="+str(T)
              + "_batch_size="+str(batch_size)
              + "_alpha="+str(alpha)
              + "_beta="+str(beta)
              + "_lr="+str(lr)+'test')

    print("\n"+prefix+"\n")
    checkpoint_dir = "models-ucf/"+prefix+"/"
    samples_dir = "samples-ucf/"+prefix+"/"
    summary_dir = "logs-ucf/"+prefix+"/"

    for out_dir in (checkpoint_dir, samples_dir, summary_dir):
        if not exists(out_dir):
            makedirs(out_dir)

    # Accept both a plain index (the default, 0) and a list such as [0].
    gpu_id = int(gpu) if np.ndim(gpu) == 0 else int(gpu[0])

    with tf.device("/gpu:%d" % gpu_id):
        model = MCNET(image_size=image_size, c_dim=c_dim,
                      K=K, batch_size=batch_size, T=T,
                      checkpoint_dir=checkpoint_dir)
        d_optim = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(
            model.d_loss, var_list=model.d_vars
        )
        g_optim = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(
            alpha*model.L_img+beta*model.L_GAN, var_list=model.g_vars
        )

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                    log_device_placement=False,
                    gpu_options=gpu_options)) as sess:

        tf.global_variables_initializer().run()

        # model.load returns a (success, name) tuple, which is always truthy;
        # unpack it so a failed load is actually reported as such.
        loaded, _ = model.load(sess, checkpoint_dir)
        if loaded:
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")
            print(checkpoint_dir)

        g_sum = tf.summary.merge([model.L_p_sum,
                                  model.L_gdl_sum, model.loss_sum,
                                  model.L_GAN_sum])
        d_sum = tf.summary.merge([model.d_loss_real_sum, model.d_loss_sum,
                                  model.d_loss_fake_sum])
        writer = tf.summary.FileWriter(summary_dir, sess.graph)

        counter = iters+1
        start_time = time.time()

        # Per-sample loader arguments never change, so build them once.
        Ts = np.repeat(np.array([T]), batch_size, axis=0)
        Ks = np.repeat(np.array([K]), batch_size, axis=0)
        paths = np.repeat(data_path, batch_size, axis=0)
        all_files = np.array(trainfiles)

        while iters < num_iter:
            mini_batches = get_minibatches_idx(len(trainfiles),
                                               batch_size, shuffle=True)
            for _, batchidx in mini_batches:
                # Skip incomplete batches entirely: the placeholders are fed
                # arrays sized for exactly batch_size samples.
                if len(batchidx) != batch_size:
                    continue
                tfiles = all_files[batchidx]

                # Every minibatch of files is loaded and trained on 3 times;
                # presumably load_ucf_data picks a different clip per call -
                # TODO confirm.
                for _draw in range(3):
                    seq_batch = np.zeros((batch_size, image_size[0], image_size[1],
                                          K+T, c_dim), dtype="float32")
                    diff_batch = np.zeros((batch_size, image_size[0], image_size[1],
                                           K-1, 1), dtype="float32")
                    output = [load_ucf_data(f, p, trainfiles, k, t)
                              for f, p, k, t in zip(tfiles, paths, Ks, Ts)]
                    for i, sample in enumerate(output):
                        seq_batch[i] = sample[0]
                        diff_batch[i] = sample[1]

                    feed = {model.diff_in: diff_batch,
                            model.xt: seq_batch[:, :, :, K-1],
                            model.target: seq_batch}

                    if updateD:
                        _, summary_str = sess.run([d_optim, d_sum],
                                                  feed_dict=feed)
                        writer.add_summary(summary_str, counter)

                    if updateG:
                        _, summary_str = sess.run([g_optim, g_sum],
                                                  feed_dict=feed)
                        writer.add_summary(summary_str, counter)

                    # One forward pass for all three monitored losses.
                    errD_fake, errD_real, errG = sess.run(
                        [model.d_loss_fake, model.d_loss_real, model.L_GAN],
                        feed_dict=feed)

                    # Pause D once either of its losses drops below `margin`,
                    # pause G once either exceeds 1 - margin, and re-enable
                    # both as soon as both are paused.
                    if errD_fake < margin or errD_real < margin:
                        updateD = False
                    if errD_fake > (1.-margin) or errD_real > (1.-margin):
                        updateG = False
                    if not updateD and not updateG:
                        updateD = True
                        updateG = True

                    counter += 1
                    if counter % 100 == 0:
                        newlog = "\nCounter: [%2d] Iters: [%2d] time: %4.4f, d_loss: %.8f, L_GAN: %.8f" % (
                            counter, iters, time.time() - start_time, errD_fake+errD_real, errG)
                        print(newlog)

                    if np.mod(counter, 500) == 1:
                        samples = sess.run(model.G, feed_dict=feed)
                        # First predicted frame of every sample followed by
                        # the matching ground-truth frame K.
                        samples = np.concatenate((samples[:, :, :, 0, :],
                                                  seq_batch[:, :, :, K, :]), axis=0)
                        print("Saving sample ...")
                        save_images(samples[:, :, :, ::-1], [batch_size, batch_size],
                                    samples_dir+"train_%s.png" % (iters))
                    if np.mod(counter, 500) == 2:
                        print("Saving Model to : ", checkpoint_dir)
                        model.save(sess, checkpoint_dir, counter)

                    iters += 1

        # Flush pending summaries and release the event file.
        writer.close()


In [None]:
# Switch to the project directory so the relative log path below resolves.
os.chdir('/content/drive/My Drive/VideoPrediction/mcnet/UCF101')
%reload_ext tensorboard
# NOTE(review): main() in this notebook builds its prefix as "..._lr=<lr>test"
# (no underscore before "test") - confirm this name matches the run whose
# logs should be displayed.
prefix =  'UCF_MCNET_K=10_T=3_batch_size=8_alpha=1.0_beta=0.001_lr=0.0001_test'
summary_dir  = "logs-ucf/"+prefix+"/"
%tensorboard --logdir {summary_dir}

In [None]:
# Hyper-parameters read by the training and sample-generation cells.
lr = 1e-4            # learning rate of the Adam optimizers
batch_size = 8       # videos per minibatch
alpha = 1.0          # weight of the image loss in the generator objective
beta = 0.001         # weight of the GAN loss in the generator objective
K = 10               # number of observed frames
T = 3                # number of frames following the observed ones
num_iter = 150000    # training runs while iters < num_iter
gpu = [0]            # GPU indices; the first entry is used
iters = 7200         # iteration count to start from

In [None]:
# Clear the default graph before the model is built again, then train with
# the hyper-parameters defined in the cell above.
tf.reset_default_graph()
main(iters=iters, lr=lr, batch_size=batch_size, alpha=alpha, beta=beta,
     K=K, T=T, num_iter=num_iter, gpu=gpu)

# Generate Videos

In [None]:
! pip install pyssim

from PIL import Image
from PIL import ImageDraw
import ssim
import skimage.measure as measure

Collecting pyssim
  Downloading https://files.pythonhosted.org/packages/5f/03/65df3dde6843bcce9004e7a6a1a0657c5f84814840c4f671267c15cf1d34/pyssim-0.4.tar.gz
Building wheels for collected packages: pyssim
  Building wheel for pyssim (setup.py) ... done
  Created wheel for pyssim: filename=pyssim-0.4-py2.py3-none-any.whl size=5814 sha256=9d7bd1ac240be658a85bc735f4d624137214143c3ecf1cee3c8034857af5617c
  Stored in directory: /root/.cache/pip/wheels/cf/7c/7b/2ebeff601772f28bfd22e128a91e85bb5df4e36d33df59a26c
Successfully built pyssim
Installing collected packages: pyssim
Successfully installed pyssim-0.4


In [None]:
def _frames_to_gif(savedir, stem):
    """Turn `<savedir>/<stem>_NNNN.png` into `<savedir>-<stem>.gif`, then delete the frames."""
    import glob
    import subprocess

    gif_path = savedir + "-" + stem + ".gif"
    # `-y` lets ffmpeg overwrite a GIF left over from an earlier run; an
    # argument list avoids any shell quoting issues in the paths.
    cmd = ["ffmpeg", "-y", "-f", "image2", "-framerate", "3",
           "-i", savedir + "/" + stem + "_%04d.png", gif_path]
    try:
        subprocess.run(cmd, check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        # Best effort: keep the PNG frames so the result is not lost.
        print(" [!] ffmpeg failed for " + gif_path + ": " + str(err))
        return
    for frame_path in glob.glob(glob.escape(savedir) + "/" + stem + "*.png"):
        os.remove(frame_path)


def generate_samples():
    """Predict frames for 10 random UCF-101 videos and score them.

    Reads the module-level settings `K`, `T`, `K_test`, `T_test`,
    `batch_size`, `alpha`, `beta`, `lr` and `gpu`. The checkpoint directory
    is named after the training settings (`K`, `T`), while the network is
    built for `K_test` observed and `T_test` predicted frames so that it
    matches the arrays fed to it.

    For every video the first `K_test + T_test` frames are read, the last
    `T_test` are predicted, PSNR / SSIM are computed per frame, and
    prediction and ground-truth GIFs are written under
    `results/images/UCF101/`. The per-video metrics of the predicted frames
    are saved to an `.npz` file under `results/quantitative/UCF101/`.

    Returns:
        None. Returns early when no checkpoint can be loaded.
    """
    # `skimage.measure.compare_psnr` is gone from recent scikit-image
    # releases; prefer its replacement and fall back to the old name.
    try:
        from skimage.metrics import peak_signal_noise_ratio as compute_psnr
    except ImportError:
        compute_psnr = measure.compare_psnr

    data_path = "/content/drive/My Drive/VideoPrediction/mcnet/data/UCF101/UCF101_videos/"
    testfiles = [x for x in os.listdir(data_path) if 'UCF_' in x]
    testfiles = np.random.choice(testfiles, size=10)
    image_size = [240, 320]
    c_dim = 3
    iters = 0
    prefix_mod = ("UCF_MCNET"
                  + "_K="+str(K)+'-'+str(K_test)
                  + "_T="+str(T)+'-'+str(T_test)
                  + "_batch_size="+str(batch_size)
                  + "_alpha="+str(alpha)
                  + "_beta="+str(beta)
                  + "_lr="+str(lr)
                  + 'iters='+str(iters))

    # NOTE(review): main() appends 'test' to its prefix; confirm this
    # directory name matches the run whose checkpoints should be loaded.
    prefix = ("UCF_MCNET"
              + "_K="+str(K)
              + "_T="+str(T)
              + "_batch_size="+str(batch_size)
              + "_alpha="+str(alpha)
              + "_beta="+str(beta)
              + "_lr="+str(lr))
    # NOTE(review): 'MyDrive' here vs 'My Drive' in data_path - confirm both
    # spellings resolve on the mounted drive.
    os.chdir('/content/drive/MyDrive/VideoPrediction/mcnet/UCF101')
    checkpoint_dir = "models-ucf/"+prefix+"/"
    best_model = None  # will pick last model

    with tf.device("/gpu:%d" % gpu[0]):
        # Build the graph for the test-time lengths: the arrays fed below and
        # the buffer receiving model.G are sized with K_test / T_test.
        model = MCNET(image_size=image_size, batch_size=1, K=K_test,
                      T=T_test, c_dim=c_dim, checkpoint_dir=checkpoint_dir,
                      is_train=False)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False,
                                          gpu_options=gpu_options)) as sess:

        tf.global_variables_initializer().run()

        loaded, model_name = model.load(sess, checkpoint_dir, best_model)
        print(checkpoint_dir)
        if loaded:
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed... exitting")
            return

        quant_dir = "results/quantitative/UCF101/"+prefix_mod+"/"
        save_path = quant_dir+"results_model="+model_name+".npz"
        if not exists(quant_dir):
            makedirs(quant_dir)

        n_frames = K_test+T_test
        psnr_err = np.zeros((0, T_test))
        ssim_err = np.zeros((0, T_test))
        for i in range(0, len(testfiles)):
            print(" Video "+str(i)+"/"+str(len(testfiles)))

            vid_path = data_path + testfiles[i]
            savedir = "results/images/UCF101/"+prefix_mod+"/"+str(i+1)

            seq_batch = np.zeros((1, image_size[0], image_size[1], n_frames, c_dim),
                                 dtype="float32")
            diff_batch = np.zeros((1, image_size[0], image_size[1], K_test-1, 1),
                                  dtype="float32")

            vid = imageio.get_reader(vid_path, "ffmpeg")
            try:
                for t in range(n_frames):
                    # Reverse the channel axis of the decoded frame.
                    img = vid.get_data(t)[:, :, ::-1]
                    seq_batch[0, :, :, t] = transform(img)
            finally:
                # Release the reader even if a frame is missing.
                vid.close()

            # Grayscale version of every observed frame, computed once.
            gray = [
                cv2.cvtColor(
                    (inverse_transform(seq_batch[0, :, :, t])*255).astype("uint8"),
                    cv2.COLOR_BGR2GRAY).astype("float32")
                for t in range(K_test)
            ]
            for t in range(1, K_test):
                diff = gray[t]-gray[t-1]
                diff_batch[0, :, :, t-1] = diff[:, :, None]/255.

            true_data = seq_batch[:, :, :, K_test:, :].copy()
            pred_data = np.zeros(true_data.shape, dtype="float32")
            xt = seq_batch[:, :, :, K_test-1]
            pred_data[0] = sess.run(model.G,
                                    feed_dict={model.diff_in: diff_batch,
                                               model.xt: xt})

            if not os.path.exists(savedir):
                os.makedirs(savedir)

            cpsnr = np.zeros((n_frames,))
            cssim = np.zeros((n_frames,))
            # Prepend the observed frames so both sequences cover all frames.
            pred_data = np.concatenate(
                (seq_batch[:, :, :, :K_test], pred_data), axis=3)
            true_data = np.concatenate(
                (seq_batch[:, :, :, :K_test], true_data), axis=3)
            for t in range(n_frames):
                pred = (inverse_transform(
                    pred_data[0, :, :, t])*255).astype("uint8")
                target = (inverse_transform(
                    true_data[0, :, :, t])*255).astype("uint8")

                cpsnr[t] = compute_psnr(pred, target)
                cssim[t] = ssim.compute_ssim(Image.fromarray(target),
                                             Image.fromarray(pred))

                pred = draw_frame(pred, t < K_test)
                target = draw_frame(target, t < K_test)

                cv2.imwrite(savedir+"/pred_"+"{0:04d}".format(t)+".png", pred)
                cv2.imwrite(savedir+"/gt_"+"{0:04d}".format(t)+".png", target)

            _frames_to_gif(savedir, "pred")
            _frames_to_gif(savedir, "gt")

            # Only the predicted frames (index >= K_test) count as errors.
            psnr_err = np.concatenate((psnr_err, cpsnr[None, K_test:]), axis=0)
            ssim_err = np.concatenate((ssim_err, cssim[None, K_test:]), axis=0)

            print(np.array(psnr_err).shape, np.array(
                ssim_err).shape, np.array(pred).shape)

        np.savez(save_path, psnr=psnr_err, ssim=ssim_err)
        print("Results saved to "+save_path)
    print("Done.")


In [None]:
K_test=10
T_test=6
tf.reset_default_graph()
generate_samples()