<a href="https://colab.research.google.com/github/KuanChihLee/Generative-Adversarial-Networks/blob/master/VideoGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import multiply, Embedding, concatenate, Lambda
from keras.layers import BatchNormalization
from keras.layers import Conv2D, UpSampling2D, MaxPooling2D
from keras.layers import LeakyReLU
from keras.layers import Activation
from keras.models import Sequential, Model
from keras.optimizers import Adam, SGD, RMSprop

from keras.backend import tf as ktf
from keras.utils import plot_model

import os
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')

Using TensorFlow backend.


In [0]:
# Architecture tables consumed by VideoGAN. Each top-level row configures one
# scale network; the class takes the number of scale networks from len() of the
# conv-channel table it receives (4 rows -> 4 scales here).

# Generator: Conv2D filter counts, one entry per conv layer of each scale network.
# The last entry of every row is 3 and feeds the final tanh-activated conv.
CONV_CHANNELS_G = [[128, 256, 128, 3],
                   [128, 256, 128, 3],
                   [128, 256, 512, 256, 128, 3],
                   [128, 256, 512, 256, 128, 3]]

# Generator: Conv2D kernel sizes, index-aligned with CONV_CHANNELS_G.
CONV_KERNELS_G = [[3, 3, 3, 3],
                  [5, 3, 3, 5],
                  [5, 3, 3, 3, 3, 5],
                  [7, 5, 5, 5, 5, 7]]

# Discriminator: entry 0 of each row is the input channel count (used for the
# Input shape); the remaining entries are Conv2D filter counts.
CONV_CHANNELS_D = [[3, 64],
                  [3, 64, 128, 128],
                  [3, 128, 256, 256],
                  [3, 128, 256, 512, 128]]

# Discriminator: Conv2D kernel sizes; each row is one shorter than the matching
# CONV_CHANNELS_D row because the input-channel entry has no kernel.
CONV_KERNELS_D = [[3],
                  [3, 3, 3],
                  [5, 5, 5],
                  [7, 7, 5, 5]]

# Discriminator: Dense layer widths per scale network; the final width (1) is
# followed by a sigmoid and is that scale's prediction.
FC_LAYERS_D = [[512, 256, 1],
              [1024, 512, 1],
              [1024, 512, 1],
              [1024, 512, 1]]

In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [4]:
# Switch the working directory to the project folder on the mounted Drive;
# the relative './Data/' paths used later resolve against this directory.
%cd "/content/drive/My Drive/GAN"

/content/drive/My Drive/GAN


In [5]:
from glob import glob

def log10(t):
    """
    Element-wise base-10 logarithm, computed by change of base:
    log10(t) = ln(t) / ln(10).
    @param t: The tensor whose elements are to be converted.
    @return: A tensor holding the base-10 log of each element in t.
    """
    natural_log = tf.log(t)
    # Build ln(10) in the same dtype as ln(t) so the division needs no cast.
    ln_of_ten = tf.log(tf.constant(10, dtype=natural_log.dtype))
    return natural_log / ln_of_ten

def get_dir(directory):
    """
    Creates the given directory (and any missing parents) if it does not exist.
    @param directory: The path to the directory.
    @return: The path to the directory, unchanged.
    @raise FileExistsError: If the path already exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent and removes the gap between an
    # existence check and the creation, during which another process could
    # create the directory and make makedirs fail.
    os.makedirs(directory, exist_ok=True)
    return directory
  
def get_train_batch(batch_size=8, height=32, width=32, hist_len=4):
    """
    Loads batch_size randomly chosen clips from the directory of preprocessed
    training clips (TRAIN_DIR_CLIPS). Clips are drawn with replacement; each is
    read from '<TRAIN_DIR_CLIPS><index>.npz' with index in [0, NUM_CLIPS).
    @param batch_size: Number of clips to load.
    @param height: Height of each stored clip.
    @param width: Width of each stored clip.
    @param hist_len: Number of history frames per clip; each clip stores
                     hist_len + 1 frames of 3 channels stacked on the last axis.
    @return: A float32 array of shape
            [batch_size, height, width, (3 * (hist_len + 1))].
    """

    clips = np.empty([batch_size, height, width, (3 * (hist_len + 1))],
                     dtype=np.float32)
    for i in range(batch_size):
        path = TRAIN_DIR_CLIPS + str(np.random.choice(NUM_CLIPS)) + '.npz'
        # np.load on an .npz returns an NpzFile that holds the file open until
        # closed; the context manager releases the handle for every clip.
        with np.load(path) as data:
            clips[i] = data['arr_0']

    return clips
  
# Resolve (creating if missing) the data directories, relative to the current
# working directory, and count the available training clips.
DATA_DIR = get_dir('./Data/')
print("Data dir: ", DATA_DIR)
TRAIN_DIR_CLIPS = get_dir(os.path.join(DATA_DIR, 'Clips/'))
print("Train Data Clip dir: ", TRAIN_DIR_CLIPS)
# NOTE(review): this counts every entry in the clip directory, while
# get_train_batch loads '<index>.npz' for index in [0, NUM_CLIPS) -- any stray
# non-clip file or gap in the numbering would lead to a missing-file error.
NUM_CLIPS = len(glob(TRAIN_DIR_CLIPS + '*'))
print("Num of clips: ", NUM_CLIPS)

Data dir:  ./Data/
Train Data Clip dir:  ./Data/Clips/
Num of clips:  100


In [0]:
class VideoGAN:
  """
  Multi-scale GAN for next-frame prediction.

  The generator (self.G) takes four stacked history frames plus the ground-truth
  frame and predicts the next frame through a chain of scale networks, each
  working at twice the resolution of the previous one. The discriminator
  (self.D) scores a frame at every scale and returns one prediction per scale.
  """

  def __init__(self):
    """
    Builds the generator and discriminator graphs and compiles both models.

    Both networks are built for 32x32 frames with 3 channels, using the
    module-level CONV_* / FC_LAYERS_D tables for their layer layout.
    """
    print("Generator Initialization")
    self.G = self.generator(32, 32, 3, 210, 160, 3, CONV_CHANNELS_G, CONV_KERNELS_G)

    print("Discriminator Initialization")
    self.D = self.discriminator(32, 32, CONV_CHANNELS_D, CONV_KERNELS_D, FC_LAYERS_D)

    print('Optimizer Initialization')
    optimizer = Adam(lr=0.0002, beta_1=0.5, decay=8e-8)

    self.D.compile(loss=self.custom_loss_functions_D(self.scale_preds_D),
                   optimizer=optimizer,
                   metrics=['accuracy'])

    # Compile the generator exactly once. Compiling inside the training loop
    # with a fresh optimizer throws away Adam's moment estimates on every step
    # and rebuilds the train function (adding ops to the TF graph) each time.
    # The adversarial term of the generator loss is disabled, so no
    # discriminator predictions are needed and None is passed for preds_D.
    optimizer_G = Adam(lr=0.0002, beta_1=0.5, decay=8e-8)
    self.G.compile(loss=self.custom_loss_functions_G(None, self.scale_preds_G, self.scale_gts_G),
                   optimizer=optimizer_G,
                   metrics=['accuracy'])

  def train(self, epochs):
    """
    Alternates one discriminator update and one generator update per iteration.

    Note that the loop runs epochs + 1 iterations (cnt goes from 0 to epochs
    inclusive). Each update draws its own random batch via get_train_batch().

    @param epochs: Index of the last iteration to run.
    """
    for cnt in range(epochs+1):

      print('Train discriminator...')
      batch = get_train_batch()
      # The last 3 channels are the ground-truth frame, the rest is history.
      input_frames = batch[:, :, :, :-3]
      gt_frames = batch[:, :, :, -3:]
      batch_size = np.shape(gt_frames)[0]

      gen_img_frames = self.G.predict([input_frames, gt_frames])

      # Generated frames are labelled 0, ground-truth frames 1; the same label
      # vector is supplied for every discriminator scale output.
      x_combined_batch = np.concatenate((gen_img_frames, gt_frames))
      y_combined_batch = np.concatenate((np.zeros((batch_size, 1)), np.ones((batch_size, 1))))
      d_loss = self.D.train_on_batch([x_combined_batch], [y_combined_batch]*self.scale_nets_num_D)

      print('Training generator...')
      batch = get_train_batch()
      input_frames = batch[:, :, :, :-3]
      gt_frames = batch[:, :, :, -3:]

      # The generator loss currently consists of the lp and gdl terms only, so
      # no discriminator forward pass is required before this update.
      g_loss = self.G.train_on_batch([input_frames, gt_frames], gt_frames)
      print ('epoch: %d, [Discriminator loss: %f  Total loss: %f]' % (cnt, d_loss[0], g_loss[0])) 


  def discriminator(self, height, width, 
                    conv_layers, conv_kernels, fc_layers):
    """
    Builds the multi-scale discriminator.

    One sub-network is created per row of conv_layers. Sub-network k receives
    the input frame resized by 1 / 2 ** ((num_scales - 1) - k), so the last
    sub-network sees the full-resolution frame.

    @param height: Height of the full-resolution input frame.
    @param width: Width of the full-resolution input frame.
    @param conv_layers: Per scale, [input_channels, filters_1, filters_2, ...].
    @param conv_kernels: Per scale, the kernel size of each conv layer.
    @param fc_layers: Per scale, the Dense layer widths; the last one is the
                      sigmoid-activated prediction.
    @return: A Keras Model mapping one input frame tensor to a list with one
             prediction tensor per scale (also stored in self.scale_preds_D).
    """
    self.height_D = height
    self.width_D = width

    self.conv_layers_D = conv_layers
    self.conv_kernels_D = conv_kernels
    self.fc_layers_D = fc_layers
    self.scale_nets_num_D = len(conv_layers)

    self.scale_preds_D = []

    self.Input_frames = Input(shape=(self.height_D, self.width_D, self.conv_layers_D[0][0]))

    def __scaled_model_D():
      # Builds the sub-network for the scale currently selected through
      # self.net_num_D / self.scale_height_D / self.scale_width_D.
      New_scale_frames_shape = (self.scale_height_D, self.scale_width_D, self.conv_layers_D[self.net_num_D][0])
      New_scale_frames = Input(shape=New_scale_frames_shape)

      model = Sequential()
      model.add(Conv2D(self.conv_layers_D[self.net_num_D][1], self.conv_kernels_D[self.net_num_D][0], padding='same', input_shape=New_scale_frames_shape))
      model.add(Activation('relu'))
      model.add(Dropout(0.2))

      # Entry 0 is the input channel count and entry 1 was used above, so the
      # kernel for conv entry `layer` sits at index layer - 1.
      for layer in range(2,len(self.conv_layers_D[self.net_num_D])):
        model.add(Conv2D(self.conv_layers_D[self.net_num_D][layer], self.conv_kernels_D[self.net_num_D][layer-1], padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
        model.add(Dropout(0.2))

      model.add(Flatten())
      for fc_layer in range(len(self.fc_layers_D[self.net_num_D])):
        if fc_layer == len(self.fc_layers_D[self.net_num_D]) - 1:
          # Last Dense layer: sigmoid prediction; the finished sub-model is
          # returned from inside the loop.
          model.add(Dense(self.fc_layers_D[self.net_num_D][fc_layer]))
          model.add(Activation('sigmoid'))
          current_scale_pred_D = model(New_scale_frames)

          return Model([New_scale_frames], current_scale_pred_D)
        else:
          model.add(Dense(self.fc_layers_D[self.net_num_D][fc_layer]))
          model.add(BatchNormalization(momentum=0.8))
          model.add(Activation('relu'))
          model.add(Dropout(0.2))

    for net_num in range(self.scale_nets_num_D):  

      self.net_num_D = net_num      
      scale_factor = 1. / 2 ** ((self.scale_nets_num_D - 1) - self.net_num_D)
      self.scale_height_D = int(self.height_D * scale_factor)
      self.scale_width_D = int(self.width_D * scale_factor)

      # The lambda reads the scale size from self when the layer is applied,
      # which happens right here, so it sees the size of the current scale.
      scale_inputs_frames = Lambda(lambda image: ktf.image.resize_images(image, (self.scale_height_D, self.scale_width_D)))(self.Input_frames)

      scale_model = __scaled_model_D()
      current_scale_pred = scale_model([scale_inputs_frames])
      self.scale_preds_D.append(current_scale_pred)

    return Model([self.Input_frames], self.scale_preds_D)


  def generator(self, height_train, width_train, channel_train, 
                      height_ground, width_ground, channel_ground, 
                      conv_layers, conv_kernels):
    """
    Builds the multi-scale generator.

    Scale network 0 sees only the (down-scaled) history frames. Every later
    scale network sees the history frames at its own scale concatenated with
    the previous network's output. All networks except the last up-sample by 2
    before their final conv, so their output matches the next scale's size.

    Side effects: fills self.scale_preds_G (each scale's prediction resized to
    that scale's size) and self.scale_gts_G (the ground truth resized to each
    scale); both lists are consumed by the generator loss.

    @param height_train: Height of the full-resolution input frames.
    @param width_train: Width of the full-resolution input frames.
    @param channel_train: Channels per frame; the history input stacks 4 frames.
    @param height_ground: Not used by this method.
    @param width_ground: Not used by this method.
    @param channel_ground: Not used by this method.
    @param conv_layers: Per scale, the Conv2D filter counts.
    @param conv_kernels: Per scale, the Conv2D kernel sizes.
    @return: A Keras Model mapping [history frames, ground-truth frame] to the
             prediction of the last (full-resolution) scale.
    """
    self.height_G = height_train
    self.width_G = width_train
    self.channel_G = channel_train
    self.conv_layers_G = conv_layers
    self.conv_kernels_G = conv_kernels
    self.scale_nets_num_G = len(conv_layers)

    self.scale_preds_G = []  
    self.scale_gts_G = []  
    self.__scale_preds_G = []  

    Input_frames = Input(shape=(self.height_G, self.width_G, self.channel_G * 4))
    Input_gt_frames = Input(shape=(self.height_G, self.width_G, self.channel_G))

    def __scaled_model_G(model, combined_frames, input_frames, last_scale_frames):
      # Appends the remaining conv layers of the current scale (self.net_num)
      # to `model`, whose first conv layer was already added by the caller, and
      # wraps the result in a two-input Model.
      for layer in range(1,len(self.conv_layers_G[self.net_num])):
        if layer == len(self.conv_layers_G[self.net_num]) - 1:
          # Every scale except the last doubles its resolution before the
          # final tanh conv.
          if self.net_num != self.scale_nets_num_G - 1:
            model.add(UpSampling2D())
          model.add(Conv2D(self.conv_layers_G[self.net_num][layer], self.conv_kernels_G[self.net_num][layer], padding='same'))
          model.add(Activation('tanh'))
          current_scale_pred_train = model(combined_frames)

          return Model([input_frames, last_scale_frames], current_scale_pred_train)
        else:
          model.add(Conv2D(self.conv_layers_G[self.net_num][layer], self.conv_kernels_G[self.net_num][layer], padding='same'))
          model.add(BatchNormalization(momentum=0.8))
          model.add(Activation('relu'))
          model.add(Dropout(0.2))

    def __scaled_model_G_1():
      # First scale network: the conv stack consumes the history frames only.
      # The second input is part of the Model signature but does not feed the
      # conv stack.
      New_scale_frames = Input(shape=(self.scale_height_G, self.scale_width_G, self.channel_G * 4))
      Last_scale_frames = Input(shape=(self.scale_height_G, self.scale_width_G, self.channel_G))
      New_scale_frames_shape = (self.scale_height_G, self.scale_width_G, self.channel_G * 4)

      model = Sequential()
      model.add(Conv2D(self.conv_layers_G[self.net_num][0], self.conv_kernels_G[self.net_num][0], padding='same', input_shape=New_scale_frames_shape))
      model.add(BatchNormalization(momentum=0.8))
      model.add(Activation('relu'))
      model.add(Dropout(0.2))

      return __scaled_model_G(model, New_scale_frames, New_scale_frames, Last_scale_frames)

    def __scaled_model_G_hiddens():
      # Later scale networks: history frames (4 * channels) are concatenated
      # with the previous scale's prediction (1 * channels) on the last axis.
      New_scale_frames = Input(shape=(self.scale_height_G, self.scale_width_G, self.channel_G * 4))
      Last_scale_frames = Input(shape=(self.scale_height_G, self.scale_width_G, self.channel_G))
      New_combined_frames = concatenate([New_scale_frames, Last_scale_frames], axis=-1)

      New_combined_frames_shape = (self.scale_height_G, self.scale_width_G, self.channel_G * 5)

      model = Sequential()
      model.add(Conv2D(self.conv_layers_G[self.net_num][0], self.conv_kernels_G[self.net_num][0], padding='same', input_shape=New_combined_frames_shape))
      model.add(BatchNormalization(momentum=0.8))
      model.add(Activation('relu'))
      model.add(Dropout(0.2))

      return __scaled_model_G(model, New_combined_frames, New_scale_frames, Last_scale_frames)

    def __resize_img(image):
      # Resizes to the size of the scale that is current when the Lambda layer
      # is applied (read from self at call time).
      resized = ktf.image.resize_images(image, (self.scale_height_G, self.scale_width_G))
      return resized

    # ---- scale 0: no previous prediction available ----
    self.net_num = 0
    scale_factor = 1. / 2 ** ((self.scale_nets_num_G - 1) - self.net_num)
    self.scale_height_G = int(self.height_G * scale_factor)
    self.scale_width_G = int(self.width_G * scale_factor)

    scale_inputs_frames = Lambda(__resize_img, input_shape=(self.height_G, self.width_G, self.channel_G * 4), 
                                 output_shape=(self.scale_height_G, self.scale_width_G, self.channel_G * 4))(Input_frames)
    scale_gts_frames = Lambda(__resize_img, input_shape=(self.height_G, self.width_G, self.channel_G), 
                              output_shape=(self.scale_height_G, self.scale_width_G, self.channel_G))(Input_gt_frames)

    hidden_model = __scaled_model_G_1()
    self.__scale_preds_G.append(hidden_model([scale_inputs_frames,scale_gts_frames]))

    self.scale_gts_G.append(scale_gts_frames)
    # The raw prediction is already up-sampled for the next scale; resize it
    # back to this scale's size so it can be compared with this scale's target.
    self.scale_preds_G.append(Lambda(__resize_img, output_shape=(self.scale_height_G, self.scale_width_G, self.channel_G))(self.__scale_preds_G[self.net_num]))

    # ---- scales 1..N-1: each consumes the previous scale's raw prediction ----
    for net_num in range(1,self.scale_nets_num_G):  

      self.net_num = net_num  
      scale_factor = 1. / 2 ** ((self.scale_nets_num_G - 1) - self.net_num)
      self.scale_height_G = int(self.height_G * scale_factor)
      self.scale_width_G = int(self.width_G * scale_factor)

      scale_inputs_frames = Lambda(__resize_img, input_shape=(self.height_G, self.width_G, self.channel_G * 4), 
                                 output_shape=(self.scale_height_G, self.scale_width_G, self.channel_G * 4))(Input_frames)
      scale_gts_frames = Lambda(__resize_img, input_shape=(self.height_G, self.width_G, self.channel_G), 
                              output_shape=(self.scale_height_G, self.scale_width_G, self.channel_G))(Input_gt_frames)

      last_scale_pred = self.__scale_preds_G[self.net_num - 1]

      hidden_model = __scaled_model_G_hiddens()
      self.__scale_preds_G.append(hidden_model([scale_inputs_frames,last_scale_pred]))

      self.scale_gts_G.append(scale_gts_frames)
      self.scale_preds_G.append(Lambda(__resize_img, output_shape=(self.scale_height_G, self.scale_width_G, self.channel_G))(self.__scale_preds_G[self.net_num]))

    return Model([Input_frames, Input_gt_frames], self.scale_preds_G[-1])


  def custom_loss_functions_D(self,preds):
    """
    Creates the discriminator loss.

    The returned function ignores the y_pred argument Keras passes in and
    instead evaluates the closed-over per-scale prediction tensors in `preds`
    against y_true, averaging the binary cross-entropy over all scales.

    @param preds: List of per-scale discriminator prediction tensors.
    @return: A Keras-compatible loss function (y_true, y_pred) -> scalar.
    """
    def adv_loss(y_true,_):
      scale_losses = []
      for i in range(len(preds)):
          loss = bce_loss(preds[i], y_true)
          scale_losses.append(loss)
      return tf.reduce_mean(tf.stack(scale_losses))
    def bce_loss(preds, targets):
      # Keep predictions strictly inside (0, 1): a saturated sigmoid would
      # otherwise produce log(0) = -inf and an inf/NaN loss.
      preds = tf.clip_by_value(preds, 1e-7, 1. - 1e-7)
      # targets^T . log(preds) sums the per-sample terms over the batch.
      return tf.squeeze(-1 * (tf.matmul(targets, log10(preds), transpose_a=True) +
                              tf.matmul(1 - targets, log10(1 - preds), transpose_a=True)))
    return adv_loss 


  def custom_loss_functions_G(self,preds_D,gen_img_G,gts_G,
                              lam_adv=1,lam_lp=1,lam_gdl=1,l_num=2,alpha=2):
    """
    Creates the generator loss: lam_lp * lp_loss + lam_gdl * gdl_loss, each
    averaged over all scales. The adversarial term is currently disabled, so
    preds_D and lam_adv do not influence the result.

    The returned function ignores both arguments Keras passes in and works on
    the closed-over tensors gen_img_G / gts_G.

    @param preds_D: Discriminator predictions for the generated frames
                    (unused while the adversarial term is disabled).
    @param gen_img_G: List of per-scale generated frame tensors.
    @param gts_G: List of per-scale ground-truth frame tensors.
    @param lam_adv: Weight of the adversarial term (currently unused).
    @param lam_lp: Weight of the lp term.
    @param lam_gdl: Weight of the gradient-difference term.
    @param l_num: Exponent applied to the absolute pixel difference.
    @param alpha: Exponent applied to the absolute gradient difference.
    @return: A Keras-compatible loss function (y_true, y_pred) -> scalar.
    """
    def combined_loss(y_true,_):
      loss = lam_lp * lp_loss(gen_img_G, gts_G, l_num)
      loss += lam_gdl * gdl_loss(gen_img_G, gts_G, alpha)  
      # Adversarial term intentionally disabled:
      #loss += lam_adv * adv_loss(preds_D, tf.ones([batch_size, 1]))
      return loss   

    def adv_loss(preds, labels):
      scale_losses = []
      for i in range(len(preds)):
          loss = bce_loss(preds[i], labels)
          scale_losses.append(loss)
      return tf.reduce_mean(tf.stack(scale_losses))

    def bce_loss(preds, targets):
      # Same clipping as in the discriminator loss, to avoid log(0).
      preds = tf.clip_by_value(preds, 1e-7, 1. - 1e-7)
      return tf.squeeze(-1 * (tf.matmul(targets, log10(preds), transpose_a=True) +
                              tf.matmul(1 - targets, log10(1 - preds), transpose_a=True)))

    def lp_loss(gen_frames, gt_frames, l_num):
      # Sum of |generated - target| ** l_num per scale, averaged over scales.
      scale_losses = []
      for i in range(len(gen_frames)):
        scale_losses.append(tf.reduce_sum(tf.abs(gen_frames[i] - gt_frames[i])**l_num))
      return tf.reduce_mean(tf.stack(scale_losses))

    def gdl_loss(gen_frames, gt_frames, alpha):
      # Gradient difference loss: compares absolute horizontal and vertical
      # neighbour differences of generated and target frames at every scale.
      scale_losses = []
      for i in range(len(gen_frames)):
        # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
        # The 3x3 identity keeps the three channels separate.
        pos = tf.constant(np.identity(3), dtype=tf.float32)
        neg = -1 * pos
        filter_x = tf.expand_dims(tf.stack([neg, pos]), 0)  # [-1, 1]
        filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)])  # [[1],[-1]]
        strides = [1, 1, 1, 1]  # stride of (1, 1)
        padding = 'SAME'

        gen_dx = tf.abs(tf.nn.conv2d(gen_frames[i], filter_x, strides, padding=padding))
        gen_dy = tf.abs(tf.nn.conv2d(gen_frames[i], filter_y, strides, padding=padding))
        gt_dx = tf.abs(tf.nn.conv2d(gt_frames[i], filter_x, strides, padding=padding))
        gt_dy = tf.abs(tf.nn.conv2d(gt_frames[i], filter_y, strides, padding=padding))

        grad_diff_x = tf.abs(gt_dx - gen_dx)
        grad_diff_y = tf.abs(gt_dy - gen_dy)

        scale_losses.append(tf.reduce_sum((grad_diff_x ** alpha + grad_diff_y ** alpha)))
      return tf.reduce_mean(tf.stack(scale_losses))

    return combined_loss

In [7]:
model = VideoGAN()
model.train(24)

Generator Initialization
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Discriminator Initialization
Optimizer Initialization
Train discriminator...
Instructions for updating:
Use tf.cast instead.
Training generator...
epoch: 0, [Discriminator loss: 20.228703  Total loss: 17211.798828]
Train discriminator...
Training generator...
epoch: 1, [Discriminator loss: 42.028423  Total loss: 11223.508789]
Train discriminator...
Training generator...
epoch: 2, [Discriminator loss: 35.112808  Total loss: 8658.461914]
Train discriminator...
Training generator...
epoch: 3, [Discriminator loss: 11.935239  Total loss: 7845.647949]
Train discriminator...
Training generator...
epoch: 4, [Discriminator loss: 10.460975  Total loss: 6838.465820]
Train discriminator...
Training generator...
epoch: 5, [Discriminator loss: 7.138430  Total loss: 7449.572754]
Train discrim

KeyboardInterrupt: ignored