<a href="https://colab.research.google.com/github/KuanChihLee/Generative-Adversarial-Networks/blob/master/VideoGAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import multiply, Embedding, concatenate
from keras.layers import BatchNormalization
from keras.layers import Conv2D, UpSampling2D, MaxPooling2D
from keras.layers import LeakyReLU
from keras.layers import Activation
from keras.layers import Lambda
from keras.models import Sequential, Model
from keras.optimizers import Adam, SGD, RMSprop

import os
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')

Using TensorFlow backend.


In [0]:
# Layer-size tables for the multi-scale networks. Every table has one row per
# scale network; the builders index rows by scale number, and row 0 is the
# network that runs at the smallest resolution.

# Generator: number of filters of each Conv2D layer, in order.
CONV_CHANNELS_G = [
    [128, 256, 128, 3],            # scale 0
    [128, 256, 128, 3],            # scale 1
    [128, 256, 512, 256, 128, 3],  # scale 2
    [128, 256, 512, 256, 128, 3],  # scale 3
]

# Generator: kernel size of each Conv2D layer (same layout as CONV_CHANNELS_G).
CONV_KERNELS_G = [
    [3, 3, 3, 3],        # scale 0
    [5, 3, 3, 5],        # scale 1
    [5, 3, 3, 3, 3, 5],  # scale 2
    [7, 5, 5, 5, 5, 7],  # scale 3
]

# Discriminator: the first entry of a row is the input channel count, the
# remaining entries are the filters of each Conv2D layer.
CONV_CHANNELS_D = [
    [3, 64],                  # scale 0
    [3, 64, 128, 128],        # scale 1
    [3, 128, 256, 256],       # scale 2
    [3, 128, 256, 512, 128],  # scale 3
]

# Discriminator: kernel size of each Conv2D layer (one fewer entry per row
# than CONV_CHANNELS_D, because the input channel count has no kernel).
CONV_KERNELS_D = [
    [3],           # scale 0
    [3, 3, 3],     # scale 1
    [5, 5, 5],     # scale 2
    [7, 7, 5, 5],  # scale 3
]

# Discriminator: widths of the Dense layers; the last entry is the output unit.
FC_LAYERS_D = [
    [512, 256, 1],   # scale 0
    [1024, 512, 1],  # scale 1
    [1024, 512, 1],  # scale 2
    [1024, 512, 1],  # scale 3
]

In [0]:
class loss_function():
  """
  Namespace for the adversarial loss helpers.

  The functions keep no state, so they are static methods and are called as
  loss_function.adv_loss(...) / loss_function.bce_loss(...).
  """

  @staticmethod
  def _log10(t):
    """
    Element-wise base-10 logarithm, computed as ln(t) / ln(10) with TensorFlow ops.
    @param t: The input tensor.
    @return: A tensor with the base-10 logarithm of every element of t.
    """
    numerator = tf.math.log(t)
    denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))
    return numerator / denominator

  @staticmethod
  def adv_loss(preds, labels):
    """
    Calculates the mean of the BCE losses between the predicted classifications and true labels.
    @param preds: The predicted classifications at each scale (a sequence with one tensor per scale).
    @param labels: The true labels. (Same for every scale).
    @return: The adversarial loss: the per-scale BCE losses averaged into one scalar tensor.
    """
    # calculate the loss for each scale; bce_loss lives on the class, so it has
    # to be reached through the class name rather than as a bare identifier
    scale_losses = [loss_function.bce_loss(scale_pred, labels) for scale_pred in preds]

    # condense into one tensor and avg
    return tf.reduce_mean(tf.stack(scale_losses))

  @staticmethod
  def bce_loss(preds, targets):
    """
    Calculates the sum of binary cross-entropy losses between predictions and targets.
    The logarithm is taken in base 10.
    @param preds: An Nx1 tensor. The predicted classifications of each frame. Values must lie
                  strictly inside (0, 1); the logarithm of 0 is not finite.
    @param targets: An Nx1 tensor. The target labels for each frame. The formula weights
                    log(preds) by targets and log(1 - preds) by (1 - targets), which is the
                    usual cross-entropy when the labels are 0 or 1. Not "truths" because the
                    generator passes in lies to determine how well it confuses the
                    discriminator.
    @return: The sum of binary cross-entropy losses, as a scalar tensor.
    """
    log10 = loss_function._log10
    # targets^T . log(preds) sums the per-frame terms in one matrix product
    return tf.squeeze(-1 * (tf.matmul(targets, log10(preds), transpose_a=True) +
                            tf.matmul(1 - targets, log10(1 - preds), transpose_a=True)))

In [0]:
class VideoGAN:
  """
  Multi-scale video GAN: one generator sub-network and one discriminator
  sub-network per scale, sized by the CONV_* / FC_* tables passed to the
  builder methods.

  After construction:
    self.G       Keras Model over the training-resolution generator graph.
    self.G_test  Keras Model over the ground-resolution generator graph; it
                 applies the same per-scale networks (shared weights) as self.G.
    self.D       Keras Model mapping one frame to one prediction per scale.
  """

  def __init__(self):

    '''  Build network graph '''

    print("Generator")
    self.G = self.generator(32, 32, 3, 210, 160, 3, CONV_CHANNELS_G, CONV_KERNELS_G)
    print("Discriminator")
    self.D = self.discriminator(32, 32, CONV_CHANNELS_D, CONV_KERNELS_D, FC_LAYERS_D)

    # Kept on the instance so a later training step can reach them; nothing in
    # this class consumes them yet.
    self.optimizer = Adam(lr=0.0002, beta_1=0.5, decay=8e-8)
    self.labels = Input(shape=(1,))
    # TODO: combine the per-scale discriminator outputs into one loss, e.g.
    #   loss_function.adv_loss(self.scale_preds_D, self.labels)

  def discriminator(self, height, width,
                    conv_layers, conv_kernels, fc_layers):
    """
    Builds the multi-scale discriminator.

    The full-resolution frame is resized once per scale (scale s of n uses
    1 / 2**(n - 1 - s) of the height and width) and each resized copy is
    classified by its own conv + dense network.

    @param height: Height of the full-resolution input frame.
    @param width: Width of the full-resolution input frame.
    @param conv_layers: Per-scale channel lists. Entry [s][0] is the input channel
                        count; the remaining entries are Conv2D filter counts.
    @param conv_kernels: Per-scale kernel sizes, one per Conv2D layer.
    @param fc_layers: Per-scale Dense widths. The last width is the sigmoid output;
                      every row needs at least one entry.
    @return: A Keras Model taking one frame and returning a list with one
             prediction tensor per scale (also stored in self.scale_preds_D).
    """
    self.height_D = height
    self.width_D = width

    self.conv_layers_D = conv_layers
    self.conv_kernels_D = conv_kernels
    self.fc_layers_D = fc_layers

    self.scale_nets_num_D = len(conv_layers)

    self.scale_preds_D = []

    Input_img = Input(shape=(self.height_D, self.width_D, self.conv_layers_D[0][0]))

    def build_scale_net(net_num, scale_shape):
      """Returns the Sequential classifier for one scale, fed with scale_shape images."""
      channels = conv_layers[net_num]
      kernels = conv_kernels[net_num]
      fc_sizes = fc_layers[net_num]

      model = Sequential()
      # conv
      model.add(Conv2D(channels[1], kernels[0], input_shape=scale_shape, padding='same'))
      model.add(Activation('relu'))
      model.add(Dropout(0.2))
      for layer in range(2, len(channels)):
        # kernels has no entry for the input channel count, hence layer - 1
        model.add(Conv2D(channels[layer], kernels[layer - 1], padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
        model.add(Dropout(0.2))

      # FC
      model.add(Flatten())
      for units in fc_sizes[:-1]:
        model.add(Dense(units))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation('relu'))
        model.add(Dropout(0.2))
      model.add(Dense(fc_sizes[-1]))
      model.add(Activation('sigmoid'))
      model.summary()
      return model

    for net_num in range(self.scale_nets_num_D):

      self.net_num_D = net_num
      scale_factor = 1. / 2 ** ((self.scale_nets_num_D - 1) - net_num)
      self.scale_height_D = int(self.height_D * scale_factor)
      self.scale_width_D = int(self.width_D * scale_factor)
      self.scale_shape = (self.scale_height_D, self.scale_width_D, self.conv_layers_D[net_num][0])

      # The resize has to happen inside a Keras layer: a bare TensorFlow op
      # yields a tensor without Keras layer metadata, and a Model cannot be
      # traced from the predictions back to Input_img through it.
      size = [self.scale_height_D, self.scale_width_D]
      inputs = Lambda(lambda img, size=size: tf.image.resize_images(img, size))(Input_img)

      scale_model = build_scale_net(net_num, self.scale_shape)
      self.scale_preds_D.append(scale_model(inputs))

    return Model(Input_img, list(self.scale_preds_D))


  def generator(self, height_train, width_train, channel_train,
                      height_ground, width_ground, channel_ground,
                      conv_layers, conv_kernels):
    """
    Builds the multi-scale generator.

    One fully convolutional network is created per scale. Each network is applied
    twice, to training-resolution inputs and to ground-resolution inputs, so both
    graphs share weights. A network reads 4 stacked frames (4 * channel_train
    channels); every scale after the first additionally reads a channel_train-channel
    prediction image that is concatenated to the frames.

    NOTE(review): the extra prediction image of a scale is a separate model input;
    it is not wired to the output of the previous scale. Confirm whether chaining
    the scales is intended.

    @param height_train: Height of the full-resolution training input.
    @param width_train: Width of the full-resolution training input.
    @param channel_train: Channels per training frame.
    @param height_ground: Height of the full-resolution ground/test input.
    @param width_ground: Width of the full-resolution ground/test input.
    @param channel_ground: Channels per ground/test frame. The same weights are applied
                           to both graphs, so this has to match channel_train.
    @param conv_layers: Per-scale Conv2D filter counts (at least two per scale).
    @param conv_kernels: Per-scale Conv2D kernel sizes, same layout as conv_layers.
    @return: A Keras Model over the training graph. Its inputs are, scale by scale,
             the stacked frames followed (from the second scale on) by the prediction
             image; its outputs are the per-scale generated images
             (self.scale_preds_train). The ground-resolution counterpart is stored
             in self.G_test.
    @raise ValueError: If a scale lists fewer than two conv layers.
    """
    self.train_H_G = height_train
    self.train_W_G = width_train
    self.train_C_G = channel_train

    self.test_H_G = height_ground
    self.test_W_G = width_ground
    self.test_C_G = channel_ground

    self.conv_layers_G = conv_layers
    self.conv_kernels_G = conv_kernels

    self.scale_nets_num = len(conv_layers)

    self.scale_preds_train = []  # the generated images at each scale
    self.scale_preds_test = []  # the generated images at each scale

    self.d_scale_preds = []  # the predictions from the discriminator model

    def build_scale_net(net_num, scale_height, scale_width):
      """Returns the Model of one scale; it takes [frames] or [frames, prediction]."""
      channels = conv_layers[net_num]
      kernels = conv_kernels[net_num]
      if len(channels) < 2:
        raise ValueError("generator scale %d needs at least 2 conv layers, got %d"
                         % (net_num, len(channels)))

      frames = Input(shape=(scale_height, scale_width, self.train_C_G * 4))
      net_inputs = [frames]
      stacked = frames
      in_channels = self.train_C_G * 4
      if net_num > 0:
        # every scale but the first also sees a prediction image
        coarse_pred = Input(shape=(scale_height, scale_width, self.train_C_G))
        net_inputs.append(coarse_pred)
        stacked = concatenate([frames, coarse_pred], axis=-1)
        in_channels = self.train_C_G * 5

      model = Sequential()

      model.add(Conv2D(channels[0], kernels[0], padding='same',
                       input_shape=(scale_height, scale_width, in_channels)))
      model.add(BatchNormalization(momentum=0.8))
      model.add(Activation('relu'))
      model.add(Dropout(0.2))

      for layer in range(1, len(channels) - 1):
        model.add(Conv2D(channels[layer], kernels[layer], padding='same'))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation('relu'))
        model.add(Dropout(0.2))

      # output layer: tanh, no normalisation or dropout
      model.add(Conv2D(channels[-1], kernels[-1], padding='same'))
      model.add(Activation('tanh'))
      model.summary()

      return Model(net_inputs, model(stacked))

    '''  Build Generator '''
    train_inputs = []
    test_inputs = []
    for net_num in range(self.scale_nets_num):

      self.net_num = net_num
      scale_factor = 1. / 2 ** ((self.scale_nets_num - 1) - net_num)
      self.scale_height = int(self.train_H_G * scale_factor)
      self.scale_width = int(self.train_W_G * scale_factor)
      self.scale_height_test = int(self.test_H_G * scale_factor)
      self.scale_width_test = int(self.test_W_G * scale_factor)

      scale_in_train = [Input(shape=(self.scale_height, self.scale_width, self.train_C_G * 4))]
      scale_in_test = [Input(shape=(self.scale_height_test, self.scale_width_test, self.test_C_G * 4))]

      if net_num > 0:
        scale_in_train.append(Input(shape=(self.scale_height, self.scale_width, self.train_C_G)))
        scale_in_test.append(Input(shape=(self.scale_height_test, self.scale_width_test, self.test_C_G)))

      scale_model = build_scale_net(net_num, self.scale_height, self.scale_width)

      # the same scale_model is applied to both resolutions, so weights are shared
      self.scale_preds_train.append(scale_model(scale_in_train))
      self.scale_preds_test.append(scale_model(scale_in_test))

      train_inputs.extend(scale_in_train)
      test_inputs.extend(scale_in_test)

    self.G_test = Model(test_inputs, list(self.scale_preds_test))
    return Model(train_inputs, list(self.scale_preds_train))

In [5]:
model = VideoGAN()

Generator
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 4, 4, 128)         13952     
_________________________________________________________________
batch_normalization_1 (Batch (None, 4, 4, 128)         512       
_________________________________________________________________
activation_1 (Activation)    (None, 4, 4, 128)         0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 4, 4, 128)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 256)         295168    
_________________________________________________________________
batch_norma