# **Necessary Imports**
# Run each cell by pressing shift+enter keys

In [0]:
pip install keras_layer_normalization

In [1]:
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise
from keras_layer_normalization import LayerNormalization
from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D
from keras.layers import Concatenate
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D, Conv2DTranspose


from keras.layers import Conv2DTranspose, ConvLSTM2D, BatchNormalization, TimeDistributed
from keras.models import Sequential, load_model


from keras.models import Sequential, Model
from keras.optimizers import Adam, RMSprop
from keras import losses
from keras.losses import binary_crossentropy
from keras.utils import to_categorical

import keras.backend as K
import scipy
import imageio
import matplotlib.pyplot as plt
import os
import glob
import numpy as np
from random import randrange

from skimage.util import random_noise
from PIL import Image

from os import listdir
from os.path import isfile, join, isdir


Using TensorFlow backend.


# **Run the cell below and mount your Google Drive by following the generated link**

# Copy the authorization code shown there, paste it into the input box below, and then press Enter.


In [0]:
# Mount Google Drive at /gdrive; the unzip cell further down reads the
# training archive from "/gdrive/My Drive/UCSD/Training.zip".
from google.colab import drive
drive.mount('/gdrive')

# **The following cell copies the training data from your Google Drive to Colab**
# For this to work, upload the training data as Training.zip to a new folder named "UCSD" in your Drive.

In [0]:
# Create the extraction directory and unpack the archive into it (-q: quiet).
!mkdir Train
!unzip -q "/gdrive/My Drive/UCSD/Training.zip" -d Train
# Glob pattern read by get_training_data(): every entry inside each
# sub-directory of Train/.
Dataset_Path ="Train/*/*"

In [0]:
# Glob pattern read by get_training_data(). This repeats the assignment made
# at the end of the previous cell with the same value.
Dataset_Path ="Train/*/*"

# Training Data Generation

# Group the frames into sequences of 10 frames per input

In [0]:
def get_training_data():
    """Load the training frames and pack them into 10-frame sequences.

    Every path matched by the module-level ``Dataset_Path`` glob is read in
    sorted order, converted to grayscale, resized to 256x256 and scaled to the
    range [0, 1]. Consecutive frames of that flat, sorted list are grouped ten
    at a time, so a sequence may span two sub-directories when a directory's
    frame count is not a multiple of ten.

    Returns:
        A NumPy array of shape ``(num_sequences, 10, 256, 256, 1)``. Frames
        left over after the last complete sequence are dropped.
    """
    files = sorted(glob.glob(Dataset_Path))

    input_shape = (256, 256)
    n = 10

    num_sequences = len(files) // n
    data = np.zeros((num_sequences, n, input_shape[0], input_shape[1], 1))

    # Only complete sequences fit in ``data``. Walking past them (as happens
    # when the frame count is not a multiple of ``n``) would index beyond the
    # first axis and raise IndexError.
    for position, filename in enumerate(files[:num_sequences * n]):
        idx, i = divmod(position, n)

        # Open in grayscale and resize to 256x256; the context manager closes
        # the underlying file once the converted copy has been made.
        with Image.open(filename) as frame:
            im = frame.convert('L').resize((input_shape[0], input_shape[1]))

        # Normalize pixel values to the range 0 to 1.
        img = np.array(im, dtype=np.float32) / 255.0

        # Store as frame ``i`` of sequence ``idx`` with a trailing channel axis.
        data[idx, i, :, :, :] = img.reshape(input_shape[0], input_shape[1], 1)
    return data

# The function below adds random noise to each frame of each video. This adds some variability to the training data.

In [0]:
def get_noisy_data(data):
    """Return a noisy version of every item in *data*.

    Each element obtained by iterating over ``data`` (its first axis, when it
    is an array) is passed to ``random_noise`` with a variance of
    ``0.155 ** 2``.

    Args:
        data: Iterable of image arrays accepted by ``random_noise``.

    Returns:
        A NumPy array stacking the noisy items. An item for which
        ``random_noise`` raises ``ValueError`` is reported and skipped, so the
        result can be shorter than ``data`` in that case.
    """
    noisy = []
    sigma = 0.155

    for frame in data:
        try:
            # Add random noise to this item.
            noisy_frame = random_noise(frame, var=sigma ** 2)
        except ValueError as error:
            # Bind the exception so it can actually be reported; the handler
            # previously referenced an undefined name and raised NameError.
            print(error)
            continue
        noisy.append(noisy_frame)
    return np.array(noisy)

# **The training class, including all of the network architecture**

In [0]:
class Training_Model():
    """Generator/discriminator pair trained adversarially on 10-frame sequences.

    The generator is a convolutional encoder, a stack of ConvLSTM2D layers and
    a transposed-convolution decoder that outputs a sequence of the same shape
    as its input. The discriminator maps a sequence to a single sigmoid score.
    ``adversarial_model`` chains generator and (frozen) discriminator and is
    the model ``train`` uses to update the generator.
    """

    def __init__(self):
        """Load the training data and build all networks."""
        # Weight of the reconstruction loss in the combined adversarial model
        # (first entry of ``loss_weights``). This is not a learning rate.
        self.alpha = 0.08

        self.input_height = 256
        self.input_width = 256
        self.output_height = 256
        self.output_width = 256

        # 1 for grayscale
        self.channels = 1

        # get the training data
        self.training_data = get_training_data()
        print("Training Data", self.training_data.shape)

        # create the adversarial model
        self.create_networks()

    def create_networks(self):
        """Create and compile the discriminator, generator and combined model."""
        # Shape of one input sequence, i.e. [10, 256, 256, 1].
        frame_dims = [10, self.input_height, self.input_width, self.channels]

        # Optimizer shared by all three compiled models.
        optimizer = RMSprop(lr=0.002, clipvalue=1.0, decay=1e-8)

        # Discriminator: trained on its own to tell original sequences from
        # generated ones. The output is binary (real/fake), hence binary
        # cross-entropy. It must be compiled while still trainable; freezing
        # it before this compile would leave its train_on_batch calls without
        # any weights to update.
        self.discriminator = self.create_discriminator(frame_dims)
        self.discriminator.compile(optimizer=optimizer, loss='binary_crossentropy')

        # Generator: architecture defined in create_generator below.
        self.generator = self.create_generator(frame_dims)
        self.generator.compile(optimizer=optimizer, loss='binary_crossentropy')

        print('\nGenerator Model')

        # summary lists the layers and the number of parameters in each.
        self.generator.summary()

        # Inside the combined model only the generator is trained, so the
        # discriminator is frozen before that model is built and compiled.
        self.discriminator.trainable = False

        inp = Input(shape=frame_dims)

        reconstructed_frames = self.generator(inp)

        # Discriminator score for the reconstructed sequence.
        validate = self.discriminator(reconstructed_frames)

        # The adversarial model trains the generator to minimize the
        # reconstruction loss and to make the discriminator score its output
        # as real.
        self.adversarial_model = Model(inp, [reconstructed_frames, validate])

        # Two losses, one per output: reconstruction (weighted by alpha) and
        # discriminator score (weight 1).
        self.adversarial_model.compile(
            loss=['binary_crossentropy', 'binary_crossentropy'],
            loss_weights=[self.alpha, 1],
            optimizer=optimizer)

        print('\nDiscriminator')
        self.discriminator.summary()

        print('\nCombined Adversarial Model')
        self.adversarial_model.summary()

    def create_discriminator(self, input_shape):
        """Build the discriminator network.

        Args:
            input_shape: Shape of one input sequence, without the batch axis.

        Returns:
            An uncompiled Keras ``Model`` mapping a sequence to one sigmoid
            score in the range 0 to 1.
        """
        frames = Input(shape=input_shape)

        # Per-frame feature extraction: four stride-2 5x5 convolutions with
        # 16, 32, 64 and 128 filters, each followed by a LeakyReLU activation.
        # The first convolution is not followed by layer normalization.
        x = TimeDistributed(Conv2D(filters=16, kernel_size=5, strides=2, padding='same'))(frames)
        x = LeakyReLU()(x)

        x = TimeDistributed(Conv2D(filters=32, kernel_size=5, strides=2, padding='same'))(x)
        x = LayerNormalization()(x)
        x = LeakyReLU()(x)

        x = TimeDistributed(Conv2D(filters=64, kernel_size=5, strides=2, padding='same'))(x)
        x = LayerNormalization()(x)
        x = LeakyReLU()(x)

        x = TimeDistributed(Conv2D(filters=128, kernel_size=5, strides=2, padding='same'))(x)
        x = LayerNormalization()(x)
        x = LeakyReLU()(x)

        # Flatten the features into one vector and compute a single score
        # with a sigmoid, which lies in the range 0 to 1.
        x = Flatten()(x)
        x = Dense(1, activation='sigmoid')(x)

        return Model(frames, x)

    def create_generator(self, input_shape):
        """Build the generator: encoder, ConvLSTM stack, decoder.

        Args:
            input_shape: Accepted for symmetry with ``create_discriminator``
                but not used; the input shape is fixed in ``add_encoder``.

        Returns:
            An uncompiled Keras ``Sequential`` model.
        """
        # Encoder: per-frame strided convolutions.
        generator = self.add_encoder(Sequential())

        # Temporal part: ConvLSTM2D layers over the encoded frames.
        # return_sequences=True keeps an output for every time step.
        generator.add(ConvLSTM2D(64, (3, 3), padding="same", return_sequences=True))
        generator.add(LayerNormalization())
        generator.add(ConvLSTM2D(32, (3, 3), padding="same", return_sequences=True))
        generator.add(LayerNormalization())
        generator.add(ConvLSTM2D(64, (3, 3), padding="same", return_sequences=True))
        generator.add(LayerNormalization())

        # Decoder: upsample the encoded frames back to the input resolution.
        return self.deconv(generator)

    def add_encoder(self, model):
        """Append the encoder layers to *model* and return it.

        Args:
            model: Keras ``Sequential`` model to extend in place.

        Returns:
            The same ``model`` object.
        """
        # 11x11 convolution with stride 4 applied to each of the ten frames:
        # 256x256 is reduced to 64x64, with 128 feature maps per frame.
        model.add(TimeDistributed(Conv2D(128, (11, 11), strides=4, padding="same"),
                                  batch_input_shape=(None, 10, 256, 256, 1)))
        model.add(LayerNormalization())

        # 5x5 convolution with stride 2: 64x64 is reduced to 32x32.
        model.add(TimeDistributed(Conv2D(64, (5, 5), strides=2, padding="same")))
        model.add(LayerNormalization())

        return model

    def deconv(self, model):
        """Append the decoder layers to *model* and return it.

        Args:
            model: Keras ``Sequential`` model to extend in place.

        Returns:
            The same ``model`` object.
        """
        # Mirror of the encoder: stride-2 then stride-4 transposed
        # convolutions bring 32x32 back to 256x256.
        model.add(TimeDistributed(Conv2DTranspose(64, (5, 5), strides=2, padding="same")))
        model.add(LayerNormalization())
        model.add(TimeDistributed(Conv2DTranspose(128, (11, 11), strides=4, padding="same")))
        model.add(LayerNormalization())

        # Single-channel sigmoid output: ten frames shaped like the input.
        model.add(TimeDistributed(Conv2D(1, (11, 11), activation="sigmoid", padding="same")))

        return model

    def train(self, epochs, batch_size):
        """Train the networks, save the combined model and plot the loss.

        Args:
            epochs: Number of passes over the training data.
            batch_size: Number of sequences per training step.

        Side effects:
            Prints progress for every batch, writes the adversarial model to
            ``Model_PED2.hdf5`` and draws the reconstruction-loss curve.
        """
        # x and y values of the reconstruction-loss plot.
        plot_epochs = []
        plot_g_recontruction_losses = []

        noisy_data = get_noisy_data(self.training_data)
        print("noisy_data.shape", noisy_data.shape)

        # Adversarial ground truths
        ones = np.ones(batch_size)
        zeros = np.zeros(batch_size)

        # Number of full batches per epoch; a trailing partial batch is skipped.
        batches = len(self.training_data) // batch_size

        for epoch in range(epochs):
            print('Epoch ({}/{})-------------------------------------------------'.format(epoch,epochs))

            for batch_idx in range(batches):
                start = batch_idx * batch_size
                stop = start + batch_size

                # Matching batches of original and noisy frames as float32.
                original_frames = np.array(self.training_data[start:stop]).astype(np.float32)
                frames_with_noise = np.array(noisy_data[start:stop]).astype(np.float32)

                # generate fake frames
                batch_fake_frames = self.generator.predict(frames_with_noise)

                # Update the discriminator: original frames -> ones,
                # generated frames -> zeros.
                discriminator_loss_original_data = self.discriminator.train_on_batch(original_frames, ones)
                discriminator_loss_fake_data = self.discriminator.train_on_batch(batch_fake_frames, zeros)

                # Update the generator twice per discriminator update:
                # minimize the reconstruction loss and push the discriminator
                # score for reconstructions towards ones.
                self.adversarial_model.train_on_batch(frames_with_noise, [original_frames, ones])
                gen_loss = self.adversarial_model.train_on_batch(frames_with_noise, [original_frames, ones])

                # Fractional epoch as the x coordinate; gen_loss[1] is the
                # loss of the first (reconstruction) output.
                plot_epochs.append(epoch + batch_idx / batches)
                plot_g_recontruction_losses.append(gen_loss[1])

                msg = 'Epoch:[{0}]-[{1}/{2}] --> d_loss: {3:>0.3f}, gen_loss:{4:>0.3f}'.format(epoch, batch_idx, batches, discriminator_loss_original_data+discriminator_loss_fake_data, gen_loss[0])
                print(msg)

        # Save first so a plotting problem cannot cost the trained weights.
        self.adversarial_model.save("Model_PED2.hdf5")

        # Plot the generator's reconstruction loss over training.
        plt.title('Generator network reconstruction loss')
        plt.xlabel('Epoch')
        plt.ylabel('training loss')
        plt.grid()
        plt.plot(plot_epochs, plot_g_recontruction_losses)


# Run this cell to instantiate the class.

In [0]:
# Constructing the object loads the training frames via get_training_data()
# and builds and compiles all networks, printing a summary of each.
model = Training_Model()

# Run this cell to start training. Training takes a long time.
# `batch_size` is the number of samples trained on at once; `epochs` is the number of passes over the training data.

In [0]:
# 200 passes over the data, one 10-frame sequence per training step.
# When training finishes, the combined model is saved to Model_PED2.hdf5.
model.train(epochs=200, batch_size=1)