In [None]:
%%capture
pip install --upgrade tensorflow==2.11.0

In [None]:
%%capture
pip install skillsnetwork

In [None]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import tensorflow as tf
import keras
print("tensorflow version ",tf.__version__)
from tensorflow.keras import layers
from tensorflow.keras.layers import Input,Conv2DTranspose,BatchNormalization,ReLU,Conv2D,LeakyReLU
from tensorflow.keras.models import Sequential

from IPython import display
import skillsnetwork
print(skillsnetwork.__version__)

import matplotlib.pyplot as plt
%matplotlib inline

import os
from os import listdir
from pathlib import Path

import imghdr
import time
from tqdm.auto import tqdm


# This function will plot 5 images from an array

In [None]:
def plot_array(X,title=""):
    """Show the first five images of ``X`` side by side.

    Every image is min-max rescaled on its own to 0-255 before display, so
    the value range of the input (e.g. [-1, 1] or [0, 255]) does not matter.

    Args:
        X: sliceable batch of images; each element is either a tensor
            exposing ``.numpy()`` or anything ``np.asarray`` accepts.
        title: optional figure title; nothing is drawn when it is empty.

    Side effects:
        Sets the global matplotlib default figure size to (20, 20).
    """
    plt.rcParams['figure.figsize'] = (20,20)

    for i,x in enumerate(X[0:5]):
        x = x.numpy() if hasattr(x,'numpy') else np.asarray(x)
        min_ = x.min()
        span = x.max()-min_
        # A constant image has zero span; dividing by it would give NaN,
        # so fall back to 1 and render the image as all zeros (black).
        if span == 0:
            span = 1
        xnew = np.uint8(255*(x-min_)/span)
        plt.subplot(1,5,i+1)
        plt.imshow(xnew)
        plt.axis('off')
    if title:
        plt.suptitle(title)
    plt.show()

Applying Convolutional Neural Networks to GANs has led to improved results. They are called Deep Convolutional Generative Adversarial Networks (DCGANs). In this lab, we will build and train DCGANs using several approaches introduced in the original <a href="https://arxiv.org/pdf/1511.06434.pdf?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkML311Coursera747-2022-01-01">DCGANs paper</a>. 


The proposed approaches are summarized here:


- Replace any pooling layers with **strided convolutions (discriminator)** and **fractional-strided
convolutions (generator)**.
- Use **batchnorm** in both the generator and the discriminator.
- **Remove fully connected hidden layers** for deeper architectures.
- Use **ReLU** activation in generator for all layers except for the output, which uses **Tanh**.
- Use **LeakyReLU** activation in the discriminator for all layers except for the output, which uses **Sigmoid**.
- Use **Adam optimizer**.  


In [None]:
# Fetch the cartoon image archive with skillsnetwork.prepare; no target path
# is passed and overwrite=True is set.
# NOTE(review): presumably the archive is extracted into the current working
# directory, replacing any previous copy — confirm against the skillsnetwork docs.
dataset_url="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML311-Coursera/labs/Module5/L2/cartoon_data.tgz"
await skillsnetwork.prepare(dataset_url, overwrite=True)

The anime-face (cartoon) images are stored in the `cartoon_data` folder in your current working directory. As a preprocessing step, we have removed any files that are not proper image formats (based on the file extensions) and any duplicate images.


In [None]:
# Target image size in pixels and number of images per batch.
img_height = 64
img_width = 64
batch_size = 128

Next, we create a Keras <code>image_dataset_from_directory</code> object with a specified image directory and the parameters are defined as above.

In [None]:
# A relative path resolves against the current working directory (where the
# dataset archive was extracted), so the cell is not tied to Kaggle's
# /kaggle/working layout. label_mode=None yields image batches without labels.
train_ds = tf.keras.utils.image_dataset_from_directory(directory="cartoon_data",
                                                      image_size=(img_height,img_width),
                                                      batch_size=batch_size,
                                                      label_mode=None)

**(OPTIONAL)** If you are running this notebook locally on a machine with multiple cores, you can let `tf.data` tune the prefetch buffer size dynamically at runtime as follows:


In [None]:
# tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias;
# it lets tf.data choose the prefetch buffer size at runtime.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)

We apply the Lambda function on `train_ds` to normalize the pixel values of all the input images from $[0, 255]$ to $[-1, 1]$:


In [None]:
# Rescaling computes x*scale + offset, which maps [0, 255] onto [-1, 1].
# layers.Rescaling is the stable home of the former
# layers.experimental.preprocessing.Rescaling.
normalization_layer = layers.Rescaling(scale=1./127.5,offset=-1)
normalized_ds = train_ds.map(lambda x: normalization_layer(x))

Let's take one batch of images for displaying:


In [None]:
# take(1) gives a dataset that contains only the first batch of train_ds.
images = train_ds.take(1)

Iterating over `images` yields its batches; collect them in a list so that the batch can be accessed by index:


In [None]:
# Materialize the batches of `images` into a list.
X = list(images)

In [None]:
# X[0] is the first batch; plot_array displays its first five images.
plot_array(X[0])

The Generator is comprised of several layers of transposed convolution, the opposite of convolution operations.

- Each Conv2DTranspose layer (except the final layer) is followed by a Batch Normalization layer and a **Relu activation**; for more implementation details, check out <a href="https://arxiv.org/pdf/1511.06434.pdf?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkML311Coursera747-2022-01-01">[1]</a>. 
- The final transpose convolution layer has three output channels since the output needs to be a color image. We use the **Tanh activation** in the final layer. 

See the illustration of the architecture from <a href="https://arxiv.org/pdf/1511.06434.pdf?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkML311Coursera747-2022-01-01">[1]</a> below.

<center><img src="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML311-Coursera/labs/Module6/generator.png" alt="generator image" width="80%"></center>

We build the Generator network by using the parameter values from <a href="https://learnopencv.com/deep-convolutional-gan-in-pytorch-and-tensorflow/?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkML311Coursera747-2022-01-01" >[2]</a>.


In [None]:
def make_generator():
    """Build the DCGAN generator.

    The model maps a latent tensor of shape (1, 1, 100) to a 64x64x3 image
    through five transposed convolutions. The first four blocks are
    Conv2DTranspose -> BatchNormalization -> ReLU; the last layer has three
    output channels with a tanh activation and no batch normalization.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    def weight_init():
        # a fresh N(0, 0.02) initializer instance for every layer
        return tf.keras.initializers.RandomNormal(mean=0.0,stddev=0.02)

    model = Sequential()
    #input is a latent vector of 100 dimensions
    model.add(Input(shape=(1,1,100),name='input_layer'))

    # (filters, strides) of the four hidden blocks; with 'same' padding each
    # transposed convolution multiplies the spatial size by its stride:
    # 1 -> 4 -> 8 -> 16 -> 32
    hidden_blocks = [(64*8,4),(64*4,2),(64*2,2),(64*1,2)]
    for idx,(filters,strides) in enumerate(hidden_blocks,start=1):
        model.add(Conv2DTranspose(filters,kernel_size=4,strides=strides,padding='same',kernel_initializer=weight_init(),use_bias=False,name=f'conv_transpose_{idx}'))
        # center/scale are boolean switches in Keras; the former values 1 and
        # 0.02 were merely truthy, so True/True builds the same layer.
        # NOTE(review): momentum=0.1 and epsilon=0.8 are kept unchanged; they
        # look like values carried over from another framework's conventions — confirm.
        model.add(BatchNormalization(momentum=0.1,epsilon=0.8,center=True,scale=True,name=f'bn_{idx}'))
        model.add(ReLU(name=f'relu_{idx}'))

    #output block: 32*32*64 -> 64*64*3, tanh bounds the pixel values to [-1, 1]
    model.add(Conv2DTranspose(3,4,2,padding='same',kernel_initializer=weight_init(),use_bias=False,
                             activation='tanh',name='conv_transpose_5'))
    return model

In [None]:
# Build a generator instance only to inspect its layers and parameter counts.
gen = make_generator()
gen.summary()

The Discriminator has five convolution layers. 

- All but the first and final Conv2D layers have Batch Normalization, since directly applying batchnorm to all layers could result in sample oscillation and model instability; 
- The first four Conv2D layers use the **Leaky-Relu activation** with a slope of 0.2. 
- Lastly, instead of a fully connected layer, the  output layer has a convolution layer with a **Sigmoid activation** function.


In [None]:
def make_discriminator():
    """Build the DCGAN discriminator.

    The model takes a 64x64x3 image and applies five stride-2 convolutions.
    The first four are followed by LeakyReLU(0.2); blocks 2-4 additionally
    apply batch normalization before the activation. The last convolution
    has a single filter with a sigmoid activation, so the model outputs
    probabilities (not logits). With 'same' padding the spatial size halves
    five times (64 -> 2), giving a 2x2x1 output map per image.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    def weight_init():
        # a fresh N(0, 0.02) initializer instance for every layer
        return tf.keras.initializers.RandomNormal(mean=0,stddev=0.02)

    model = Sequential()
    #block 1 input is 64*64*3
    model.add(Input(shape=(64,64,3),name='input_layer'))

    # filters of the four hidden blocks; inputs are 64*64*3, 32*32*64,
    # 16*16*128 and 8*8*256 respectively
    for idx,filters in enumerate([64,64*2,64*4,64*8],start=1):
        model.add(Conv2D(filters,kernel_size=4,strides=2,padding='same',kernel_initializer=weight_init(),use_bias=False,name=f'conv_{idx}'))
        if idx > 1:
            # The first block has no batch norm, hence the bn index lags by one.
            # center/scale are boolean switches in Keras; the former numeric
            # values were merely truthy, so True/True builds the same layer.
            # NOTE(review): momentum=0.1 and epsilon=0.8 are kept unchanged; they
            # look like values carried over from another framework's conventions — confirm.
            model.add(BatchNormalization(momentum=0.1,epsilon=0.8,center=True,scale=True,name=f'bn_{idx-1}'))
        model.add(LeakyReLU(0.2,name=f'leaky_{idx}'))

    #block 5: single-filter convolution with sigmoid replaces a dense output layer
    model.add(Conv2D(1,4,2,padding='same',kernel_initializer=weight_init(),use_bias=False,
                    activation='sigmoid',name='conv_5'))

    return model

In [None]:
# Build a discriminator instance only to inspect its layers and parameter counts.
disc = make_discriminator()
disc.summary()

## Defining Loss Functions

In [None]:
# The discriminator's last layer already applies a sigmoid, so the loss
# receives probabilities; from_logits=True would treat them as raw logits.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

In [None]:
def generator_loss(xhat):
    """Cross-entropy of ``xhat`` against all-ones targets.

    Args:
        xhat: discriminator output for generated images.
    """
    real_labels = tf.ones_like(xhat)
    return cross_entropy(real_labels,xhat)

In [None]:
def discriminator_loss(X,xhat):
    """Average of the discriminator's cross-entropy on real and fake outputs.

    Args:
        X: discriminator output for real images (target: all ones).
        xhat: discriminator output for generated images (target: all zeros).
    """
    loss_on_real = cross_entropy(tf.ones_like(X),X)
    loss_on_fake = cross_entropy(tf.zeros_like(xhat),xhat)
    return 0.5*(loss_on_real + loss_on_fake)

## Defining Optimizers 
 
We create two Adam optimizers for the discriminator and the generator, respectively. We pass the following arguments to the optimizers:

- learning rate of 0.0002.
- beta coefficients $\beta_1 = 0.5$ and $\beta_2 = 0.999$, which are responsible for computing the running averages of the gradients during backpropagation.


In [None]:
# Separate Adam instances so each network keeps its own optimizer state.
generator_optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
    beta_2=0.999,
)
discriminator_optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.0002,
    beta_1=0.5,
    beta_2=0.999,
)

## Create Train Step Function

As this lab is more computationally intensive than the last lab, we convert the training step into a function and then use the  @tf.function decorator, which allows the function to be "compiled" into a **callable TensorFlow graph**. This will speed up the training; for more information, read <a href="https://www.tensorflow.org/guide/function?utm_medium=Exinfluencer&utm_source=Exinfluencer&utm_content=000026UJ&utm_term=10006555&utm_id=NA-SkillsNetwork-Channel-SkillsNetworkCoursesIBMDeveloperSkillsNetworkML311Coursera747-2022-01-01">here </a> 


In [None]:
@tf.function
def train_step(X):
    """Run one optimization step of both networks on a batch of real images.

    Reads the module-level ``generator``, ``discriminator``,
    ``generator_optimizer``, ``discriminator_optimizer`` and ``latent_dim``.
    NOTE(review): tf.function captures those globals when the function is
    traced; if the models are re-created later, this cell presumably has to
    be re-run as well — confirm.

    Args:
        X: batch of real images (the training loop passes batches of
            ``normalized_ds``).

    Returns:
        Tuple ``(gen_loss, disc_loss)`` with the scalar losses of this step.
    """
    # one standard-normal latent vector per real image, so a smaller final
    # batch is matched by an equally small fake batch
    z = tf.random.normal([tf.shape(X)[0],1,1,latent_dim])
    # one tape per network, each recording the forward pass for its own loss
    with tf.GradientTape() as gen_tape,tf.GradientTape() as disc_tape:
        #generate sample
        xhat = generator(z,training=True)
        #the output of the discriminator for real data
        real_output=discriminator(X,training=True)
        #the output of the discriminator for fake data
        fake_output=discriminator(xhat,training=True)

        #loss for each
        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output,fake_output)

    # Gradients are taken after leaving the tape context so that the tapes do
    # not also record the gradient computation itself.
    gradients_of_generator = gen_tape.gradient(gen_loss,generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss,discriminator.trainable_variables)

    # Ask the optimizers to apply the gradients
    generator_optimizer.apply_gradients(zip(gradients_of_generator,generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator,discriminator.trainable_variables))

    return gen_loss,disc_loss
        


Don't be intimidated by the code above, here is a summary of what a train step accomplishes:

- First, we sample `z`, a batch of noise vectors from a standard normal distribution ($\mu = 0, \sigma = 1$) and feed it to the Generator.
- The Generator produces generated or "fake" images `xhat`.
- We feed real images `X` and fake images `xhat` to the Discriminator and obtain `real_output` and `fake_output` respectively as the scores.
- We calculate Generator loss `gen_loss` using the `fake_output` from Discriminator since we want the fake images to fool the Discriminator as much as possible.
- We calculate Discriminator loss `disc_loss` using both the `real_output` and `fake_output` since we want the Discriminator to distinguish the two as much as possible.
- We calculate `gradients_of_generator` and  `gradients_of_discriminator` based on the losses obtained.
- Finally, we update the Generator and Discriminator by letting their respective optimizers apply the processed gradients on the trainable model parameters.


In [None]:
# Batch size used when sampling noise, and dimensionality of the latent space.
BATCH_SIZE=128
latent_dim=100

# Output of an untrained generator on random noise, as a "before training" reference.
generator = make_generator()
noise = tf.random.normal([BATCH_SIZE,1,1,latent_dim])
xhat = generator(noise,training=False)
plot_array(xhat)

In [None]:
epochs=6

# fresh, untrained networks for this run
discriminator=make_discriminator()

generator= make_generator()


for epoch in range(epochs):

    start = time.time()
    # i counts the training steps (batches) of the current epoch, starting at 1
    for i,X in enumerate(tqdm(normalized_ds, desc=f"epoch {epoch+1}", total=len(normalized_ds)),start=1):

        # report every 1000th step only (`if i%1000:` is true on every step
        # that is NOT a multiple of 1000 and would flood the output)
        if i%1000 == 0:
            print("epoch {}, iteration {}".format(epoch+1, i))

        train_step(X)


    # end-of-epoch preview: one real batch next to freshly generated images
    noise = tf.random.normal([BATCH_SIZE, 1, 1, latent_dim])
    Xhat=generator(noise,training=False)
    # a single batch is enough for the preview; avoid loading the whole dataset into a list
    real_batch=next(iter(normalized_ds))
    print("orignal images")
    plot_array(real_batch)
    print("generated images")
    plot_array(Xhat)
    print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

### Loading Pre-trained model (150 epochs)
As you saw, training a GAN for even a single epoch takes quite a long time. If we want to evaluate the performance of a fully trained and optimized GAN, we would need to increase the number of epochs. Thus, to help you avoid extremely long training time in this lab, we will just download the pre-trained Generator network parameters and then use the Keras `load_model` function to obtain a pre-trained Generator, which we will use to generate images directly.


In [None]:
# Fetch the archive with the pre-trained generator via skillsnetwork.prepare;
# no target path is passed and overwrite=True is set.
# NOTE(review): presumably the archive is extracted into the current working
# directory, replacing any previous copy — confirm against the skillsnetwork docs.
generator_url="https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML311-Coursera/labs/Module6/generator.tar.gz"
await skillsnetwork.prepare(generator_url, overwrite=True)

In [None]:
from tensorflow.keras.models import load_model

# A relative path resolves against the current working directory (where the
# generator archive was extracted), so the cell is not tied to Kaggle's
# /kaggle/working layout.
full_generator = load_model('generator')

In [None]:
# Must match the latent size the generator input expects (1, 1, latent_dim).
latent_dim=100

# Sample 200 latent vectors; plot_array displays the first five generated images.
noise = tf.random.normal([200,1,1,latent_dim])
xhat = full_generator(noise,training=False)
plot_array(xhat)

## Explore Latent Variables 
Values of $\mathbf{z}$ that are relatively close together will produce similar images. For example, we can set every element of $\mathbf{z}$ to the same constant and step that constant through nearby values such as $1, 0.8, 0.6, 0.5$. 


In [None]:
# Feed constant latent vectors whose entries all equal `value` and show the result.
for value in [1,0.8,0.6,0.5]:
    z_const = value*tf.ones([1,1,1,latent_dim])
    xhat = full_generator(z_const,training=False)
    plot_array(xhat)