# DCGAN

Heavily based on the following tutorial:
https://github.com/awjuliani/TF-Tutorials/blob/master/DCGAN.ipynb

In [1]:
#Import the libraries we will need.
from IPython.display import YouTubeVideo
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import tensorflow.contrib.slim as slim
import os
import scipy.misc
import scipy

We will be using the MNIST dataset. input_data is a library that downloads the dataset and unzips it automatically.

In [2]:
# Load MNIST from the local "MNIST_data/" directory; labels are requested
# without one-hot encoding.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=False,
)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Helper Functions

In [12]:
#This function performs a leaky relu activation, which is needed for the discriminator network.
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU activation.

    Computes ``0.5*(1+leak)*x + 0.5*(1-leak)*|x|``, which equals ``x`` for
    positive inputs and ``leak * x`` for negative inputs.

    Args:
        x: Input tensor.
        leak: Slope applied to negative inputs.
        name: Variable-scope name under which the ops are created.

    Returns:
        A tensor with the activation applied element-wise.
    """
    identity_coeff = 0.5 * (1 + leak)
    magnitude_coeff = 0.5 * (1 - leak)
    with tf.variable_scope(name):
        identity_term = identity_coeff * x
        magnitude_term = magnitude_coeff * abs(x)
        return identity_term + magnitude_term
    
#The below functions are taken from carpedm20's implementation https://github.com/carpedm20/DCGAN-tensorflow
#They allow for saving sample images from the generator to follow progress
def save_images(images, size, image_path):
    """Rescale a batch of images with `inverse_transform` and save them as one grid.

    Args:
        images: Batch of images, forwarded to `inverse_transform`.
        size: Grid layout, forwarded to `imsave`.
        image_path: Destination path of the image file.

    Returns:
        Whatever `imsave` returns.
    """
    rescaled = inverse_transform(images)
    return imsave(rescaled, size, image_path)

def imsave(images, size, path):
    """Tile `images` into a single grid via `merge` and write it to `path`.

    Args:
        images: Batch of images to tile.
        size: Grid layout passed to `merge`.
        path: Destination path of the image file.

    Returns:
        The result of `scipy.misc.imsave`.
    """
    grid = merge(images, size)
    return scipy.misc.imsave(path, grid)

def inverse_transform(images):
    """Linearly map values from the range [-1, 1] to the range [0, 1].

    Args:
        images: Array (or scalar) of values.

    Returns:
        ``(images + 1) / 2``.
    """
    shifted = images + 1.
    return shifted / 2.

def merge(images, size):
    """Tile a batch of equally sized 2-D images into one grid image.

    Images are placed left to right, top to bottom. Grid cells with no
    corresponding image stay zero.

    Args:
        images: Array indexed as (image, row, column).
        size: Sequence ``(grid_rows, grid_cols)``.

    Returns:
        A 2-D array of shape ``(h * grid_rows, w * grid_cols)``, where ``h``
        and ``w`` are the height and width of a single image.
    """
    tile_h, tile_w = images.shape[1], images.shape[2]
    grid_rows, grid_cols = size[0], size[1]
    canvas = np.zeros((tile_h * grid_rows, tile_w * grid_cols))

    for position, tile in enumerate(images):
        # Row-major placement: the quotient is the grid row, the remainder the column.
        grid_row, grid_col = divmod(position, grid_cols)
        top = grid_row * tile_h
        left = grid_col * tile_w
        canvas[top:top + tile_h, left:left + tile_w] = tile

    return canvas

def resize_nearest_neighbor(x, scale=2):
    """Upsample a 4-D tensor by an integer factor using nearest-neighbour resizing.

    Args:
        x: 4-D tensor whose second and third dimensions are statically known.
        scale: Integer factor applied to both of those dimensions.

    Returns:
        The resized tensor.
    """
    _, dim1, dim2, _ = x.get_shape()
    target_size = (scale * int(dim1), scale * int(dim2))
    return tf.image.resize_nearest_neighbor(x, target_size)

## Defining the Adversarial Networks

### Generator Network

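The generator takes a vector of random numbers and transforms it into a 32x32 image. A dense layer (followed by batch normalization and a rectified nonlinearity) first projects the vector to a 4x4x256 feature map. Each of the next three layers applies a nearest neighbor image resize operation, convolution, batch normalization, and rectified nonlinearity. A final convolution followed by a tanh nonlinearity produces the image.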

In [4]:
def generator(z, training=True, reuse=False):
    """Build the generator: latent vectors in, 32x32 single-channel images out.

    Relies on the module-level `initializer` (defined in the graph-building
    cell) for the dense layer weights, so that cell must define it before this
    function is called.

    Args:
        z: 2-D tensor of latent vectors.
        training: Passed as `training=` to every batch-normalization layer.
        reuse: Passed to the 'generator' variable scope.

    Returns:
        The tanh-activated output of the final convolution.
    """
    # (filters, layer name) for each resize -> conv -> batch-norm -> relu stage.
    upsample_stages = ((64, 'conv1'), (32, 'conv2'), (16, 'conv3'))

    with tf.variable_scope('generator', reuse=reuse):
        # Project the latent vector and reshape it to a 4x4 map with 256 channels.
        net = tf.layers.dense(z, 4 * 4 * 256, kernel_initializer=initializer)
        net = tf.layers.batch_normalization(net, training=training)
        net = tf.nn.relu(net)
        net = tf.reshape(net, [-1, 4, 4, 256])

        # Each stage doubles the spatial size: 4 -> 8 -> 16 -> 32.
        for filters, layer_name in upsample_stages:
            net = resize_nearest_neighbor(net)
            net = tf.layers.conv2d(net, filters, [5, 5], strides=(1, 1),
                                   padding='SAME', name=layer_name)
            net = tf.layers.batch_normalization(net, training=training)
            net = tf.nn.relu(net)

        # Output layer: one channel, squashed by tanh.
        net = tf.layers.conv2d(net, 1, [32, 32], strides=(1, 1),
                               padding='SAME', name='conv4')
        return tf.nn.tanh(net)

### Discriminator Network
The discriminator network takes as input a 32x32 image and transforms it into a single probability that the image came from real-world data rather than from the generator.

In [5]:
def discriminator(x, training=True, reuse=False):
    """Build the discriminator: images in, one sigmoid output per image.

    Relies on the module-level `initializer` (defined in the graph-building
    cell) for the dense layer weights.

    Args:
        x: 4-D image tensor.
        training: Passed as `training=` to every batch-normalization layer.
        reuse: Passed to the 'discriminator' variable scope; set True to share
            weights with a previously built copy.

    Returns:
        Tensor with one sigmoid-activated value per input image.
    """
    # (filters, layer name) for each stride-2 conv -> batch-norm -> leaky-relu stage.
    downsample_stages = ((16, 'conv1'), (32, 'conv2'), (64, 'conv3'))

    with tf.variable_scope('discriminator', reuse=reuse):
        net = x
        for filters, layer_name in downsample_stages:
            net = tf.layers.conv2d(net, filters, [4, 4], strides=(2, 2),
                                   padding='SAME', name=layer_name)
            net = tf.layers.batch_normalization(net, training=training)
            net = lrelu(net)

        # Flatten everything except the batch dimension.
        _, height, width, channels = net.get_shape().as_list()
        flat = tf.reshape(net, [-1, height * width * channels])

        # Single output unit squashed by a sigmoid.
        score = tf.layers.dense(flat, 1, kernel_initializer=initializer)
        return tf.nn.sigmoid(score)

## Build the graph
It's time to build the complete graph that does all the computations!

In [6]:
tf.reset_default_graph()
LEARNING_RATE_G = 1e-4
LEARNING_RATE_D = 1e-4
# Small constant added inside every tf.log below so that a saturated sigmoid
# output (exactly 0 or 1) cannot produce log(0) = -inf and NaN gradients.
LOG_EPS = 1e-8
z_size = 100 # Size of z vector used for generator.
# This initializer is used to initialize the dense networks.
initializer = tf.truncated_normal_initializer(stddev=0.02)
# These placeholders are used as input to the generator and discriminator, respectively
#Random vector
z_in = tf.placeholder(shape=[None,z_size],dtype=tf.float32, name='z_in')
#Real images
real_in = tf.placeholder(shape=[None,32,32,1],dtype=tf.float32, name='real_in')
# This boolean is passed as `training=` to every batch-normalization layer:
# True uses the statistics of the current batch, False uses the moving averages.
is_training = tf.placeholder(dtype=tf.bool, name='is_training')
#Generates images from random z vectors
Gz = generator(z_in, training=is_training)
#Produces probabilities for real images
Dx = discriminator(real_in, training=is_training)
#Produces probabilities for generator images (shares weights with the call above)
Dg = discriminator(Gz, training=is_training, reuse=True)
#These functions together define the optimization objective of the GAN.
d_loss = -tf.reduce_mean(tf.log(Dx + LOG_EPS) + tf.log(1. - Dg + LOG_EPS)) #This optimizes the discriminator.
g_loss = -tf.reduce_mean(tf.log(Dg + LOG_EPS)) #This optimizes the generator.
# Get a list of variables for each network
d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
#The below code is responsible for applying gradient descent to update the GAN.
# The batch-normalization layers register their moving-average updates in
# UPDATE_OPS; the control dependency makes them run with every training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    b = 0.5 # beta1 used by both Adam optimizers
    # Each optimizer only updates the variables of its own network.
    g_train = \
            tf.train.AdamOptimizer(learning_rate=LEARNING_RATE_G, beta1=b)\
                                    .minimize(g_loss, var_list=g_vars)
    d_train = \
            tf.train.AdamOptimizer(learning_rate=LEARNING_RATE_D, beta1=b)\
                                    .minimize(d_loss, var_list=d_vars)

## Training the network
Now that we have fully defined our network, it is time to train it!

In [9]:
batch_size = 128 #Size of image batch to apply at each iteration.
iterations = 500000 #Total number of iterations to use.
sample_directory = './figs' #Directory to save sample images from generator in.
model_directory = './models' #Directory to save trained model to.
save_every = 1000 #Number of iterations between model checkpoints.

init = tf.global_variables_initializer()
saver = tf.train.Saver()
#Fixed z batch, reused for every sample grid so progress is comparable across iterations.
zs2 = np.random.normal(size=[batch_size,z_size]).astype(np.float32)

#Create the output directories once, before the loop starts.
for directory in (sample_directory, model_directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

with tf.Session() as sess:
    sess.run(init)
    for i in range(iterations):
        zs = np.random.normal(size=[batch_size,z_size]).astype(np.float32)
        xs,_ = mnist.train.next_batch(batch_size) #Draw a sample batch from MNIST dataset.
        xs = (np.reshape(xs,[batch_size,28,28,1]) - 0.5) * 2.0 #Transform it to be between -1 and 1
        xs = np.pad(xs, ((0,0),(2,2),(2,2),(0,0)),'constant', constant_values=(-1, -1)) #Pad the images so they are 32x32
        _,gLoss = sess.run([g_train, g_loss],feed_dict={z_in:zs, real_in:xs, is_training:True}) #Update the generator
        _,dLoss = sess.run([d_train, d_loss],feed_dict={z_in:zs, real_in:xs, is_training:True}) #Update the discriminator

        if i % 10 == 0:
            print("Step: " + str(i) + "  Gen Loss: " + str(gLoss) + "  Disc Loss: " + str(dLoss))

            newZ = sess.run(Gz,feed_dict={z_in:zs2, is_training:False}) #Use the fixed z batch to get sample images from generator.
            #Save sample generator images for viewing training progress.
            save_images(np.reshape(newZ[0:36],[36,32,32]),[6,6],sample_directory+'/fig'+str(i)+'.png')

        if i % save_every == 0 and i != 0:
            #Checkpoint the model so it can be restored later.
            saver.save(sess, os.path.join(model_directory, 'model'), global_step=i)

    #Save the final state once training has finished.
    saver.save(sess, os.path.join(model_directory, 'model'), global_step=iterations)

Step: 0  Gen Loss: 0.782284  Disc Loss: 1.63835
Step: 10  Gen Loss: 0.474996  Disc Loss: 1.75083


KeyboardInterrupt: 

## Using a trained network
Once we have a trained model saved, we may want to use it to generate new images, and explore the representation it has learned.

In [None]:
sample_directory = './figs' #Directory to save sample images from generator in.
model_directory = './models' #Directory to load trained model from.
batch_size_sample = 36 #Number of images to generate; must fill the 6x6 grid below.

init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    #Reload the model.
    print('Loading Model...')
    ckpt = tf.train.get_checkpoint_state(model_directory)
    if ckpt is None or not ckpt.model_checkpoint_path:
        #Fail with a clear message instead of an AttributeError on None.
        raise IOError('No checkpoint found in ' + model_directory)
    saver.restore(sess,ckpt.model_checkpoint_path)

    #Generate a random z batch from the same normal distribution used during training.
    zs = np.random.normal(size=[batch_size_sample,z_size]).astype(np.float32)
    #Gz depends on the is_training placeholder, so it must be fed; False selects inference mode.
    newZ = sess.run(Gz,feed_dict={z_in:zs, is_training:False}) #Use new z to get sample images from generator.
    if not os.path.exists(sample_directory):
        os.makedirs(sample_directory)
    #Fixed file name: this cell must not depend on the loop counter of the training cell.
    save_images(np.reshape(newZ[0:batch_size_sample],[batch_size_sample,32,32]),[6,6],sample_directory+'/fig_sample.png')