# Generative Adversarial Networks


We'll start with the simplest setting that is still interesting.

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%pylab inline

To demonstrate how GANs work, we'll use the MNIST dataset. Our generator will learn how to generate small pictures of handwritten digits, and our discriminator will try to distinguish between real images and fakes.

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST through the tutorial helper, using "MNIST_data/" as the data
# directory and requesting one-hot encoded labels.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

We have two parameters that directly impact training time and model quality: `n_train_steps` and `batch_size`. 

Reasonable numbers for `batch_size` include small powers of 2; I like 256, although the cell below uses 32.

In [None]:
# Training hyperparameters; both trade training time against model quality.
batch_size = 32        # examples drawn per minibatch
n_train_steps = 30000  # iterations of the outer training loop

Our generator will have a very simple structure. The input will be a 100-dimensional noise vector $z$, and we'll use just two layers. As an equation, the network looks like:

$G = \tanh((\mbox{relu}(z * W_1 + b_1)) * W_2 + b_2)$

In [None]:
# Generator input: a batch of 100-dimensional noise vectors.
z = tf.placeholder(tf.float32, shape=(None, 100))

# Layer 1 (100 -> 512): Xavier-initialised weights, zero-initialised bias.
g_w1 = tf.get_variable(
    "g_w1", shape=[100, 512],
    initializer=tf.contrib.layers.xavier_initializer())
g_b1 = tf.get_variable(
    "g_b1", shape=[512],
    initializer=tf.constant_initializer(0.0))

# Layer 2 (512 -> 784): same initialisation scheme.
g_w2 = tf.get_variable(
    "g_w2", shape=[512, 784],
    initializer=tf.contrib.layers.xavier_initializer())
g_b2 = tf.get_variable(
    "g_b2", shape=[784],
    initializer=tf.constant_initializer(0.0))

# Variables updated by the generator's optimiser.
g_params = [g_w1, g_b1, g_w2, g_b2]

def generator(z):
    """Map a batch of noise vectors to generated samples.

    Computes tanh(relu(z * g_w1 + g_b1) * g_w2 + g_b2) using the
    module-level generator variables.

    Args:
        z: tensor with 100 columns (it is multiplied by the [100, 512] g_w1).

    Returns:
        Tensor with 784 columns; values lie in [-1, 1] because of the tanh.
    """
    hidden = tf.nn.relu(tf.matmul(z, g_w1) + g_b1)
    return tf.nn.tanh(tf.matmul(hidden, g_w2) + g_b2)

In [None]:
# Discriminator input: a batch of flattened 784-dimensional images.
x = tf.placeholder(tf.float32, shape=(None, 784))

# These are fully connected weight matrices, so use the same plain Xavier
# initialiser as the generator rather than the conv2d-named variant.
# NOTE(review): tf.contrib.layers.xavier_initializer_conv2d is expected to be
# an alias of xavier_initializer, so initial values should be unaffected --
# confirm against the installed TensorFlow version.
d_w1 = tf.get_variable("d_w1", [784, 256], initializer=tf.contrib.layers.xavier_initializer())
d_b1 = tf.get_variable("d_b1", [256], initializer=tf.constant_initializer(0.0))
d_w2 = tf.get_variable("d_w2", [256, 1], initializer=tf.contrib.layers.xavier_initializer())
d_b2 = tf.get_variable("d_b2", [1], initializer=tf.constant_initializer(0.0))

# Variables updated by the discriminator's optimiser.
d_params = [d_w1, d_b1, d_w2, d_b2]

def discriminator(x):
    """Score a batch of images with the two-layer discriminator.

    Args:
        x: tensor with 784 columns (it is multiplied by the [784, 256] d_w1).

    Returns:
        A pair (probability, logit): the raw single-column output of the
        second layer and its sigmoid.
    """
    hidden = tf.nn.relu(tf.matmul(x, d_w1) + d_b1)
    logit = tf.matmul(hidden, d_w2) + d_b2
    return tf.nn.sigmoid(logit), logit


In [None]:
# Wire the two networks together.
# G is the generator's output for the noise placeholder z.
G = generator(z)
# The discriminator is applied twice with the same module-level weights:
# once to the real-image placeholder x and once to the generated batch G.
D_real, D_logit_real = discriminator(x)
D_fake, D_logit_fake = discriminator(G)

In [None]:
# GAN objectives (both are minimised), written with softplus on the raw
# logits instead of log() on the sigmoid outputs. With D = sigmoid(l):
#   -log(D)     == softplus(-l)
#   -log(1 - D) == softplus(l)
# so the values equal the original -mean(log(D_real) + log(1 - D_fake)) and
# -mean(log(D_fake)), but a saturated discriminator (D rounding to exactly
# 0 or 1) no longer yields log(0) = -inf and NaN gradients.
obj_d = tf.reduce_mean(tf.nn.softplus(-D_logit_real) + tf.nn.softplus(D_logit_fake))
obj_g = tf.reduce_mean(tf.nn.softplus(-D_logit_fake))

In [None]:
# One Adam optimiser per network (learning rate 1e-5). Each minimises its own
# objective and is restricted via var_list to that network's variables, so a
# discriminator step never changes the generator and vice versa.
opt_d = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(
    obj_d, var_list=d_params)
opt_g = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(
    obj_g, var_list=g_params)

In [None]:
def noise_prior(batch_size, dim):
    """Sample generator input noise.

    Args:
        batch_size: number of noise vectors (rows) to draw.
        dim: length of each noise vector (columns).

    Returns:
        A (batch_size, dim) NumPy array of independent draws from a normal
        distribution with mean 0 and standard deviation 1.
    """
    shape = (batch_size, dim)
    return np.random.normal(loc=0.0, scale=1.0, size=shape)

In [None]:
# Open an interactive session and initialise every variable defined so far.
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())

In [None]:
# Alternating GAN training. Each outer step draws one image minibatch and one
# noise batch, then runs k_d discriminator updates followed by k_g generator
# updates on that same data.
k_d = 1
k_g = 1

# Per-step objective values, kept for plotting afterwards.
histd, histg = np.zeros(n_train_steps), np.zeros(n_train_steps)

# Report progress roughly ten times per run. The max() guard keeps the modulo
# below well-defined when n_train_steps < 10 (n_train_steps // 10 would be 0).
log_every = max(1, n_train_steps // 10)

for i in range(n_train_steps):
    # The labels (t_data) are fetched but not used by either objective.
    x_data, t_data = mnist.train.next_batch(batch_size)

    # Standardise with statistics of the current batch: a per-column mean
    # (axis 0) and a single standard deviation over the whole batch.
    # NOTE(review): the generator output is tanh-bounded to [-1, 1] while the
    # standardised pixels are not limited to that range -- confirm this
    # mismatch is intended.
    x_mean = np.mean(x_data, axis=0)
    x_std = np.std(x_data)
    x_data = (x_data - x_mean) / x_std

    noise = noise_prior(batch_size, 100)

    # If k_d or k_g exceeds 1, only the last inner value is kept in the history.
    for j in range(k_d):
        histd[i], _ = sess.run([obj_d, opt_d], {x: x_data, z: noise})
    for j in range(k_g):
        histg[i], _ = sess.run([obj_g, opt_g], {z: noise})

    if i % log_every == 0:
        # A single pre-formatted string prints identically whether print is
        # the Python 2 statement or the Python 3 function.
        print("%d %s %s" % (i, histd[i], histg[i]))


In [None]:
# Plot both objective histories against the training step on one set of axes.
steps = range(n_train_steps)
for history, curve_label in ((histd, 'obj_d'), (histg, 'obj_g')):
    plt.plot(steps, history, label=curve_label)
plt.legend()

In [None]:
# Estimate de-normalisation statistics from a batch ten times the training
# batch size, using the same per-column mean / single std as the training
# loop. Despite the "test_" names, the batch is drawn from mnist.train.
test_batch, _ = mnist.train.next_batch(10 * batch_size)
test_std = np.std(test_batch)
test_mean = np.mean(test_batch, axis=0)

# Generate one sample, undo the standardisation and view it as a 28x28 image.
sample = sess.run(G, {z: noise_prior(1, 100)})
out_im = (sample * test_std + test_mean).reshape(28, 28)
imshow(out_im)

In [None]:
# Generate another sample, de-normalise it with the statistics computed in the
# previous cell, and display it as a 28x28 image in grayscale.
generated = sess.run(G, {z: noise_prior(1, 100)})
out_im = (test_std * generated + test_mean).reshape(28, 28)
imshow(out_im, cmap="gray")