In [1]:
#@title Imports
%tensorflow_version 1.x 
import numpy as np
import scipy.misc
import imageio
import argparse
from PIL import Image 
import math
import tensorflow as tf
from keras.datasets import mnist
from tensorflow.contrib.layers import conv2d, conv2d_transpose, layer_norm, fully_connected
import random
import os
import time

Using TensorFlow backend.


In [0]:
#@title Parameters
BATCH_SIZE = 128
IMG_DIM    = (28, 28, 1)
Z_DIM      = 80
ZC_DIM     = 10
ZU_DIM     = 2

OUTPUT_DIM = int(np.prod(IMG_DIM))
LAMBDA     = 10
ITERS      = 1  # 20001
CRITIC_ITER= 5

leakyrelu_alpha    = 0.1

In [0]:
#@title Network functions
def save_images(X, save_path):
    # [-1, 1] -> [0,255]
    X = (np.abs(127.5 * X +  127.4999)).astype('uint8')
    
    n_samples = X.shape[0]
    rows = int(np.sqrt(n_samples))
    while n_samples % rows != 0:
        rows -= 1

    nh, nw = rows, n_samples//rows
    
    if X.ndim == 2:
        X = np.reshape(X, (X.shape[0], int(np.sqrt(X.shape[1])), int(np.sqrt(X.shape[1]))))

    if X.ndim == 4:
        h, w = X[0].shape[:2]
        img = np.zeros((h*nh, w*nw, 3))
        
    elif X.ndim == 3:
        h, w = X[0].shape[:2]
        img = np.zeros((h*nh, w*nw))

    for n, x in enumerate(X):
        j = n//nw
        i = n%nw
        img[j*h:j*h+h, i*w:i*w+w] = x

    imageio.imwrite(save_path, img.astype('uint8'))


def lrelu(x):
    return tf.nn.relu(x) - leakyrelu_alpha * tf.nn.relu(-x)
""" Model Definitions """


def generator_tf(x, reuse = True):
    with tf.variable_scope("Generator", reuse = reuse):
        x = tf.identity(x, name="input")
        x = tf.layers.dense(x, 1024, activation=tf.nn.relu)
        x = tf.layers.dense(x, 7 * 7 * 128 , activation=tf.nn.relu)
        x = tf.reshape(x, [-1, 7, 7, 128])
        x = tf.layers.conv2d_transpose(x, 64, 4, 2, padding='same', activation=tf.nn.relu)
        x = tf.layers.conv2d_transpose(x, 1, 4, 2,  padding='same', activation=tf.nn.tanh)
        x = tf.identity(x, name="output")
                
        return x


def d_tf(x, reuse = True):
    with tf.variable_scope("Discriminator", reuse = reuse):
        x = tf.identity(x, name="input")
        x = tf.layers.conv2d(x,     64,  4, 2, padding='same', activation=lrelu)
        x = tf.layers.conv2d(x,     128, 4, 2, padding='same', activation=lrelu)
        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x, 1024, activation=lrelu)
        x = tf.layers.dense(x, 1)
        x = tf.identity(x, name="output")
        return x


def q_tf(x, reuse = True):
    with tf.variable_scope("Q", reuse = reuse):
        x = tf.identity(x, name="input")
        x = tf.layers.conv2d(x,     64,  4, 2, padding='same', activation=lrelu)
        x = tf.layers.conv2d(x,     128, 4, 2, padding='same', activation=lrelu)
        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x, 1024, activation=lrelu)        
        x = fully_connected(x, ZC_DIM + ZU_DIM , activation_fn=None)
        x = tf.identity(x, name="output")
        return x    


def q_cost_tf(z, q):
    # categorical part
    z_cat = z[:, : ZC_DIM]
    q_cat = q[:, : ZC_DIM]
    lcat = tf.nn.softmax_cross_entropy_with_logits(labels=z_cat, logits=q_cat)
    
    # uniform part
    z_uni = z[:, ZC_DIM: ZC_DIM + ZU_DIM]
    q_uni = q[:, ZC_DIM: ZC_DIM + ZU_DIM]
    
    luni  = 0.5 * tf.square(z_uni - q_uni);
    
    return tf.reduce_mean(lcat) + tf.reduce_mean(luni);
    

def prepare_mnist_list(X):
    X = (X.astype(np.float32) - 127.5)/127.5
    X = X[:, :, :, None]
    return list(X)

In [0]:
#@title Samplers
def random_uc():
    idxs = np.random.randint(ZC_DIM, size=BATCH_SIZE)
    onehot = np.zeros((BATCH_SIZE, ZC_DIM))
    onehot[np.arange(BATCH_SIZE), idxs] = 1
    return onehot      


def random_z():
    rez = np.zeros([BATCH_SIZE, Z_DIM])
    rez[:,        : ZC_DIM] = random_uc()
    rez[:, ZC_DIM : ]       = np.random.uniform(-1, 1, size=(BATCH_SIZE, Z_DIM - ZC_DIM))        
    return rez;


def static_uc(n_row):
    idxs = np.array(list(range(ZC_DIM)))
    onehot = np.zeros((n_row, ZC_DIM))
    onehot[np.arange(n_row), idxs] = 1
    return onehot 


def static_z(c_idx, n_row): # c_idx = 1 -> c1, 1 to n.
  onehot = static_uc(n_row)
  cts = np.linspace(-1, 1, n_row)
  rez = []

  for cls in onehot:
    for val in cts:
      current = np.zeros(Z_DIM)
      current[ : ZC_DIM] = cls
      current[ZC_DIM + (c_idx - 1) : ZC_DIM + c_idx] = val
      rez.append(current)

  rez = np.array(rez).reshape((n_row * ZC_DIM, Z_DIM))
  return rez



In [0]:
#@title Training loop
def train():
    # Prepare Training Data
    (X_train, _), (X_test, _) = mnist.load_data()
    X_train_list = prepare_mnist_list(X_train)
    X_test_list = prepare_mnist_list(X_test)

    # Initialize Models
    real_data = tf.placeholder(tf.float32, (None, *IMG_DIM))
    z_ph = tf.placeholder(tf.float32, (None, Z_DIM))
    fake_data = generator_tf(z_ph, reuse=False)
    d_on_real_data = d_tf(real_data, reuse=False)
    d_on_fake_data = d_tf(fake_data)
    q_on_fake_data = q_tf(fake_data, reuse=False)

    alpha = tf.random_uniform(shape=[tf.shape(fake_data)[0], 1, 1, 1], minval=0., maxval=1.)
    interpolates = real_data + alpha * (fake_data - real_data)

    gradients = tf.gradients(d_tf(interpolates), [interpolates])[0]
    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2, 3]))
    gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)

    q_cost = q_cost_tf(z_ph, q_on_fake_data)
    g_cost = -tf.reduce_mean(d_on_fake_data)
    d_cost = tf.reduce_mean(d_on_fake_data) - tf.reduce_mean(d_on_real_data) + LAMBDA * gradient_penalty

    g_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')
    d_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator')
    q_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Q')

    g_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-3).minimize(g_cost, var_list=g_param)
    d_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-3).minimize(d_cost, var_list=d_param)
    q_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-3).minimize(q_cost, var_list=q_param + g_param)

    saver = tf.train.Saver(max_to_keep=5)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    fix_z_c1 = static_z(1, 10)
    fix_z_c2 = static_z(2, 10)

    f_train_stat = open("train_log.txt", "w", buffering=1)
    f_test_stat = open("test_log.txt", "w", buffering=1)
    os.system("mkdir -p figs_c1")
    os.system("mkdir -p figs_c2")

    for it in range(ITERS):
        start_time = time.time()

        for i in range(CRITIC_ITER):
            data = np.array(random.sample(X_train_list, BATCH_SIZE))
            d_cost_rez, _ = sess.run([d_cost, d_train_op], feed_dict={real_data: data, z_ph: random_z()})

        g_cost_rez, q_cost_rez, _, _ = sess.run([g_cost, q_cost, g_train_op, q_train_op], feed_dict={z_ph: random_z()})
        f_train_stat.write("%i %g %g %g\n" % (it, g_cost_rez, d_cost_rez, q_cost_rez))

        if it % 100 == 0:
            # sample and save
            print("Training it: %i, time/it: %g, d_cost: %g" % (it, (time.time() - start_time), d_cost_rez))
            samples = sess.run([fake_data], feed_dict={z_ph: fix_z_c1})
            save_images(np.squeeze(samples), 'figs_c1/samples_%.6i.png' % (it))
            samples = sess.run([fake_data], feed_dict={z_ph: fix_z_c2})
            save_images(np.squeeze(samples), 'figs_c2/samples_%.6i.png' % (it))
            # test
            data = np.array(random.sample(X_test_list, BATCH_SIZE))
            g_cost_rez, d_cost_rez, q_cost_rez = sess.run([g_cost, d_cost, q_cost],
                                                          feed_dict={real_data: data, z_ph: random_z()})
            f_test_stat.write("%i %g %g %g\n" % (it, g_cost_rez, d_cost_rez, q_cost_rez))
        if (it + 1) % 2000 == 0:
            saver.save(sess, 'save/model', global_step=it)

    saver.save(sess, 'save/final-model')

In [0]:
train()

In [5]:
(X_train, _), (X_test, _) = mnist.load_data()
X_train_list = prepare_mnist_list(X_train)
X_test_list = prepare_mnist_list(X_test)

# Initialize Models
real_data = tf.placeholder(tf.float32, (None, *IMG_DIM))
z_ph = tf.placeholder(tf.float32, (None, Z_DIM))
fake_data = generator_tf(z_ph, reuse=False)
d_on_real_data = d_tf(real_data, reuse=False)
d_on_fake_data = d_tf(fake_data)
q_on_fake_data = q_tf(fake_data, reuse=False)

alpha = tf.random_uniform(shape=[tf.shape(fake_data)[0], 1, 1, 1], minval=0., maxval=1.)
interpolates = real_data + alpha * (fake_data - real_data)

gradients = tf.gradients(d_tf(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2, 3]))
gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)

q_cost = q_cost_tf(z_ph, q_on_fake_data)
g_cost = -tf.reduce_mean(d_on_fake_data)
d_cost = tf.reduce_mean(d_on_fake_data) - tf.reduce_mean(d_on_real_data) + LAMBDA * gradient_penalty

g_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')
d_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator')
q_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Q')

g_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-3).minimize(g_cost, var_list=g_param)
d_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-3).minimize(d_cost, var_list=d_param)
q_train_op = tf.train.RMSPropOptimizer(learning_rate=1e-3).minimize(q_cost, var_list=q_param + g_param)

saver = tf.train.Saver(max_to_keep=5)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

fix_z_c1 = static_z(1, 10)
fix_z_c2 = static_z(2, 10)

f_train_stat = open("train_log.txt", "w", buffering=1)
f_test_stat = open("test_log.txt", "w", buffering=1)
os.system("mkdir -p figs_c1")
os.system("mkdir -p figs_c2")

for it in range(ITERS):
    start_time = time.time()

    for i in range(CRITIC_ITER):
        data = np.array(random.sample(X_train_list, BATCH_SIZE))
        d_cost_rez, _ = sess.run([d_cost, d_train_op], feed_dict={real_data: data, z_ph: random_z()})

    g_cost_rez, q_cost_rez, _, _ = sess.run([g_cost, q_cost, g_train_op, q_train_op], feed_dict={z_ph: random_z()})
    f_train_stat.write("%i %g %g %g\n" % (it, g_cost_rez, d_cost_rez, q_cost_rez))

    if it % 100 == 0:
        # sample and save
        print("Training it: %i, time/it: %g, d_cost: %g" % (it, (time.time() - start_time), d_cost_rez))
        samples = sess.run([fake_data], feed_dict={z_ph: fix_z_c1})
        save_images(np.squeeze(samples), 'figs_c1/samples_%.6i.png' % (it))
        samples = sess.run([fake_data], feed_dict={z_ph: fix_z_c2})
        save_images(np.squeeze(samples), 'figs_c2/samples_%.6i.png' % (it))
        # test
        data = np.array(random.sample(X_test_list, BATCH_SIZE))
        g_cost_rez, d_cost_rez, q_cost_rez = sess.run([g_cost, d_cost, q_cost],
                                                      feed_dict={real_data: data, z_ph: random_z()})
        f_test_stat.write("%i %g %g %g\n" % (it, g_cost_rez, d_cost_rez, q_cost_rez))
    if (it + 1) % 2000 == 0:
        saver.save(sess, 'save/model', global_step=it)

saver.save(sess, 'save/final-model')

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use `tf.keras.layers.Conv2DTranspose` instead.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Call initializer i

'save/final-model'

In [6]:
q_tf

<function __main__.q_tf>