# This tutorial was run on a GeForce GTX 1080Ti 12GB
The generator only learns how to draw after about 50 epochs, so be patient.

## Load Basic Library

In [None]:
import math
import numpy as np
import tensorflow as tf

## Set Environment and Parameters

In [None]:
import os

# Use the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Random seed.
seed = 2
# Folder for saving the model and logs.
model_dir = "model_InfoGAN/"
# Number of images in one batch.
BATCH_SIZE = 1024
# Size of the categorical latent code.
CATEGORICAL_LATENT_SIZE = 10
# Size of the continuous latent code.
CONTINUOUS_LATENT_SIZE = 2
# Noise dimension for the generator.
NOISE_DIM = 62
# Number of passes over the training data.
EPOCHS = 300
# Save a summary to TensorBoard every this many steps (one step = one batch).
SAVE_SUMMARY_STEPS = 100
# Number of GPUs.
NUM_GPUS = 1

## Load Data
Use MNIST dataset

In [None]:
# Only the training split of MNIST is kept; the second split is discarded.
(image, label), (_, _) = tf.keras.datasets.mnist.load_data()

# Append a trailing axis of length 1 to both arrays.
image = image[..., np.newaxis]
label = label[..., np.newaxis]

print("Image shape:", image.shape)
print("Label shape:", label.shape)

## Feed Training Data to Model

In [None]:
def train_input_fn(images, labels):
    """Build the training input pipeline and the latent inputs for the generator.

    Args:
        images: iterable of images; every batch is reshaped to [-1, 28, 28, 1].
        labels: iterable of labels, iterated in lockstep with `images`.

    Returns:
        A `(features, labels)` tuple. `features` is a dict with the keys
        "images" (real images scaled to [-1.0, 1.0]), "c_cat" (one-hot
        categorical code), "c_cont" (continuous code) and "random_noises";
        `labels` is the batched label tensor from the dataset iterator.
    """

    # The factory gives the generator its own scope: `images` / `labels` are
    # rebound to tensors further down in this function, and a closure defined
    # directly here would see the rebound values when the dataset is iterated.
    def make_generator(images, labels):

        def _generator():
            for image, label in zip(images, labels):
                yield image, label

        return _generator

    # Normalize the values of the image from [0, 255] to [-1.0, 1.0]
    def _preprocessing(image, label):
        image = image * (2.0 / 255.0) - 1.0
        return image, label

    dataset = tf.data.Dataset.from_generator(make_generator(images, labels), (tf.float32, tf.float32))
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.repeat(EPOCHS)
    dataset = dataset.map(_preprocessing)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    dataset = dataset.prefetch(None)

    iterator = dataset.make_one_shot_iterator()

    images, labels = iterator.get_next()
    images = tf.reshape(images, [-1, 28, 28, 1])
    print("output image:", images.shape)

    tf.summary.image("images", images)

    # Create categorical, continuous and random noises.
    # The categorical indices are sampled with a TensorFlow op so that new
    # codes are drawn on every step. Sampling them with NumPy would run only
    # once, while the graph is built, and freeze the same codes into the graph
    # for the whole training run.
    c_cat_indices = tf.random_uniform(shape=[BATCH_SIZE], minval=0, maxval=CATEGORICAL_LATENT_SIZE, dtype=tf.int32)
    c_cat = tf.one_hot(c_cat_indices, depth=CATEGORICAL_LATENT_SIZE, dtype=tf.float32)
    c_cont = tf.random_normal(shape=(BATCH_SIZE, CONTINUOUS_LATENT_SIZE))
    random_noises = tf.random_normal(shape=(BATCH_SIZE, NOISE_DIM))

    features = {"images": images,
                "c_cat": c_cat,
                "c_cont": c_cont,
                "random_noises": random_noises}

    return features, labels

## Feed Random Noise Data to Model for Eval and Prediction

In [None]:
def random_noise_input_fn():
    """Build a fixed batch of 10 generator inputs for evaluation and prediction.

    Every sample uses categorical code 3. The first continuous code sweeps
    linearly from -2 to 2 across the batch and the remaining continuous codes
    are 0. The noise vectors are drawn from a generator seeded with `seed`, so
    every call produces the same noise.

    Returns:
        A `(features, None)` tuple, where `features` is a dict with the keys
        "random_noises", "c_cat" and "c_cont".
    """
    num_samples = 10

    # A local generator yields the same values as seeding the global NumPy RNG,
    # without resetting the global state for every other np.random user.
    rng = np.random.RandomState(seed)

    # Sizes follow the module-level constants so the inputs keep matching the
    # model when the latent sizes are changed.
    c_cat = tf.one_hot([3] * num_samples, depth=CATEGORICAL_LATENT_SIZE, dtype=tf.float32)

    c_cont_values = np.zeros((num_samples, CONTINUOUS_LATENT_SIZE), dtype=np.float32)
    c_cont_values[:, 0] = np.linspace(-2, 2, num=num_samples, dtype=np.float32)
    c_cont = tf.constant(c_cont_values)

    # A single-element dataset: the iterator is exhausted after one batch.
    dataset = tf.data.Dataset.from_tensors(tf.constant(rng.randn(num_samples, NOISE_DIM), dtype=tf.float32))

    iterator = dataset.make_one_shot_iterator()

    noises = iterator.get_next()

    return {"random_noises": noises, "c_cat": c_cat, "c_cont": c_cont}, None

## Model Structure
**Remember: Do batch normalization in training mode, but not in evaluation and prediction mode**  
This model structure is based on DCGAN.

In [None]:
def generator(c_cat, c_cont, noise, training):
    """Generate images from the latent codes and the noise vector.

    The three inputs are concatenated along the last axis, passed through two
    dense blocks (dense -> batch norm -> ReLU), reshaped to [-1, 7, 7, 128] and
    upsampled by two stride-2 transposed convolutions; the last one has a
    single filter and is followed by tanh.

    Args:
        c_cat: categorical latent code.
        c_cont: continuous latent code.
        noise: random noise vector.
        training: passed to every batch normalization layer.

    Returns:
        The tanh output of the last transposed convolution.
    """
    # Arguments shared by both transposed convolutions.
    deconv_args = dict(kernel_size=(4, 4), strides=(2, 2), padding="same", name="deconv")

    with tf.variable_scope("generator", reuse=tf.AUTO_REUSE):
        latent = tf.concat([c_cat, c_cont, noise], axis=-1)

        with tf.variable_scope("block1"):
            hidden = tf.layers.dense(latent, units=1024, name="dense")
            hidden = tf.layers.batch_normalization(hidden, training=training, name="batch_normalization")
            hidden = tf.nn.relu(hidden, name="relu")

        with tf.variable_scope("block2"):
            hidden = tf.layers.dense(hidden, units=7*7*128, name="dense")
            hidden = tf.layers.batch_normalization(hidden, training=training, name="batch_normalization")
            hidden = tf.nn.relu(hidden, name="relu")

        feature_map = tf.reshape(hidden, [-1, 7, 7, 128])

        with tf.variable_scope("block3"):
            feature_map = tf.layers.conv2d_transpose(feature_map, filters=64, **deconv_args)
            feature_map = tf.layers.batch_normalization(feature_map, training=training, name="batch_normalization")
            feature_map = tf.nn.relu(feature_map, name="relu")

        with tf.variable_scope("output"):
            pre_activation = tf.layers.conv2d_transpose(feature_map, filters=1, **deconv_args)
            generated = tf.nn.tanh(pre_activation, name="tanh")

    return generated
    
def discriminator(images, c_cat, c_cont, training):
    """Score images as real/fake and predict the latent codes behind them.

    Two stride-2 convolution blocks and one dense block form a shared trunk.
    One head on top of the trunk outputs a single real/fake logit; a second
    branch ("latent_code") outputs categorical logits plus the mean and log
    standard deviation for the continuous code.

    Args:
        images: batch of images to score.
        c_cat: not used inside this function.
        c_cont: not used inside this function.
        training: passed to every batch normalization layer.

    Returns:
        A tuple `(values, cat, mean, log_std)`: the real/fake logits, the
        categorical logits (CATEGORICAL_LATENT_SIZE units), and the mean and
        log-std outputs (CONTINUOUS_LATENT_SIZE units each).
    """
    # Arguments shared by both convolutions.
    conv_args = dict(kernel_size=(4, 4), strides=(2, 2), padding="same", name="conv")

    with tf.variable_scope("discriminator", reuse=tf.AUTO_REUSE):
        with tf.variable_scope("block1"):
            feature_map = tf.layers.conv2d(images, filters=64, **conv_args)
            feature_map = tf.layers.batch_normalization(feature_map, training=training, name="batch_normalization")
            feature_map = tf.nn.leaky_relu(feature_map, alpha=0.2, name="leaky_relu")

        with tf.variable_scope("block2"):
            feature_map = tf.layers.conv2d(feature_map, filters=128, **conv_args)
            feature_map = tf.layers.batch_normalization(feature_map, training=training, name="batch_normalization")
            feature_map = tf.nn.leaky_relu(feature_map, alpha=0.2, name="leaky_relu")

        hidden = tf.layers.flatten(feature_map, name="flatten")

        with tf.variable_scope("block3"):
            hidden = tf.layers.dense(hidden, units=1024, name="dense")
            hidden = tf.layers.batch_normalization(hidden, training=training, name="batch_normalization")
            hidden = tf.nn.leaky_relu(hidden, alpha=0.2, name="leaky_relu")

        # Real/fake head.
        with tf.variable_scope("output"):
            values = tf.layers.dense(hidden, units=1, name="dense")

        # Latent-code heads, built on the same trunk output.
        with tf.variable_scope("latent_code"):
            with tf.variable_scope("block1"):
                code_hidden = tf.layers.dense(hidden, units=128, name="dense")
                code_hidden = tf.layers.batch_normalization(code_hidden, training=training, name="batch_normalization")
                code_hidden = tf.nn.leaky_relu(code_hidden, alpha=0.2, name="leaky_relu")

            with tf.variable_scope("categorical"):
                cat = tf.layers.dense(code_hidden, units=CATEGORICAL_LATENT_SIZE, name="dense")

            with tf.variable_scope("continuous"):
                with tf.variable_scope("mean"):
                    mean = tf.layers.dense(code_hidden, units=CONTINUOUS_LATENT_SIZE, name="dense")
                with tf.variable_scope("log_std"):
                    log_std = tf.layers.dense(code_hidden, units=CONTINUOUS_LATENT_SIZE, name="dense")

    return values, cat, mean, log_std

## Model Function for tf.Estimator
To save model for tensorflow serving, set **`export_outputs`** parameter in prediction mode   
Mutual Information Implementation: https://github.com/tdeboissiere/DeepLearningImplementations/issues/47  
**Note:**    
**1. Add the mutual information loss to both the generator and the discriminator losses**  
**2. Choose the learning rates carefully**   

In [None]:
def model_fn(features, labels, mode):
    """Model function for `tf.estimator.Estimator`: builds the InfoGAN graph for one mode.

    Args:
        features: dict with the keys "c_cat", "c_cont" and "random_noises";
            in TRAIN mode it must also contain "images" (the real images).
        labels: not used inside this function.
        mode: one of `tf.estimator.ModeKeys`.

    Returns:
        A `tf.estimator.EstimatorSpec`:
        - PREDICT: predictions `{"images": fake_images}`, also exposed as the
          'output' export output.
        - TRAIN: loss is the generator loss; the train op groups the
          discriminator update, the generator update and the global step
          increment.
        - EVAL: loss is the generator loss, with the mean discriminator fake
          loss as a metric and a hook that saves the merged summaries.
    """
    
    # Batch normalization runs in training mode only when mode is TRAIN.
    if mode == tf.estimator.ModeKeys.TRAIN:
        training = True
    else:
        training = False
    
    fake_images = generator(features["c_cat"], features["c_cont"], features["random_noises"], training)
    
    # Prediction mode for tensorflow serving
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            "images": fake_images
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs={ 
            'output': tf.estimator.export.PredictOutput(predictions)})
    
    # Score the generated images and recover latent-code predictions from them.
    fake_values, cat, mean, log_std = discriminator(fake_images, features["c_cat"], features["c_cont"], training)
    
    # calculate cross entropy loss
    # Same logits, opposite targets: the discriminator labels fakes 0, the generator labels them 1.
    discriminator_fake_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=tf.zeros_like(fake_values), logits=fake_values, scope="discriminator_fake_loss")
    generator_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=tf.ones_like(fake_values), logits=fake_values, scope="generator_loss")
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        
        real_images = features["images"]
        # Second discriminator call; only the real/fake logits are kept here.
        real_values, _, _, _ = discriminator(real_images, features["c_cat"], features["c_cont"], training)
        
        discriminator_real_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=tf.ones_like(real_values), logits=real_values, scope="discriminator_real_loss")
        discriminator_loss = tf.add(discriminator_real_loss, discriminator_fake_loss, name="discriminator_loss")
        
        # mutual information
        # Categorical code: softmax cross entropy between the sampled code and the predicted logits.
        cat_loss = tf.losses.softmax_cross_entropy(onehot_labels=features["c_cat"], logits=cat, scope="categorical_loss")
        tf.summary.scalar("categorical_loss", cat_loss)
        # Continuous code: log_std + 0.5 * ((c_cont - mean) / std)^2, summed over code
        # dimensions and averaged over the batch; 1e-7 guards the division.
        cont_loss = tf.reduce_mean(tf.reduce_sum(log_std + 0.5 * tf.square((features["c_cont"] - mean) / (tf.exp(log_std) + 1e-7)), axis=1), name="continuous_loss")
        tf.summary.scalar("continuous_loss", cont_loss)
        mutual_information_loss = tf.add(cat_loss, cont_loss, name="mutual_information_loss")
        tf.summary.scalar("mutual_information_loss", mutual_information_loss)
        
        # The mutual information term is added to both players' losses.
        discriminator_loss += mutual_information_loss
        generator_loss += mutual_information_loss
        tf.summary.scalar("discriminator_loss", discriminator_loss)
        
        # optimizer
        # NOTE(review): `lr` is only written to the "learning_rate" summary; both
        # optimizers below use fixed learning rates, so the decay is never applied.
        # Confirm whether that is intended.
        lr = tf.train.exponential_decay(0.0002, tf.train.get_global_step(), 500, 0.95)
        tf.summary.scalar("learning_rate", lr)
        generator_optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.5)
        discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5)
        
        # for batch normalization, tell tensorflow update batch normalization mean and variance
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # training variable
            # Variables are split between the two networks by matching their names.
            var_list=tf.trainable_variables()
            generator_var = [var for var in var_list if "generator" in var.name]
            discriminator_var = [var for var in var_list if "discriminator" in var.name]
            
            # training operation
            discriminator_train_op = discriminator_optimizer.minimize(discriminator_loss, var_list=discriminator_var)
            generator_train_op = generator_optimizer.minimize(generator_loss, var_list=generator_var)
        
        # Neither minimize() call receives the global step, so it is incremented explicitly.
        step = tf.assign_add(tf.train.get_or_create_global_step(), 1)
        train_op = tf.group([discriminator_train_op, generator_train_op, step])
        
        # monitor training information
        logging_hook = tf.train.LoggingTensorHook({"generator_loss": generator_loss,
                                                   "discriminator_loss": discriminator_loss,
                                                   "mutual_information_loss": mutual_information_loss}, 
                                                  every_n_iter=SAVE_SUMMARY_STEPS)
        
        return tf.estimator.EstimatorSpec(mode, loss=generator_loss, train_op=train_op, training_hooks=[logging_hook])
    
    if mode == tf.estimator.ModeKeys.EVAL:
        # Write the generated images to an image summary under model_dir + "/eval_summary".
        tf.summary.image("fake_images", fake_images, max_outputs=10)
        eval_summary_hook = tf.train.SummarySaverHook(
                                save_steps=1,
                                output_dir=model_dir + "/eval_summary",
                                summary_op=tf.summary.merge_all())
        metrics = {"discriminator_fake_loss" : tf.metrics.mean(discriminator_fake_loss)}
        return tf.estimator.EstimatorSpec(mode, loss=generator_loss, eval_metric_ops=metrics, evaluation_hooks=[eval_summary_hook])

## For Multiple GPU (Parallel Computing)
Experimental; not a stable version.  
Evaluation is not yet distributed.

In [None]:
def get_distribution_strategy(num_gpus, all_reduce_alg=None):
    """Pick the DistributionStrategy used to run the model.

    Args:
        num_gpus: Number of GPUs to run this model on. 0 selects a one-device
            strategy on the CPU, 1 a one-device strategy on the first GPU, and
            any other value a mirrored strategy over `num_gpus` GPUs.
        all_reduce_alg: Algorithm to use when performing all-reduce. See
            tf.contrib.distribute.AllReduceCrossTowerOps for the available
            algorithms. If falsy, no cross-tower ops are passed to the
            mirrored strategy.

    Returns:
        A tf.contrib.distribute strategy object.
    """
    distribute = tf.contrib.distribute

    # Single-device cases first.
    if num_gpus == 0:
        return distribute.OneDeviceStrategy("device:CPU:0")
    if num_gpus == 1:
        return distribute.OneDeviceStrategy("device:GPU:0")

    # Multi-GPU without an explicit all-reduce algorithm.
    if not all_reduce_alg:
        return distribute.MirroredStrategy(num_gpus=num_gpus)

    # Multi-GPU with the requested all-reduce algorithm.
    return distribute.MirroredStrategy(
        num_gpus=num_gpus,
        cross_tower_ops=distribute.AllReduceCrossTowerOps(
            all_reduce_alg, num_packs=num_gpus))

## Set Running Config

In [None]:
# Session options: per-process GPU memory fraction of 0.8, with allow_growth enabled.
session_config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.8, allow_growth=True))

# Summaries, checkpoints and step-count logs all share the same step interval.
config = tf.estimator.RunConfig(
    model_dir=model_dir,
    tf_random_seed=seed,
    session_config=session_config,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_SUMMARY_STEPS,
    log_step_count_steps=SAVE_SUMMARY_STEPS,
    keep_checkpoint_max=5,
    # For multiple GPUs, additionally pass:
    # train_distribute=get_distribution_strategy(NUM_GPUS),
)

clf = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir, config=config)

## Train and Evaluate

In [None]:
# Training reads the MNIST arrays loaded earlier; evaluation feeds the fixed noise batch.
train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: train_input_fn(image, label),
    max_steps=18000)
eval_spec = tf.estimator.EvalSpec(
    input_fn=random_noise_input_fn,
    throttle_secs=1)

tf.estimator.train_and_evaluate(estimator=clf, train_spec=train_spec, eval_spec=eval_spec)

## Save model for Tensorflow Serving
The model cannot be exported after calling **predict**, because the `Graph` is finalized and can no longer be modified.  
Use the `checkpoint_path` parameter to choose which checkpoint is exported.

In [None]:
# Placeholders describing the raw tensors the exported model receives.
c_cat = tf.placeholder(tf.float32, shape=[None, CATEGORICAL_LATENT_SIZE], name="c_cat")
c_cont = tf.placeholder(tf.float32, shape=[None, CONTINUOUS_LATENT_SIZE], name="c_cont")
vector = tf.placeholder(tf.float32, shape=[None, NOISE_DIM], name='random_noises')

# Serving input receiver keyed by the feature names that model_fn reads.
input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
    'c_cat': c_cat,
    'c_cont': c_cont,
    'random_noises': vector
})

# Export the most recent checkpoint in model_dir. A hard-coded step number in
# the path only exists for one particular training run. To export a different
# checkpoint, pass its path as checkpoint_path instead.
clf.export_savedmodel("saved_model/", input_fn, checkpoint_path=tf.train.latest_checkpoint(model_dir))

## Load model and predict
The Estimator `predict` method returns a **generator**, so to get all predictions, iterate over it with a for loop:  
```python
for result in results:
    print(result)
```

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
results = clf.predict(lambda: random_noise_input_fn(), checkpoint_path="model_InfoGAN/model.ckpt-17579")
for result in results:
    plt.imshow(result["images"][:,:,0])
    break

## Load model from `Estimator.export_savedmodel`
Reference: https://qiita.com/parkkiung123/items/13adb482860f356f97f3  

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# export_savedmodel writes each export into a numerically named subdirectory
# of the export base directory. Use the newest one, because a hard-coded
# directory name only exists for one particular export.
export_root = 'saved_model'
export_versions = [name for name in os.listdir(export_root) if name.isdigit()]
if not export_versions:
    raise RuntimeError("No exported model found under '%s'; run the export cell first." % export_root)
export_dir = os.path.join(export_root, max(export_versions, key=int))

with tf.Session(graph=tf.Graph()) as sess:
    # saved_model load
    tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    # print all tensor name
#     for op in sess.graph.get_operations():
#         print(op.values())
    # input
    i = sess.graph.get_tensor_by_name("random_noises:0")
    cat = sess.graph.get_tensor_by_name("c_cat:0")
    cont = sess.graph.get_tensor_by_name("c_cont:0")
    # output
    r = sess.graph.get_tensor_by_name("generator/output/tanh:0")
    # The result gets its own name so it does not overwrite `image`, the MNIST
    # array that the training cell reads.
    generated_image = sess.run(r, feed_dict={i:np.random.randn(1, NOISE_DIM), cat:[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]], cont:[[0, 0]]})
    print(generated_image.shape)
    plt.imshow(generated_image[0][:,:,0])