In [0]:
%tensorflow_version 1.x
!pip install comet_ml
!pip install imageio
from comet_ml import Experiment
import imageio
import numpy as np
import tensorflow as tf
from keras.datasets import mnist
from tensorflow.contrib.layers import conv2d, conv2d_transpose, layer_norm, fully_connected, l1_regularizer
import random
import os
import time

Collecting comet_ml
[?25l  Downloading https://files.pythonhosted.org/packages/dd/e4/53163c4e271c5a7872973135d45efb289e572c6351fa31c94cc8a98d91e1/comet_ml-3.0.3-py3-none-any.whl (178kB)
[K     |█▉                              | 10kB 19.9MB/s eta 0:00:01[K     |███▊                            | 20kB 2.2MB/s eta 0:00:01[K     |█████▌                          | 30kB 3.2MB/s eta 0:00:01[K     |███████▍                        | 40kB 2.1MB/s eta 0:00:01[K     |█████████▏                      | 51kB 2.6MB/s eta 0:00:01[K     |███████████                     | 61kB 3.0MB/s eta 0:00:01[K     |████████████▉                   | 71kB 3.5MB/s eta 0:00:01[K     |██████████████▊                 | 81kB 4.0MB/s eta 0:00:01[K     |████████████████▌               | 92kB 4.5MB/s eta 0:00:01[K     |██████████████████▍             | 102kB 3.4MB/s eta 0:00:01[K     |████████████████████▎           | 112kB 3.4MB/s eta 0:00:01[K     |██████████████████████          | 122kB 3.4MB/s eta 

Using TensorFlow backend.


In [0]:
#@title Parameters
# zu_list / lambda_list are not referenced in the cells visible here
# (presumably sweep candidates for ZU_DIM / info_lambda -- TODO confirm).
zu_list = [2]
ZU_DIM = 2  # number of continuous codes: Q emits ZC_DIM + ZU_DIM outputs (see q_tf)
lambda_list = [0.1]
info_lambda = 0.1  # weight of the continuous-code squared error in q_cost_tf / q_cost_gmm
# hyperparameters
BATCH_SIZE = 128
IMG_DIM = (28, 28, 1)
Z_DIM = 74  # full generator input width: ZC_DIM one-hot entries followed by uniform(-1, 1) entries (see random_z)
ZC_DIM = 10  # c_cat
OUTPUT_DIM = int(np.prod(IMG_DIM))
LAMBDA = 10  # multiplier on the gradient penalty term in d_cost
ITERS = 20001
CRITIC_ITER = 5  # discriminator updates per generator/Q update in the train loop
leakyrelu_alpha = 0.1  # negative-side slope used by lrelu
LR_D = 1e-3
LR_G = 1e-3
LR_Q = 1e-3

In [0]:
#@title Functions
def save_images(X, save_path):
    """Tile a batch of images into a single grid image and write it to disk.

    Args:
        X: numpy array of images with values in [-1, 1]. Supported layouts are
            (N, H*W) flattened square images, (N, H, W), or (N, H, W, C) with
            C broadcastable to 3 channels.
        save_path: destination handed unchanged to ``imageio.imwrite``.

    Raises:
        ValueError: if the batch is empty or has an unsupported rank.

    The grid is filled column-major: sample ``n`` lands in tile row
    ``n % nw`` and tile column ``n // nw``. For a square grid of square
    images this is exactly the layout the function has always produced; the
    canvas is now sized to match, so non-square grids and non-square images
    no longer fail with a broadcasting error.
    """
    # [-1, 1] -> [0,255]
    X = (np.abs(127.5 * X + 127.4999)).astype('uint8')

    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("save_images needs at least one image")

    # Largest divisor of n_samples that does not exceed sqrt(n_samples).
    rows = int(np.sqrt(n_samples))
    while n_samples % rows != 0:
        rows -= 1
    nh, nw = rows, n_samples // rows

    if X.ndim == 2:
        side = int(np.sqrt(X.shape[1]))
        X = np.reshape(X, (X.shape[0], side, side))

    if X.ndim == 4:
        h, w = X[0].shape[:2]
        img = np.zeros((h * nw, w * nh, 3), dtype='uint8')
    elif X.ndim == 3:
        h, w = X[0].shape[:2]
        img = np.zeros((h * nw, w * nh), dtype='uint8')
    else:
        raise ValueError("save_images expects a 2-D, 3-D or 4-D batch, got %d-D" % X.ndim)

    for n, x in enumerate(X):
        tile_col = n // nw
        tile_row = n % nw
        img[tile_row * h:(tile_row + 1) * h, tile_col * w:(tile_col + 1) * w] = x

    imageio.imwrite(save_path, img)

def lrelu(x):
    """Leaky ReLU: passes positive inputs through and scales negative ones by ``leakyrelu_alpha``."""
    positive_part = tf.nn.relu(x)
    negative_part = tf.nn.relu(-x)
    return positive_part - leakyrelu_alpha * negative_part

def generator_tf(x, reuse=True):
    """Build the generator inside the ``Generator`` variable scope.

    Two dense layers feed a 7x7x128 feature map, which two stride-2
    transposed convolutions turn into a single-channel, tanh-activated image.

    Args:
        x: latent input tensor.
        reuse: forwarded to ``tf.variable_scope``; pass ``False`` on the call
            that should create the variables.

    Returns:
        The generated image tensor, exposed under the op name ``output``.
    """
    with tf.variable_scope("Generator", reuse=reuse):
        latent = tf.identity(x, name="input")
        hidden = tf.layers.dense(latent, 1024, activation=tf.nn.relu)
        hidden = tf.layers.dense(hidden, 7 * 7 * 128, activation=tf.nn.relu)
        feature_map = tf.reshape(hidden, [-1, 7, 7, 128])
        feature_map = tf.layers.conv2d_transpose(
            feature_map, filters=64, kernel_size=4, strides=2,
            padding='same', activation=tf.nn.relu)
        image = tf.layers.conv2d_transpose(
            feature_map, filters=1, kernel_size=4, strides=2,
            padding='same', activation=tf.nn.tanh)
        return tf.identity(image, name="output")

def d_tf(x, reuse=True):
    """Build the critic inside the ``Discriminator`` variable scope.

    Two stride-2 convolutions and a 1024-unit dense layer (all leaky-ReLU)
    are followed by a single linear output unit.

    Args:
        x: image batch tensor.
        reuse: forwarded to ``tf.variable_scope``; pass ``False`` on the call
            that should create the variables.

    Returns:
        The un-activated score tensor, exposed under the op name ``output``.
    """
    with tf.variable_scope("Discriminator", reuse=reuse):
        image = tf.identity(x, name="input")
        features = tf.layers.conv2d(
            image, filters=64, kernel_size=4, strides=2,
            padding='same', activation=lrelu)
        features = tf.layers.conv2d(
            features, filters=128, kernel_size=4, strides=2,
            padding='same', activation=lrelu)
        flat = tf.contrib.layers.flatten(features)
        hidden = tf.layers.dense(flat, 1024, activation=lrelu)
        score = tf.layers.dense(hidden, 1)
        return tf.identity(score, name="output")

def q_tf(x, reuse=True):
    """Build the code-recovery network inside the ``Q`` variable scope.

    Same convolutional trunk shape as the critic, ending in a linear
    ``fully_connected`` layer with ``ZC_DIM + ZU_DIM`` outputs.

    Args:
        x: image batch tensor.
        reuse: forwarded to ``tf.variable_scope``; pass ``False`` on the call
            that should create the variables.

    Returns:
        The un-activated code predictions, exposed under the op name ``output``.
    """
    with tf.variable_scope("Q", reuse=reuse):
        image = tf.identity(x, name="input")
        features = tf.layers.conv2d(
            image, filters=64, kernel_size=4, strides=2,
            padding='same', activation=lrelu)
        features = tf.layers.conv2d(
            features, filters=128, kernel_size=4, strides=2,
            padding='same', activation=lrelu)
        flat = tf.contrib.layers.flatten(features)
        hidden = tf.layers.dense(flat, 1024, activation=lrelu)
        # Kept as contrib fully_connected: the train loop looks these
        # variables up as 'Q/fully_connected/weights:0' / 'biases:0'.
        codes = fully_connected(hidden, ZC_DIM + ZU_DIM, activation_fn=None)
        return tf.identity(codes, name="output")

def q_cost_tf(z, q):
    """Loss for recovering the latent codes ``z`` from Q's predictions ``q``.

    The first ``ZC_DIM`` columns are scored with softmax cross-entropy
    (``z`` as labels, ``q`` as logits); the next ``ZU_DIM`` columns with half
    the squared error, weighted by ``info_lambda``.

    Returns:
        Scalar tensor: mean categorical loss + info_lambda * mean continuous loss.
    """
    cat_cols = slice(None, ZC_DIM)
    uni_cols = slice(ZC_DIM, ZC_DIM + ZU_DIM)

    # categorical part
    cat_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=z[:, cat_cols], logits=q[:, cat_cols])

    # uniform part
    uni_loss = 0.5 * tf.square(z[:, uni_cols] - q[:, uni_cols])

    return tf.reduce_mean(cat_loss) + info_lambda * tf.reduce_mean(uni_loss)
  
def q_cost_gmm(z, q):
    """Code-recovery loss intended for the GMM-regularised variant.

    The body was a line-for-line copy of ``q_cost_tf``, so it now delegates
    to it and the two cannot drift apart. Add any GMM-specific term here.

    Args:
        z: latent batch; the first ZC_DIM + ZU_DIM columns hold the codes.
        q: Q network output with the same column layout.

    Returns:
        Scalar loss tensor, identical to ``q_cost_tf(z, q)``.
    """
    return q_cost_tf(z, q)



def prepare_mnist_list(X):
    """Rescale 0..255 pixel values to [-1, 1], append a channel axis, and
    return the batch as a list of per-image float32 arrays."""
    scaled = X.astype(np.float32)
    scaled = (scaled - 127.5) / 127.5
    with_channel = scaled[:, :, :, np.newaxis]
    return [image for image in with_channel]

def random_uc():
    """Draw BATCH_SIZE random categories and return them one-hot encoded
    as a (BATCH_SIZE, ZC_DIM) float array."""
    categories = np.random.randint(ZC_DIM, size=BATCH_SIZE)
    # Row k of the identity matrix is the one-hot vector for category k.
    return np.eye(ZC_DIM)[categories]

def random_z():
    """Sample a (BATCH_SIZE, Z_DIM) latent batch: a random one-hot category in
    the first ZC_DIM columns, uniform(-1, 1) values in the remaining ones."""
    category_part = random_uc()
    uniform_part = np.random.uniform(-1, 1, size=(BATCH_SIZE, Z_DIM - ZC_DIM))
    return np.concatenate([category_part, uniform_part], axis=1)

def static_uc(n_row):
    """Return a deterministic (n_row, ZC_DIM) one-hot matrix.

    Row ``k`` selects category ``k % ZC_DIM``. For ``n_row == ZC_DIM`` this is
    the identity matrix, as before; other values of ``n_row`` used to raise a
    shape-mismatch error because ``n_row`` row indices were paired with
    ``ZC_DIM`` column indices.

    Args:
        n_row: number of one-hot rows to produce.
    """
    onehot = np.zeros((n_row, ZC_DIM))
    row_ids = np.arange(n_row)
    onehot[row_ids, row_ids % ZC_DIM] = 1
    return onehot

def static_z(c_idx, n_row):
    """Build a fixed latent grid that sweeps one continuous code per category.

    For every one of the ZC_DIM categories, ``n_row`` latent vectors are
    produced in which continuous code ``c_idx`` runs over
    ``np.linspace(-1, 1, n_row)``; every other non-category entry is zero.

    Args:
        c_idx: 1-based index of the continuous code to sweep (1 -> c1).
        n_row: number of sweep values per category.

    Returns:
        Array of shape (ZC_DIM * n_row, Z_DIM), category-major.
    """
    # Always ask for one one-hot row per category. Passing n_row here (as the
    # code used to) only worked when n_row happened to equal ZC_DIM.
    categories = static_uc(ZC_DIM)
    sweep = np.linspace(-1, 1, n_row)

    grid = np.zeros((ZC_DIM * n_row, Z_DIM))
    for k, one_hot in enumerate(categories):
        block = grid[k * n_row:(k + 1) * n_row]  # view: writes land in grid
        block[:, :ZC_DIM] = one_hot
        block[:, ZC_DIM + (c_idx - 1): ZC_DIM + c_idx] = sweep[:, None]
    return grid

In [0]:
#@title Prepare
# Clear the default graph before building the networks below.
tf.reset_default_graph()
(X_train, _), (X_test, _) = mnist.load_data()
X_train_list = prepare_mnist_list(X_train)
X_test_list = prepare_mnist_list(X_test)

# Initialize Models
real_data = tf.placeholder(tf.float32, (None, *IMG_DIM))
z_ph = tf.placeholder(tf.float32, (None, Z_DIM))
# The reuse=False calls create each scope's variables; the other d_tf calls
# rely on the reuse=True default and share them.
fake_data = generator_tf(z_ph, reuse=False)
d_on_real_data = d_tf(real_data, reuse=False)
d_on_fake_data = d_tf(fake_data)
q_on_fake_data = q_tf(fake_data, reuse=False)

# wgan
# Gradient penalty: evaluate the critic on random per-sample blends of real
# and generated images and penalise gradient norms that differ from 1.
alpha = tf.random_uniform(shape=[tf.shape(fake_data)[0], 1, 1, 1], minval=0., maxval=1.)
interpolates = real_data + alpha * (fake_data - real_data)
gradients = tf.gradients(d_tf(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1, 2, 3]))
gradient_penalty = tf.reduce_mean((slopes - 1) ** 2)

# cost
q_cost = q_cost_tf(z_ph, q_on_fake_data)
g_cost = -tf.reduce_mean(d_on_fake_data)
d_cost = tf.reduce_mean(d_on_fake_data) - tf.reduce_mean(d_on_real_data) + LAMBDA * gradient_penalty

# params
g_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')
d_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator')
q_param = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Q')

# optimizers
# Each optimizer only touches its own var_list; the Q loss additionally
# updates the generator's variables.
g_train_op = tf.train.RMSPropOptimizer(learning_rate=LR_G).minimize(g_cost, var_list=g_param)
d_train_op = tf.train.RMSPropOptimizer(learning_rate=LR_D).minimize(d_cost, var_list=d_param)
q_train_op = tf.train.RMSPropOptimizer(learning_rate=LR_Q).minimize(q_cost, var_list=q_param + g_param)

# The Saver is created here, so it only covers variables that exist at this point.
saver = tf.train.Saver(max_to_keep=5)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# One fixed latent batch per continuous code, reused for every sample grid.
fix_z = [static_z(code_idx + 1, 10) for code_idx in range(ZU_DIM)]

# Line-buffered logs (buffering=1). The handles are left open on purpose:
# the train loop writes to f_train_stat.
f_train_stat = open("./train_log.txt", "w", buffering=1)
f_test_stat = open("./test_log.txt", "w", buffering=1)

# os.makedirs replaces shelling out to `mkdir -p`: same effect, no shell.
os.makedirs("./save", exist_ok=True)

for code_idx in range(ZU_DIM):
    os.makedirs("./figs_c" + str(code_idx + 1), exist_ok=True)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use `tf.keras.layers.Conv2DTranspose` instead.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Call initializer i

In [0]:
# GMM stuff
# mu_vector
# sigma_vector -> gamma_vector
# pi_vector
# initialization: https://stackoverflow.com/questions/43284047/what-is-the-default-kernel-initializer-in-tf-layers-conv2d-and-tf-layers-dense
    # W = sqrt(6 / (fan_in + fan_out))
    # Initialize the network weights so they are uniformly distributed over an interval [-W, W].
    # Initialize the means of the gaussians so they are spaced evenly over [-W, W].
    # Initialize the variances equal to the spacing between adjacent means.
    # Initialize all mixing proportions equal to each other.
# updates
    # To keep variances positive, store gamma = log(variance) and run gradient descent on gamma
    # (p_j uses exp(gamma) as the variance).
    # Data we are modeling (w) does not have a stationary distribution ->
    # to avoid stability problems, rate of change of weights must be tied to the rate of change of the mixture parameters.
    # therefore, update <w, mu, sigma, pi> simultaneously.

W = tf.constant(np.sqrt(6.0 / (ZC_DIM + ZU_DIM + 1024))) # [-W, W] initialized, for fc layer
mu_vector = tf.Variable(tf.cast(tf.linspace(-W, W, ZU_DIM), tf.float32)) # evenly spaced mus
# NOTE: mu_vector[1] requires ZU_DIM >= 2.
gamma_vector = tf.Variable(tf.cast(tf.log(tf.repeat(mu_vector[1] - mu_vector[0], ZU_DIM)), tf.float32))
pi_vector = tf.Variable(tf.cast(tf.ones(ZU_DIM) / float(ZU_DIM), tf.float32)) # equal to each other, sum up to 1
# Initialize only the three mixture variables. tf.initialize_all_variables()
# is deprecated and re-initializes every variable in the graph, which would
# wipe the generator / discriminator / Q weights if this cell is re-run
# after training.
sess.run(tf.variables_initializer([mu_vector, gamma_vector, pi_vector]))

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [0]:
var = [v for v in tf.trainable_variables()]
# Look up Q's final layer by name. Positional indexing ([-2] / [-1]) is wrong
# once the GMM cell has run: mu_vector, gamma_vector and pi_vector are
# trainable tf.Variables created after the networks, so the last two entries
# are gamma_vector and pi_vector rather than the fc weights and biases.
_trainable_by_name = {v.name: v for v in var}
fc_weights = _trainable_by_name['Q/fully_connected/weights:0']
fc_biases = _trainable_by_name['Q/fully_connected/biases:0']
fc_weights_vals = sess.run(fc_weights)
fc_biases_vals = sess.run(fc_biases)


def p_j(j, w):
  """Gaussian density of mixture component ``j`` evaluated at ``w``.

  The component mean is ``mu_vector[j]`` and its variance is
  ``exp(gamma_vector[j])``.
  """
  mean = tf.cast(mu_vector[j], tf.float32)
  variance = tf.cast(tf.exp(gamma_vector[j]), tf.float32)
  norm_const = 1.0 / tf.math.sqrt(2.0 * np.pi * variance)
  exponent = -tf.math.square(w - mean) / (2.0 * variance)
  return norm_const * tf.exp(exponent)


def p_mix(w):
  """Mixture density at ``w``: the ``pi_vector``-weighted sum of all ZU_DIM component densities."""
  return sum(pi_vector[j] * p_j(j, w) for j in range(ZU_DIM))


def regularizer(w):
  """Negative log-likelihood of ``w`` under the mixture, summed over all entries of ``w``."""
  log_density = tf.log(p_mix(w))
  return -tf.reduce_sum(log_density)


def r_j(j, w):
  """Responsibility of component ``j`` for ``w``: its weighted density divided by the mixture density."""
  weighted_density = pi_vector[j] * p_j(j, w)
  return weighted_density / p_mix(w)
  

def d_gamma_j(j, w):
  """Gradient of ``regularizer(w)`` with respect to ``gamma_vector[j]``.

  With variance_j = exp(gamma_j), as used by ``p_j``:

      d log p_j / d gamma_j = -1/2 + (w - mu_j)^2 / (2 * variance_j)

  and therefore

      d regularizer / d gamma_j = sum_i r_j(w_i) * 1/2 * (1 - (w_i - mu_j)^2 / variance_j)

  The previous expression multiplied by the variance instead of dividing by
  it and dropped the constant term, so it was not this derivative.
  """
  mu_j = tf.cast(mu_vector[j], tf.float32)
  var_j = tf.cast(tf.exp(gamma_vector[j]), tf.float32)
  per_weight = 0.5 * r_j(j, w) * (1.0 - tf.square(w - mu_j) / var_j)
  return tf.reduce_sum(per_weight)

 
def d_mu_j(j, w):
  """Mean-update gradient for component ``j``: sum over ``w`` of r_j * (mu_j - w) / variance_j."""
  offset = tf.cast(mu_vector[j], tf.float32) - w
  variance = tf.cast(tf.exp(gamma_vector[j]), tf.float32)
  return tf.reduce_sum(r_j(j, w) * offset / variance)


def d_pi_j(j, w):
  """Mixing-proportion update term for component ``j``: sum over ``w`` of 1 - r_j / pi_j."""
  responsibility_ratio = r_j(j, w) / pi_vector[j]
  return tf.reduce_sum(1.0 - responsibility_ratio)


def update_regularizer(lr, w):
  """Build one gradient-descent step for every mixture parameter.

  Args:
    lr: step size applied to all three parameter groups.
    w: weights the mixture is being fitted to.

  Returns:
    Tuple ``(mu, gamma, pi)`` of stacked tensors holding the stepped values
    for all ZU_DIM components. Nothing is assigned here.
  """
  stepped = {"mu": [], "gamma": [], "pi": []}
  for j in range(ZU_DIM):
    stepped["mu"].append(mu_vector[j] - lr * d_mu_j(j, w))
    stepped["gamma"].append(gamma_vector[j] - lr * d_gamma_j(j, w))
    stepped["pi"].append(pi_vector[j] - lr * d_pi_j(j, w))

  return tuple(tf.stack(stepped[key]) for key in ("mu", "gamma", "pi"))


# for all j, these need to be done.
# Build the update sub-graph once and reuse it. Calling update_regularizer
# inside the loop added a fresh copy of every op to the graph on each
# iteration (slower and slower session runs), and rebinding mu_vector /
# gamma_vector / pi_vector to the fetched numpy arrays discarded the
# tf.Variables themselves.
gmm_lr = 0.01
gmm_steps = 100
new_mu, new_gamma, new_pi = update_regularizer(gmm_lr, fc_weights)
# All three stepped values are computed from the current parameters before
# any assignment runs, so the update is simultaneous.
with tf.control_dependencies([new_mu, new_gamma, new_pi]):
  gmm_step = tf.group(
      tf.assign(mu_vector, new_mu),
      tf.assign(gamma_vector, new_gamma),
      tf.assign(pi_vector, new_pi),
  )

for i in range(gmm_steps):
  mu_val, gamma_val, pi_val, _ = sess.run([new_mu, new_gamma, new_pi, gmm_step])
  print(mu_val, gamma_val, pi_val)


KeyboardInterrupt: ignored

In [0]:
#@title Train loop.
# The Comet API key is read from the COMET_API_KEY environment variable so it
# is not stored in the notebook. The key that used to be hard-coded here has
# been exposed and should be rotated.
experiment = Experiment(api_key=os.environ.get("COMET_API_KEY"), project_name="ColabPrototypes", workspace="egebeyazit93")
experiment.set_name('GMM Regularizer')

# Resolve the Q head's final-layer variables once instead of scanning every
# global variable at each logging step.
q_head_names = ('Q/fully_connected/weights:0', 'Q/fully_connected/biases:0')
q_head_vars = [v for v in tf.global_variables() if v.name in q_head_names]

for it in range(ITERS):
    start_time = time.time()

    # wgan update: CRITIC_ITER discriminator steps per generator/Q step.
    for _ in range(CRITIC_ITER):
        data = np.array(random.sample(X_train_list, BATCH_SIZE))
        d_cost_rez, _ = sess.run([d_cost, d_train_op], feed_dict={real_data: data, z_ph: random_z()})

    # info update
    g_cost_rez, q_cost_rez, _, _ = sess.run([g_cost, q_cost, g_train_op, q_train_op], feed_dict={z_ph: random_z()})
    # d_cost_rez is the value from the last critic step above.
    f_train_stat.write("%i %g %g %g\n" % (it, g_cost_rez, d_cost_rez, q_cost_rez))

    if it % 100 == 0:
        # sample and save
        print("Training it: %i, time/it: %g, d_cost: %g" % (it, (time.time() - start_time), d_cost_rez))
        for code_idx in range(ZU_DIM):
            samples = sess.run([fake_data], feed_dict={z_ph: fix_z[code_idx]})
            save_images(np.squeeze(samples), './figs_c%i/samples_%.6i.png' % ((code_idx + 1), it))

        # don't log every single iteration
        experiment.log_metric("g_train_loss", g_cost_rez, step=it)
        experiment.log_metric("d_train_loss", d_cost_rez, step=it)
        experiment.log_metric("q_train_loss", q_cost_rez, step=it)

        for q_var in q_head_vars:
            experiment.log_histogram_3d(sess.run(q_var))