In [5]:
import os, sys
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
sys.path.append('../')

In [6]:
import argparse, json
from tqdm import tqdm_notebook as tqdm

In [7]:
import os.path as osp
from data.pointcloud_dataset import load_one_class_under_folder
from utils.dirs import mkdir_and_rename
from utils.tf import reset_tf_graph

In [8]:
opt = {
    'data': {
        'data_root':
        '/orion/u/jiangthu/projects/latent_3d_points/data/shape_net_core_uniform_samples_2048',
        'class_name': 'airplane',
        'n_thread': 20
    },
    'model': {
        'type': 'wgan',
        'num_points': 2048,
        'noise_dim': 128,
        'noise_params': {
            'mu': 0,
            'sigma': 0.2
        }
    },
    'train': {
        'batch_size': 50,
        'learning_rate': 0.0001,
        'beta': 0.5,
        'z_rotate': False,
        'saver_step': 100
    },
    'path': {
        'train_root': './experiments',
        'experiment_name': 'single_class_gan_chair_noise128'
    }
}
train_dir = osp.join(opt['path']['train_root'], opt['path']['experiment_name'])
train_opt = opt['train']

In [10]:
import numpy as np
import tensorflow as tf
from utils.tf import leaky_relu
from utils.tf import expand_scope_by_name
from tflearn.layers.normalization import batch_normalization
from tflearn.layers.core import fully_connected, dropout
from tflearn.layers.conv import conv_1d
from utils.tf import expand_scope_by_name, replicate_parameter_for_all_layers
import tflearn

In [11]:
def encoder_with_convs_and_symmetry(in_signal,
                                    init_list,
                                    n_filters=[64, 128, 256, 1024],
                                    filter_sizes=[1],
                                    strides=[1],
                                    non_linearity=tf.nn.relu,
                                    weight_decay=0.001,
                                    symmetry=tf.reduce_max,
                                    regularizer=None,
                                    scope=None,
                                    reuse=False,
                                    padding='same',
                                    verbose=False,
                                    conv_op=conv_1d):
    '''An Encoder (recognition network), which maps inputs onto a latent space.
    '''

    if verbose:
        print('Building Encoder')

    n_layers = len(n_filters)
    filter_sizes = replicate_parameter_for_all_layers(filter_sizes, n_layers)
    strides = replicate_parameter_for_all_layers(strides, n_layers)

    if n_layers < 2:
        raise ValueError('More than 1 layers are expected.')

    for i in range(n_layers):
        if i == 0:
            layer = in_signal

        name = 'encoder_conv_layer_' + str(i)
        scope_i = expand_scope_by_name(scope, name)
        layer = conv_op(layer,
                        nb_filter=n_filters[i],
                        filter_size=filter_sizes[i],
                        strides=strides[i],
                        regularizer=regularizer,
                        weight_decay=weight_decay,
                        name=name,
                        reuse=reuse,
                        scope=scope_i,
                        padding=padding,
                        weights_init=tf.constant_initializer(init_list[i][0]),
                        bias_init=tf.constant_initializer(init_list[i][1]))

        if non_linearity is not None:
            layer = non_linearity(layer)

        if verbose:
            print(layer)
            print('output size:', np.prod(layer.get_shape().as_list()[1:]),
                  '\n')

    if symmetry is not None:
        layer = symmetry(layer, axis=1)
        if verbose:
            print(layer)

    return layer


def decoder_with_fc_only(latent_signal,
                         init_list,
                         layer_sizes=[],
                         non_linearity=tf.nn.relu,
                         regularizer=None,
                         weight_decay=0.001,
                         reuse=False,
                         scope=None,
                         verbose=False):
    '''A decoding network which maps points from the latent space back onto the data space.
    '''
    if verbose:
        print('Building Decoder')

    n_layers = len(layer_sizes)

    if n_layers < 2:
        raise ValueError(
            'For an FC decoder with single a layer use simpler code.')

    for i in range(0, n_layers - 1):
        name = 'decoder_fc_' + str(i)
        scope_i = expand_scope_by_name(scope, name)

        if i == 0:
            layer = latent_signal

        layer = fully_connected(
            layer,
            layer_sizes[i],
            activation='linear',
            weights_init=tf.constant_initializer(init_list[i][0]),
            bias_init=tf.constant_initializer(init_list[i][1]),
            name=name,
            regularizer=regularizer,
            weight_decay=weight_decay,
            reuse=reuse,
            scope=scope_i)

        if verbose:
            print(name,
                  'FC params = ',
                  np.prod(layer.W.get_shape().as_list()) +
                  np.prod(layer.b.get_shape().as_list()),
                  end=' ')

        if non_linearity is not None:
            layer = non_linearity(layer)

        if verbose:
            print(layer)
            print('output size:', np.prod(layer.get_shape().as_list()[1:]),
                  '\n')

    # Last decoding layer never has a non-linearity.
    name = 'decoder_fc_' + str(n_layers - 1)
    scope_i = expand_scope_by_name(scope, name)
    layer = fully_connected(layer,
                            layer_sizes[n_layers - 1],
                            activation='linear',
                            weights_init=tf.constant_initializer(init_list[-1][0]),
                            bias_init=tf.constant_initializer(init_list[-1][1]),
                            name=name,
                            regularizer=regularizer,
                            weight_decay=weight_decay,
                            reuse=reuse,
                            scope=scope_i)
    if verbose:
        print(name,
              'FC params = ',
              np.prod(layer.W.get_shape().as_list()) +
              np.prod(layer.b.get_shape().as_list()),
              end=' ')

    if verbose:
        print(layer)
        print('output size:', np.prod(layer.get_shape().as_list()[1:]), '\n')

    return layer


def mlp_discriminator(in_signal,
                      cov_init_list,
                      fc_init_list,
                      non_linearity=tf.nn.relu,
                      reuse=False,
                      scope=None):
    ''' used in nips submission.
    '''
    encoder_args = {
        'n_filters': [64, 128, 256, 256, 512],
        'filter_sizes': [1, 1, 1, 1, 1],
        'strides': [1, 1, 1, 1, 1]
    }
    encoder_args['reuse'] = reuse
    encoder_args['scope'] = scope
    encoder_args['non_linearity'] = non_linearity
    layer = encoder_with_convs_and_symmetry(in_signal, cov_init_list, weight_decay=0.0,
                                            **encoder_args)

    name = 'decoding_logits'
    scope_e = expand_scope_by_name(scope, name)
    d_logit = decoder_with_fc_only(layer,
                                   fc_init_list,
                                   layer_sizes=[128, 64, 1],
                                   reuse=reuse,
                                   scope=scope_e,
                                   weight_decay=0.0)
    d_prob = tf.nn.sigmoid(d_logit)
    return d_prob, d_logit


def point_cloud_generator(z,
                          pc_dims,
                          init_list,
                          layer_sizes=[64, 128, 512, 1024],
                          non_linearity=tf.nn.relu):
    ''' used in nips submission.
    '''

    n_points, dummy = pc_dims
    if (dummy != 3):
        raise ValueError()

    out_signal = decoder_with_fc_only(z,
                                      init_list[:-1],
                                      layer_sizes=layer_sizes,
                                      non_linearity=non_linearity, weight_decay=0.0)
    out_signal = non_linearity(out_signal)


    out_signal = fully_connected(out_signal,
                                 np.prod([n_points, 3]),
                                 activation='linear',
                                 weights_init=tf.constant_initializer(init_list[-1][0]),
                                 bias_init=tf.constant_initializer(init_list[-1][1]),
                                 weight_decay=0.0)
    out_signal = tf.reshape(out_signal, [-1, n_points, 3])
    return out_signal

In [12]:
from trainers.gan import GAN
from tflearn import is_training
class PGAN(GAN):
    '''Gradient Penalty.
    https://arxiv.org/abs/1704.00028
    '''

    def __init__(self, name, learning_rate, lam, n_output, noise_dim, discriminator, generator, beta=0.5, gen_kwargs={}, disc_kwargs={}, graph=None):

        GAN.__init__(self, name, graph)
        
        self.noise_dim = noise_dim
        self.n_output = n_output
        self.discriminator = discriminator
        self.generator = generator
    
        with tf.variable_scope(name):
            self.noise = tf.placeholder(tf.float32, shape=[None, noise_dim])            # Noise vector.
            self.real_pc = tf.placeholder(tf.float32, shape=[None] + self.n_output)     # Ground-truth.

            with tf.variable_scope('generator'):
                self.generator_out = self.generator(self.noise, self.n_output, **gen_kwargs)
                
            with tf.variable_scope('discriminator') as scope:
                self.real_prob, self.real_logit = self.discriminator(self.real_pc, scope=scope, **disc_kwargs)
                self.synthetic_prob, self.synthetic_logit = self.discriminator(self.generator_out, reuse=True, scope=scope, **disc_kwargs)
            
            
            # Compute WGAN losses
            self.loss_d_logit = tf.reduce_mean(self.synthetic_logit) - tf.reduce_mean(self.real_logit)
            self.loss_g = -tf.reduce_mean(self.synthetic_logit)

#             # Compute gradient penalty at interpolated points
#             ndims = self.real_pc.get_shape().ndims
#             batch_size = tf.shape(self.real_pc)[0]
#             alpha = 0.5
#             differences = self.generator_out - self.real_pc
#             interpolates = self.real_pc + (alpha * differences)

#             with tf.variable_scope('discriminator') as scope:
#                 gradients = tf.gradients(self.discriminator(interpolates, reuse=True, scope=scope, **disc_kwargs)[1], [interpolates])[0]

#             # Reduce over all but the first dimension
#             slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=list(range(1, ndims))))
#             self.gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
#             self.loss_d = self.loss_d_logit + lam * self.gradient_penalty
            self.loss_d = self.loss_d_logit

            train_vars = tf.trainable_variables()
            d_params = [v for v in train_vars if v.name.startswith(name + '/discriminator/')]
            g_params = [v for v in train_vars if v.name.startswith(name + '/generator/')]
            
            self.opt_d = self.optimizer(learning_rate, beta, self.loss_d, d_params)
            self.opt_g = self.optimizer(learning_rate, beta, self.loss_g, g_params)
#             self.optimizer_d = tf.train.AdamOptimizer(learning_rate, beta1=beta)
#             self.opt_d = self.optimizer_d.minimize(self.loss_d, var_list=d_params)
#             self.optimizer_g = tf.train.AdamOptimizer(learning_rate, beta1=beta)
#             self.opt_g = self.optimizer_g.minimize(self.loss_g, var_list=g_params)

            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
            self.init = tf.global_variables_initializer()

            # Launch the session
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.sess.run(self.init)

In [13]:
# model
discriminator = mlp_discriminator
generator = point_cloud_generator

In [11]:
np.random.seed(0)
g_fc_channel = [128, 64, 128, 512, 1024, 6144]
d_cov_channel = [3, 64, 128, 256, 256, 512]
d_fc_channel = [512, 128, 64, 1]
g_fc_weight = []
for i in range(len(g_fc_channel) - 1):
    in_c = g_fc_channel[i]
    out_c = g_fc_channel[i + 1]
    g_fc_weight.append(
        (np.random.rand(in_c, out_c).astype(np.float32) * 0.1 - 0.05,
         np.random.rand(out_c).astype(np.float32) * 0.1 - 0.05))

d_cov_weight = []
for i in range(len(d_cov_channel) - 1):
    in_c = d_cov_channel[i]
    out_c = d_cov_channel[i + 1]
    d_cov_weight.append((np.random.rand(in_c, out_c).astype(np.float32) * 0.1 - 0.05,
                         np.random.rand(out_c).astype(np.float32) * 0.1 - 0.05))

d_fc_weight = []
for i in range(len(d_fc_channel) - 1):
    in_c = d_fc_channel[i]
    out_c = d_fc_channel[i + 1]
    d_fc_weight.append((np.random.rand(in_c, out_c).astype(np.float32) * 0.1 - 0.05,
                        np.random.rand(out_c).astype(np.float32) * 0.1 - 0.05))

input_noise = [np.random.rand(4, 128).astype(np.float32) * 0.1 - 0.05 for _ in range(10)]
target_points = [
    np.random.rand(4, 2048, 3).astype(np.float32) * 0.1 - 0.05 for _ in range(10)
]

In [12]:
reset_tf_graph()
tf.random.set_random_seed(0)
model_opt = opt['model']
if model_opt['type'] == 'wgan':
    lam = 10
    disc_kwargs = {'cov_init_list': d_cov_weight, 'fc_init_list': d_fc_weight}
    gen_kwargs = {'init_list': g_fc_weight}
    gan = PGAN(model_opt['type'],
               train_opt['learning_rate'],
               lam, [model_opt['num_points'], 3],
               model_opt['noise_dim'],
               discriminator,
               generator,
               disc_kwargs=disc_kwargs,
               gen_kwargs=gen_kwargs,
               beta=train_opt['beta'])

In [13]:
for i in range(10):
    feed_dict = {gan.real_pc: target_points[i], gan.noise: input_noise[i]}
    _, loss_d = gan.sess.run([gan.opt_d, gan.loss_d], feed_dict=feed_dict)
    feed_dict = {gan.noise: input_noise[i]}
    _, loss_g = gan.sess.run([gan.opt_g, gan.loss_g], feed_dict=feed_dict)
    print(loss_d, loss_g)

-2.1327287e-07 -0.00828593
-2.4586916e-07 -0.008225871
-2.933666e-07 -0.008159315
-3.6507845e-07 -0.008088458
-4.3399632e-07 -0.008014243
-4.833564e-07 -0.007936596
-5.3923577e-07 -0.007855793
-5.9977174e-07 -0.00777228
-6.495975e-07 -0.007685996
-7.21775e-07 -0.007597178


In [13]:
for i in range(10):
    feed_dict = {gan.real_pc: target_points[i], gan.noise: input_noise[i]}
    _, loss_d = gan.sess.run([gan.opt_d, gan.loss_d], feed_dict=feed_dict)
    feed_dict = {gan.noise: input_noise[i]}
    _, loss_g = gan.sess.run([gan.opt_g, gan.loss_g], feed_dict=feed_dict)
    print(loss_d, loss_g)

-2.1327287e-07 -0.00828593
-2.4586916e-07 -0.008225888
-2.924353e-07 -0.008159323
-3.6600977e-07 -0.008088484
-4.3120235e-07 -0.00801428
-4.833564e-07 -0.007936636
-5.3830445e-07 -0.0078557655
-5.9977174e-07 -0.0077722715
-6.421469e-07 -0.007686084
-7.2224066e-07 -0.007597371


In [270]:
i = 0
feed_dict = {gan.real_pc: target_points[i], gan.noise: input_noise[i]}
_, loss_d, loss_d_logit, gradient_penalty = gan.sess.run(
    [gan.opt_d, gan.loss_d, gan.loss_d_logit, gan.gradient_penalty],
    feed_dict=feed_dict)

In [273]:
float(loss_d), float(gradient_penalty), float(loss_d_logit)

(9.999931335449219, 0.999993085861206, -2.1327286958694458e-07)

In [274]:
gen_var = gan.sess.run(tf.trainable_variables('wgan/dis'))
for v in gen_var:
    print(v.reshape(-1)[0])

0.041308507
0.013890814
0.0013969338
0.041102726
-0.040633775
-0.045540597
0.023586925
0.03344834
-0.009558369
0.0005664937
0.038841397
0.020789754
0.008475412
0.032563414
0.037454013
0.0066774487


In [148]:
# reset_tf_graph()
# np.random.seed(0)
# w = np.random.rand(3, 4).astype(np.float32)
# b = np.random.rand(4).astype(np.float32)
# in_f = np.random.rand(2, 3).astype(np.float32)

# in_feat = tf.placeholder(tf.float32, [None, 3])
# out = fully_connected(in_feat,
#                       4,
#                       weights_init=tf.constant_initializer(w),
#                       bias_init=tf.constant_initializer(b))

# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     res = sess.run([out], feed_dict = {in_feat: in_f})
# print(res[0])

[[1.6817647 1.7762184 1.063653  1.252217 ]
 [2.2302346 2.4863346 1.6563923 1.8565607]]
