In [1]:
import sys
import os
# Expose only GPU index 5 to CUDA. This is set before the project modules
# below are imported -- presumably so the deep-learning backend sees the
# restriction when it initialises; TODO confirm the index for your machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '5'
import os.path as osp
# Add the current working directory to the import path so the project-local
# packages imported below (data, trainers, models, utils) can be resolved.
sys.path.append(os.path.abspath('./'))
from data.pointcloud_dataset import load_one_class_under_folder
from trainers.point_net_ae import PointNetAutoEncoder
from trainers.autoencoder import default_train_params, Configuration
from models.ae_models import mlp_architecture_ala_iclr_18
from utils.dirs import create_dir
from utils.tf import reset_tf_graph

Instructions for updating:
Colocations handled automatically by placer.


In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to the project's .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# --- Input data -------------------------------------------------------------
# Folder holding the point-cloud samples, and the single shape class to load.
data_root = '../latent_3d_points/data/shape_net_core_uniform_samples_2048/'
class_name = 'chair'

# --- Experiment output ------------------------------------------------------
# Results are written under <train_root>/<experiment_name>.
train_root = './experiments'
experiment_name = 'single_class_ae'

# --- Model / optimisation ---------------------------------------------------
n_pc_points = 2048   # Number of points per model.
bneck_size = 128     # Size of the auto-encoder bottleneck.
ae_loss = 'chamfer'  # Loss to optimize: 'emd' or 'chamfer'.

In [4]:
# Load the point clouds of `class_name` found under `data_root`.
# verbose=True makes the loader report what it loaded.
pc_dataset = load_one_class_under_folder(
    data_root,
    class_name,
    verbose=True
)

6778 pclouds were loaded. They belong in 1 shape-classes.


In [5]:
# Start from the project's default training parameters and override only the
# batch size.
train_params = default_train_params()
train_params.update(batch_size=400)

In [11]:
# Show the effective training parameters (defaults plus the batch-size
# override) as a sanity check before building the configuration.
print(train_params)

{'batch_size': 400, 'training_epochs': 500, 'denoising': False, 'learning_rate': 0.0005, 'z_rotate': False, 'saver_step': 10, 'loss_display_step': 1}


In [6]:
# Directory that receives everything produced by this experiment.
train_dir = osp.join(train_root, experiment_name)

# Encoder/decoder builders and their argument sets for the chosen point count
# and bottleneck size.
encoder, decoder, enc_args, dec_args = mlp_architecture_ala_iclr_18(
    n_pc_points, bneck_size
)

# Make sure the experiment directory exists (kept as the last expression so
# the cell still displays create_dir's return value).
create_dir(train_dir)

0

In [7]:
# Assemble the auto-encoder configuration. The entries taken from
# `train_params` are forwarded unchanged: each listed key is also the name of
# the Configuration keyword it feeds.
conf = Configuration(
    n_input=[n_pc_points, 3],
    loss=ae_loss,
    train_dir=train_dir,
    encoder=encoder,
    decoder=decoder,
    encoder_args=enc_args,
    decoder_args=dec_args,
    **{
        key: train_params[key]
        for key in (
            'training_epochs',
            'batch_size',
            'denoising',
            'learning_rate',
            'loss_display_step',
            'saver_step',
            'z_rotate',
        )
    }
)
conf.experiment_name = experiment_name

# How often to evaluate/print the loss on held-out data (only relevant when
# held-out data is passed to ae.train()).
conf.held_out_step = 5

# Persist the configuration next to the other experiment outputs.
conf.save(osp.join(train_dir, 'configuration'))

In [10]:
# Dump the assembled configuration for a visual sanity check.
print(conf)

                    batch_size: 400
                 consistent_io: None
                         debug: False
                       decoder: decoder_with_fc_only
                  decoder_args: {'layer_sizes': [256, 256, 6144], 'b_norm': False, 'b_norm_finish': False, 'verbose': True}
                       encoder: encoder_with_convs_and_symmetry
                  encoder_args: {'n_filters': [64, 128, 128, 256, 128], 'filter_sizes': [1], 'strides': [1], 'b_norm': True, 'verbose': True}
               experiment_name: single_class_ae
                 gauss_augment: None
                 held_out_step: 5
                  is_denoising: False
               latent_vs_recon: 1.0
                 learning_rate: 0.0005
                          loss: chamfer
             loss_display_step: 1
                       n_input: [2048, 3]
                      n_output: [2048, 3]
                           n_z: None
             saver_max_to_keep: None
                    saver_step: 10
       

In [8]:
# Reset graph state via the project helper before (re)building the model --
# presumably this clears the default TensorFlow graph so re-running the cell
# starts clean; TODO confirm against utils.tf.
reset_tf_graph()
# Build the point-cloud auto-encoder, named after the experiment and driven
# entirely by `conf`.
ae = PointNetAutoEncoder(conf.experiment_name, conf)

Building Encoder
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
encoder_conv_layer_0 conv params =  256 bnorm params =  128
Tensor("single_class_ae_2/Relu:0", shape=(?, 2048, 64), dtype=float32)
output size: 131072 

encoder_conv_layer_1 conv params =  8320 bnorm params =  256
Tensor("single_class_ae_2/Relu_1:0", shape=(?, 2048, 128), dtype=float32)
output size: 262144 

encoder_conv_layer_2 conv params =  16512 bnorm params =  256
Tensor("single_class_ae_2/Relu_2:0", shape=(?, 2048, 128), dtype=float32)
output size: 262144 

encoder_conv_layer_3 conv params =  33024 bnorm params =  512
Tensor("single_class_ae_2/Relu_3:0", shape=(?, 2048, 256), dtype=float32)
output size: 524288 

encoder_conv_layer_4 conv params =  32896 bnorm params =  256
Tensor("single_class_ae_2/Relu_4:0", shape=(?, 2048, 128), dtype=float32)
output size: 262144 

Tensor("single_class_ae_2/Max:0", shape=(?, 128), dtype=float32)
Building

In [9]:
# buffering=1 selects line buffering for text files, so each line written to
# 'train_stats.txt' is flushed as soon as it is complete.
buf_size = 1

# Open the stats log in append mode inside a context manager: the file is
# closed even if training raises or is interrupted (e.g. KeyboardInterrupt
# during a long run), which the previous open()/close() pair did not guarantee.
with open(osp.join(conf.train_dir, 'train_stats.txt'), 'a', buf_size) as fout:
    # Train on the loaded point clouds; the trainer writes its log lines to
    # `fout` and the returned statistics are kept in `train_stats`.
    train_stats = ae.train(pc_dataset, conf, log_file=fout)

Epoch: 0001 training time (minutes)= 0.2072 loss= 0.018943852
INFO:tensorflow:./experiments/single_class_ae/models.ckpt-1 is not in all_model_checkpoint_paths. Manually adding it.
Epoch: 0002 training time (minutes)= 0.1639 loss= 0.003582052
Epoch: 0003 training time (minutes)= 0.1641 loss= 0.002676303
Epoch: 0004 training time (minutes)= 0.1662 loss= 0.002211126
Epoch: 0005 training time (minutes)= 0.1648 loss= 0.001947972
Epoch: 0006 training time (minutes)= 0.1665 loss= 0.001784731
Epoch: 0007 training time (minutes)= 0.1678 loss= 0.001664012
Epoch: 0008 training time (minutes)= 0.1675 loss= 0.001567298
Epoch: 0009 training time (minutes)= 0.1675 loss= 0.001510063
Epoch: 0010 training time (minutes)= 0.1703 loss= 0.001437147
INFO:tensorflow:./experiments/single_class_ae/models.ckpt-10 is not in all_model_checkpoint_paths. Manually adding it.
Epoch: 0011 training time (minutes)= 0.1665 loss= 0.001403775
Epoch: 0012 training time (minutes)= 0.1662 loss= 0.001351641
Epoch: 0013 trainin

Epoch: 0121 training time (minutes)= 0.1644 loss= 0.000803739
Epoch: 0122 training time (minutes)= 0.1649 loss= 0.000791127
Epoch: 0123 training time (minutes)= 0.1647 loss= 0.000778599
Epoch: 0124 training time (minutes)= 0.1651 loss= 0.000770227
Epoch: 0125 training time (minutes)= 0.1644 loss= 0.000801893
Epoch: 0126 training time (minutes)= 0.1649 loss= 0.000777943
Epoch: 0127 training time (minutes)= 0.1649 loss= 0.000754454
Epoch: 0128 training time (minutes)= 0.1647 loss= 0.000767145
Epoch: 0129 training time (minutes)= 0.1646 loss= 0.000756607
Epoch: 0130 training time (minutes)= 0.1647 loss= 0.000760412
INFO:tensorflow:./experiments/single_class_ae/models.ckpt-130 is not in all_model_checkpoint_paths. Manually adding it.
Epoch: 0131 training time (minutes)= 0.1643 loss= 0.000768216
Epoch: 0132 training time (minutes)= 0.1646 loss= 0.000765197
Epoch: 0133 training time (minutes)= 0.1644 loss= 0.000774820
Epoch: 0134 training time (minutes)= 0.1648 loss= 0.000776592
Epoch: 0135 

Epoch: 0232 training time (minutes)= 0.1647 loss= 0.000686072
Epoch: 0233 training time (minutes)= 0.1648 loss= 0.000695259
Epoch: 0234 training time (minutes)= 0.1649 loss= 0.000678005
Epoch: 0235 training time (minutes)= 0.1645 loss= 0.000661818
Epoch: 0236 training time (minutes)= 0.1645 loss= 0.000675541
Epoch: 0237 training time (minutes)= 0.1647 loss= 0.000670564
Epoch: 0238 training time (minutes)= 0.1648 loss= 0.000671036
Epoch: 0239 training time (minutes)= 0.1646 loss= 0.000668533
Epoch: 0240 training time (minutes)= 0.1655 loss= 0.000676023
INFO:tensorflow:./experiments/single_class_ae/models.ckpt-240 is not in all_model_checkpoint_paths. Manually adding it.
Epoch: 0241 training time (minutes)= 0.1647 loss= 0.000701186
Epoch: 0242 training time (minutes)= 0.1649 loss= 0.000705000
Epoch: 0243 training time (minutes)= 0.1654 loss= 0.000697509
Epoch: 0244 training time (minutes)= 0.1654 loss= 0.000692493
Epoch: 0245 training time (minutes)= 0.1648 loss= 0.000672637
Epoch: 0246 

Epoch: 0343 training time (minutes)= 0.1657 loss= 0.000635553
Epoch: 0344 training time (minutes)= 0.1653 loss= 0.000612767
Epoch: 0345 training time (minutes)= 0.1652 loss= 0.000612135
Epoch: 0346 training time (minutes)= 0.1648 loss= 0.000614685
Epoch: 0347 training time (minutes)= 0.1647 loss= 0.000619784
Epoch: 0348 training time (minutes)= 0.1652 loss= 0.000624606
Epoch: 0349 training time (minutes)= 0.1656 loss= 0.000661612
Epoch: 0350 training time (minutes)= 0.1651 loss= 0.000632982
INFO:tensorflow:./experiments/single_class_ae/models.ckpt-350 is not in all_model_checkpoint_paths. Manually adding it.
Epoch: 0351 training time (minutes)= 0.1652 loss= 0.000622033
Epoch: 0352 training time (minutes)= 0.1649 loss= 0.000622037
Epoch: 0353 training time (minutes)= 0.1647 loss= 0.000621099
Epoch: 0354 training time (minutes)= 0.1657 loss= 0.000630251
Epoch: 0355 training time (minutes)= 0.1642 loss= 0.000625086
Epoch: 0356 training time (minutes)= 0.1643 loss= 0.000626860
Epoch: 0357 

Epoch: 0454 training time (minutes)= 0.1649 loss= 0.000591397
Epoch: 0455 training time (minutes)= 0.1649 loss= 0.000592791
Epoch: 0456 training time (minutes)= 0.1651 loss= 0.000581629
Epoch: 0457 training time (minutes)= 0.1650 loss= 0.000586400
Epoch: 0458 training time (minutes)= 0.1652 loss= 0.000577761
Epoch: 0459 training time (minutes)= 0.1686 loss= 0.000571890
Epoch: 0460 training time (minutes)= 0.1661 loss= 0.000579657
INFO:tensorflow:./experiments/single_class_ae/models.ckpt-460 is not in all_model_checkpoint_paths. Manually adding it.
Epoch: 0461 training time (minutes)= 0.1653 loss= 0.000586807
Epoch: 0462 training time (minutes)= 0.1653 loss= 0.000584272
Epoch: 0463 training time (minutes)= 0.1656 loss= 0.000618491
Epoch: 0464 training time (minutes)= 0.1655 loss= 0.000617320
Epoch: 0465 training time (minutes)= 0.1656 loss= 0.000586831
Epoch: 0466 training time (minutes)= 0.1652 loss= 0.000578385
Epoch: 0467 training time (minutes)= 0.1659 loss= 0.000593276
Epoch: 0468 