DISCRETE AUTOENCODERS FOR SEQUENCE MODELS   
Paper: https://arxiv.org/pdf/1801.09797.pdf   
Reference implementation (Tensor2Tensor): https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/research/transformer_vae.py

In [18]:
# Imports we need.
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import collections

from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics

In [19]:
# Enable TF Eager execution
# `tfe` is an alias for the tf.contrib.eager module; eager mode is switched on
# through that alias.
tfe = tf.contrib.eager
tfe.enable_eager_execution()

# Other setup
# Short alias for the Estimator mode keys (Modes.EVAL is passed to the model
# constructor further down).
Modes = tf.estimator.ModeKeys

In [10]:
# Setup some directories
# Working directories used by the cells below. The paths are relative, so they
# are created under the notebook's current working directory.
data_dir = os.path.expanduser("t2t/data")
tmp_dir = os.path.expanduser("t2t/tmp")
train_dir = os.path.expanduser("t2t/train")
checkpoint_dir = os.path.expanduser("t2t/checkpoints")

# Create every directory that is missing. One loop replaces the four
# copy-pasted checks, so a typo in a single branch cannot leave one out.
for _dir_path in (data_dir, tmp_dir, train_dir, checkpoint_dir):
    if not os.path.isdir(_dir_path):
        os.makedirs(_dir_path)

In [5]:
# Show the names of the problems available through tensor2tensor.problems.
problems.available()

['algorithmic_addition_binary40',
 'algorithmic_addition_decimal40',
 'algorithmic_cipher_shift200',
 'algorithmic_cipher_shift5',
 'algorithmic_cipher_vigenere200',
 'algorithmic_cipher_vigenere5',
 'algorithmic_identity_binary40',
 'algorithmic_identity_decimal40',
 'algorithmic_math_deepmind_all',
 'algorithmic_math_two_variables',
 'algorithmic_multiplication_binary40',
 'algorithmic_multiplication_decimal40',
 'algorithmic_reverse_binary40',
 'algorithmic_reverse_binary40_test',
 'algorithmic_reverse_decimal40',
 'algorithmic_reverse_nlplike32k',
 'algorithmic_reverse_nlplike8k',
 'algorithmic_shift_decimal40',
 'algorithmic_sort_problem',
 'audio_timit_characters_tune',
 'audio_timit_tokens8k_test',
 'audio_timit_tokens8k_tune',
 'babi_qa_concat_all_tasks_10k',
 'babi_qa_concat_all_tasks_1k',
 'babi_qa_concat_task10_10k',
 'babi_qa_concat_task10_1k',
 'babi_qa_concat_task11_10k',
 'babi_qa_concat_task11_1k',
 'babi_qa_concat_task12_10k',
 'babi_qa_concat_task12_1k',
 'babi_qa_con

In [28]:
# Look up the "languagemodel_en_wiki32k" problem and generate its data into
# data_dir, with downloads going to tmp_dir.
# NOTE(review): the variable is named `ende_problem`, but the problem requested
# here is a language-model problem rather than an En-De translation one, while
# other cells reference "translate_ende_wmt32k" -- confirm which is intended.
ende_problem = problems.problem("languagemodel_en_wiki32k")
ende_problem.generate_data(data_dir, tmp_dir)

INFO:tensorflow:Generating vocab file: t2t/data\vocab.languagemodel_en_wiki32k.32000.subwords
INFO:tensorflow:Downloading https://drive.google.com/uc?export=download&id=1-l02fI15ieMIZk8EnXhzhsvuEYRoznZ8&confirm=V3YJ to t2t/tmp\enwiki_train.txt.gz


KeyboardInterrupt: 

In [23]:
# Copy the vocab file locally so we can encode inputs and decode model outputs
# All vocabs are stored on GCS
# Public Tensor2Tensor data bucket; defined in this cell so that the cell runs
# on a fresh kernel.
gs_data_dir = "gs://tensor2tensor-data"
# NOTE(review): this is the translate_ende_wmt32k vocab, whereas `ende_problem`
# is created from "languagemodel_en_wiki32k" -- confirm which one is intended.
vocab_name = "vocab.translate_ende_wmt32k.32768.subwords"
# GCS object names always use "/", so build the URL explicitly instead of with
# os.path.join, which inserts a backslash on Windows.
vocab_file = "{}/{}".format(gs_data_dir.rstrip("/"), vocab_name)
# !gsutil cp {vocab_file} {data_dir}

# Get the encoders from the problem
encoders = ende_problem.feature_encoders(data_dir)

# Setup helper functions for encoding and decoding
def encode(input_str, output_str=None):
    """Turn a raw input string into a features dict ready for inference.

    Args:
        input_str: text to run through the "inputs" encoder.
        output_str: accepted for call compatibility; not used.

    Returns:
        A dict with a single "inputs" entry: the encoded ids followed by the
        EOS id (1), reshaped to [1, -1, 1].
    """
    eos_id = 1
    token_ids = encoders["inputs"].encode(input_str)
    token_ids = token_ids + [eos_id]
    # Make it 3D: [1, -1, 1].
    features = {"inputs": tf.reshape(token_ids, [1, -1, 1])}
    return features

def decode(integers):
    """Convert model output ids back into a string.

    Args:
        integers: ids as a list or array. Singleton dimensions are squeezed
            away, so a nested single-sequence output is handled the same way
            as a flat list.

    Returns:
        The string produced by the "inputs" encoder for the ids that precede
        the first EOS id (1), or for all ids when no EOS id is present.
    """
    # atleast_1d keeps a single-token result (0-d after squeeze) iterable.
    ids = np.atleast_1d(np.squeeze(integers)).tolist()
    if 1 in ids:
        ids = ids[:ids.index(1)]
    # Hand the plain list to the encoder: squeezing again would collapse a
    # one-element list into a 0-d array, which cannot be iterated over.
    return encoders["inputs"].decode(ids)

In [14]:
# There are many models available in Tensor2Tensor; show the names of all
# models known to the registry.
registry.list_models()

['aligned',
 'attention_lm',
 'attention_lm_moe',
 'autoencoder_autoregressive',
 'autoencoder_basic',
 'autoencoder_basic_discrete',
 'autoencoder_dual_discrete',
 'autoencoder_ordered_discrete',
 'autoencoder_residual',
 'autoencoder_residual_discrete',
 'autoencoder_residual_vae',
 'autoencoder_stacked',
 'basic_fc_relu',
 'byte_net',
 'cycle_gan',
 'dense_bitwise_categorical_policy',
 'diagonal_neural_gpu',
 'distillation',
 'evolved_transformer',
 'feed_forward_categorical_policy',
 'feed_forward_cnn_small_categorical_policy',
 'feed_forward_cnn_small_categorical_policy_new',
 'gene_expression_conv',
 'glow',
 'imagetransformer',
 'imagetransformer2d',
 'imagetransformer_moe',
 'img2img_transformer',
 'img2img_transformer_block_parallel',
 'lstm_encoder',
 'lstm_seq2seq',
 'lstm_seq2seq_attention',
 'lstm_seq2seq_attention_bidirectional_encoder',
 'lstm_seq2seq_bidirectional_encoder',
 'mtf_bitransformer',
 'mtf_image_transformer',
 'mtf_res_net',
 'mtf_transformer',
 'mtf_unitran

In [20]:
# Build the hyperparameters first, then the model from them
model_name = "transformer_ae"
hparams_set = "transformer_ae_base"

hparams = trainer_lib.create_hparams(
    hparams_set,
    data_dir=data_dir,
    problem_name="translate_ende_wmt32k",
)

# NOTE: instantiate the model a single time when restoring from a checkpoint.
# It is a Layer, so every additional instantiation gets a different variable
# scope that will no longer match the checkpoint.
model_cls = registry.model(model_name)
translate_model = model_cls(hparams, Modes.EVAL)

INFO:tensorflow:Setting T2TModel mode to 'eval'
INFO:tensorflow:Setting hparams.dropout to 0.0
INFO:tensorflow:Setting hparams.label_smoothing to 0.0
INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0
INFO:tensorflow:Setting hparams.symbol_dropout to 0.0
INFO:tensorflow:Setting hparams.attention_dropout to 0.0
INFO:tensorflow:Setting hparams.relu_dropout to 0.0
INFO:tensorflow:Setting hparams.word_dropout to 0.0
INFO:tensorflow:Setting hparams.z_dropout to 0.0


In [25]:
# Copy the pretrained checkpoint locally
# Public Tensor2Tensor checkpoint bucket; defined in this cell so that the cell
# runs on a fresh kernel.
gs_ckpt_dir = "gs://tensor2tensor-checkpoints/"
ckpt_name = "transformer_ende_test"
# GCS object names always use "/", so build the URL explicitly instead of with
# os.path.join, which inserts a backslash on Windows.
gs_ckpt = "{}/{}".format(gs_ckpt_dir.rstrip("/"), ckpt_name)
# !gsutil -q cp -R {gs_ckpt} {checkpoint_dir}
# Look for the newest checkpoint in the local copy; the result is None when
# that directory holds no checkpoint (for example if the copy was never run).
ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))

In [27]:
# Show the resolved checkpoint path. A value of None means no checkpoint was
# found in the local checkpoint directory.
print(ckpt_path)

None


In [33]:
t2t-trainer --generate_data --data_dir=t2t_data --output_dir=t2t_train/LM_ptb_characters  --problem=languagemodel_ptb_characters  --model=transformer_ae  --hparams_set=transformer_ae_small  --train_steps=1000  --eval_steps=100

SyntaxError: can't assign to operator (<ipython-input-33-25a68dae0758>, line 1)