In [1]:
!pip install -q -U tensor2tensor
!pip install -q tensorflow matplotlib

[K     |████████████████████████████████| 1.4MB 4.9MB/s 
[K     |████████████████████████████████| 686kB 60.0MB/s 
[K     |████████████████████████████████| 296kB 59.2MB/s 
[K     |████████████████████████████████| 143kB 55.2MB/s 
[?25h  Building wheel for pypng (setup.py) ... [?25l[?25hdone


In [2]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import collections

from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics



For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Entry Point [tensor2tensor.envs.tic_tac_toe_env:TicTacToeEnv] registered with id [T2TEnv-TicTacToeEnv-v0]


In [0]:
# Alias for the contrib eager module; later cells use tfe.Iterator and
# tfe.restore_variables_on_create.
tfe = tf.contrib.eager
# Turn on eager execution for the rest of the notebook.
tfe.enable_eager_execution()

In [0]:
# Short alias for the estimator mode keys (Modes.TRAIN and Modes.EVAL are used below).
Modes = tf.estimator.ModeKeys

In [0]:
# Local working directories (under the user's home) and the GCS locations
# that data and checkpoints are copied from.
local_dirs = [
    os.path.expanduser(path)
    for path in ("~/t2t/data", "~/t2t/tmp", "~/t2t/train", "~/t2t/checkpoints")
]
data_dir, tmp_dir, train_dir, checkpoint_dir = local_dirs

# Create each local directory.
for local_dir in local_dirs:
  tf.gfile.MakeDirs(local_dir)

gs_data_dir = "gs://tensor2tensor-data"
gs_ckpt_dir = "gs://tensor2tensor-checkpoints/"

In [6]:
# Show the names of all problems registered with Tensor2Tensor.
problems.available()

['algorithmic_addition_binary40',
 'algorithmic_addition_decimal40',
 'algorithmic_cipher_shift200',
 'algorithmic_cipher_shift5',
 'algorithmic_cipher_vigenere200',
 'algorithmic_cipher_vigenere5',
 'algorithmic_identity_binary40',
 'algorithmic_identity_decimal40',
 'algorithmic_math_deepmind_all',
 'algorithmic_math_two_variables',
 'algorithmic_multiplication_binary40',
 'algorithmic_multiplication_decimal40',
 'algorithmic_reverse_binary40',
 'algorithmic_reverse_binary40_test',
 'algorithmic_reverse_decimal40',
 'algorithmic_reverse_nlplike32k',
 'algorithmic_reverse_nlplike8k',
 'algorithmic_shift_decimal40',
 'algorithmic_sort_problem',
 'audio_timit_characters_tune',
 'audio_timit_tokens8k_test',
 'audio_timit_tokens8k_tune',
 'babi_qa_concat_all_tasks_10k',
 'babi_qa_concat_all_tasks_1k',
 'babi_qa_concat_task10_10k',
 'babi_qa_concat_task10_1k',
 'babi_qa_concat_task11_10k',
 'babi_qa_concat_task11_1k',
 'babi_qa_concat_task12_10k',
 'babi_qa_concat_task12_1k',
 'babi_qa_con

In [0]:
# Look up the problem registered under the name "image_mnist".
mnist_problem = problems.problem("image_mnist")

In [8]:
# Download the raw MNIST files into tmp_dir and generate the problem's data
# files in data_dir.
mnist_problem.generate_data(data_dir, tmp_dir)

INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/t2t/tmp/train-images-idx3-ubyte.gz
100% completed
INFO:tensorflow:Successfully downloaded train-images-idx3-ubyte.gz, 9912422 bytes.
INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/t2t/tmp/train-labels-idx1-ubyte.gz
113% completed
INFO:tensorflow:Successfully downloaded train-labels-idx1-ubyte.gz, 28881 bytes.
INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/t2t/tmp/t10k-images-idx3-ubyte.gz
100% completed
INFO:tensorflow:Successfully downloaded t10k-images-idx3-ubyte.gz, 1648877 bytes.
INFO:tensorflow:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/t2t/tmp/t10k-labels-idx1-ubyte.gz
180% completed
INFO:tensorflow:Successfully downloaded t10k-labels-idx1-ubyte.gz, 4542 bytes.
INFO:tensorflow:Not downloading, file already found: /root/t2t/tmp/train-images-idx3-ubyte.

In [9]:
# Take the first example from the MNIST training dataset.
mnist_train_dataset = mnist_problem.dataset(Modes.TRAIN, data_dir)
mnist_example = tfe.Iterator(mnist_train_dataset).next()
image, label = mnist_example["inputs"], mnist_example["targets"]

# Keep index 0 of the last axis so imshow receives a 2-D float array.
pixels = image.numpy()[:, :, 0].astype(np.float32)
plt.imshow(pixels, cmap=plt.get_cmap('gray'))
print("Label: %d" % label.numpy())

INFO:tensorflow:Reading data files from /root/t2t/data/image_mnist-train*
INFO:tensorflow:partition: 0 num_data_files: 10
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Label: 4


In [0]:
# Look up the problem registered under the name "translate_ende_wmt32k".
ende_problem = problems.problem("translate_ende_wmt32k")

In [11]:
# Copy the problem's subword vocabulary file from the GCS data bucket into
# the local data directory.
vocab_name = "vocab.translate_ende_wmt32k.32768.subwords"
vocab_file = os.path.join(gs_data_dir, vocab_name)
!gsutil cp {vocab_file} {data_dir}

Copying gs://tensor2tensor-data/vocab.translate_ende_wmt32k.32768.subwords...
/ [1 files][313.8 KiB/313.8 KiB]                                                
Operation completed over 1 objects/313.8 KiB.                                    


In [0]:
# Feature encoders for the translation problem, indexed by feature name
# (the "inputs" encoder is used by encode() and decode() below).
encoders = ende_problem.feature_encoders(data_dir)

In [0]:
def encode(input_str, output_str=None):
  """Turn a raw input string into a features dict ready for inference.

  Args:
    input_str: Text to encode with the problem's "inputs" encoder.
    output_str: Accepted but not used by this function.

  Returns:
    A dict with the single key "inputs", holding the encoded ids (with the
    EOS id 1 appended) reshaped to [1, length, 1].
  """
  token_ids = encoders["inputs"].encode(input_str)
  token_ids = token_ids + [1]  # Append the EOS id.
  features = {"inputs": tf.reshape(token_ids, [1, -1, 1])}  # 3-D tensor.
  return features

def decode(integers):
  """Convert a sequence of token ids back into a string.

  Size-1 dimensions are squeezed out first, and everything from the first
  EOS id (1) onward is dropped before decoding.

  Args:
    integers: Token ids as a list, NumPy array, or anything np.squeeze
      accepts (e.g. the "outputs" returned by the model's infer()).

  Returns:
    The string produced by the problem's "inputs" encoder for those ids.
  """
  # np.squeeze collapses a single-element input to a 0-d array, which cannot
  # be iterated; atleast_1d keeps it a 1-D sequence in that case.
  integers = np.atleast_1d(np.squeeze(integers)).tolist()
  if 1 in integers:
    integers = integers[:integers.index(1)]
  # Pass the list as-is: squeezing again would turn a one-token result into
  # a 0-d array and make decoding fail.
  return encoders["inputs"].decode(integers)

In [0]:
# Generate and view the data.
# WMT data generation can take hours, so this cell does nothing by default.
# Set GENERATE_WMT_DATA to True to generate the data and print one example.
GENERATE_WMT_DATA = False

if GENERATE_WMT_DATA:
  ende_problem.generate_data(data_dir, tmp_dir)
  example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()
  inputs = [int(x) for x in example["inputs"].numpy()]  # Cast to ints.
  targets = [int(x) for x in example["targets"].numpy()]  # Cast to ints.

  # Example inputs as int-tensor.
  print("Inputs, encoded:")
  print(inputs)
  # Example inputs as a sentence.
  print("Inputs, decoded:")
  print(decode(inputs))
  # Example targets as int-tensor.
  print("Targets, encoded:")
  print(targets)
  # Example targets as a sentence.
  print("Targets, decoded:")
  print(decode(targets))

In [14]:
# Registered names of the model and of the hyperparameter set to build it with.
model_name = "transformer"
hparams_set = "transformer_base"

hparams = trainer_lib.create_hparams(
    hparams_set,
    data_dir=data_dir,
    problem_name="translate_ende_wmt32k",
)

# NOTE: Only create the model once when restoring from a checkpoint; it's a
# Layer and so subsequent instantiations will have different variable scopes
# that will not match the checkpoint.
model_cls = registry.model(model_name)
translate_model = model_cls(hparams, Modes.EVAL)

INFO:tensorflow:Setting T2TModel mode to 'eval'
INFO:tensorflow:Setting hparams.dropout to 0.0
INFO:tensorflow:Setting hparams.label_smoothing to 0.0
INFO:tensorflow:Setting hparams.layer_prepostprocess_dropout to 0.0
INFO:tensorflow:Setting hparams.symbol_dropout to 0.0
INFO:tensorflow:Setting hparams.attention_dropout to 0.0
INFO:tensorflow:Setting hparams.relu_dropout to 0.0


In [15]:
ckpt_name = "transformer_ende_test"
gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)
!gsutil -q cp -R {gs_ckpt} {checkpoint_dir}
ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))
ckpt_path

'/root/t2t/checkpoints/transformer_ende_test/model.ckpt-1421000'

In [16]:
# Restore and translate!
def translate(inputs):
  """Translate a string with the restored model.

  Args:
    inputs: Source text to translate.

  Returns:
    The decoded string for the model's "outputs".
  """
  features = encode(inputs)
  # Run inference inside the restore context so variables created here are
  # loaded from the checkpoint at ckpt_path.
  with tfe.restore_variables_on_create(ckpt_path):
    predictions = translate_model.infer(features)
    output_ids = predictions["outputs"]
  return decode(output_ids)

# Source text to translate.
inputs = "This is my project.I will write a conference paper about it and i will publish it on the BigDL-conference.This project can take me to Germany"
outputs = translate(inputs)

# Print the source text followed by the model's translation.
print("Inputs: %s" % inputs)
print("Outputs: %s" % outputs)

INFO:tensorflow:Greedy Decoding
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Inputs: This is my project.I will write a conference paper about it and i will publish it on the BigDL-conference.This project can take me to Germany
Outputs: Das ist mein Projekt.Ich werde ein Konferenzpapier darüber schreiben und es auf der BigDL-Konferenz veröffentlichen.Dieses Projekt kann mich nach Deutschland führen.
