# Seq2Seq NMT
The following code translates English to Vietnamese using neural machine translation (NMT).
It is based on the code provided at https://github.com/tensorflow/nmt; follow that link for details on the Seq2Seq architecture.
In this code, I am using the scaled Luong attention mechanism (Luong et al., 2015).

In [1]:
import tensorflow as tf

## Loading Hyper-parameters 

In [2]:
# Build the hyper-parameter container from the key/value pairs in params.json.
import ast
import json

hparams = tf.contrib.training.HParams()
with open("params.json", "r") as jfile:
    raw_params = jfile.read()
try:
    # Parse the file as data only; eval() would execute any code it contains.
    data = json.loads(raw_params)
except ValueError:
    # Fallback for files written with Python literals (None/True/single quotes),
    # which the earlier eval()-based loader accepted but strict JSON rejects.
    data = ast.literal_eval(raw_params)
for param, value in data.items():
    hparams.add_hparam(param, value)

In [3]:
# Residual layers are not used in this notebook, so the count is registered as 0.
hparams.add_hparam('num_residual_layers', 0)

In [4]:
#from https://stackoverflow.com/a/38580201/6077501
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return how many local devices report a device_type of 'GPU'."""
    gpu_count = 0
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_count += 1
    return gpu_count

In [5]:
# Store the detected GPU count as the 'num_gpus' hyper-parameter.
hparams.add_hparam('num_gpus', get_available_gpus())

## Pre-processing 

In [6]:
import vocab_utils

In [7]:
# Check both vocabulary files and keep the sizes reported for them
# (the second value returned by check_vocab is not needed here).
src_vocab_size, _ = vocab_utils.check_vocab(
    hparams.src_vocab_file, hparams.out_dir)
tgt_vocab_size, _ = vocab_utils.check_vocab(
    hparams.tgt_vocab_file, hparams.out_dir)

# Register the sizes as hyper-parameters.
hparams.add_hparam('src_vocab_size', src_vocab_size)
hparams.add_hparam('tgt_vocab_size', tgt_vocab_size)

b'# Vocab file /data/manni/mt_data/vocab.en exists'
b'# Vocab file /data/manni/mt_data/vocab.vi exists'


In [8]:
# Create the output directory if it is not there yet.
out_dir = hparams.out_dir
if not tf.gfile.Exists(out_dir):
    tf.gfile.MakeDirs(out_dir)

## Model Creator

In [9]:
import attention_model
import model
import model_helper
import iterator_utils
import collections
import misc_utils as utils

In [10]:
# Model class instantiated by the train/eval/infer graph builders.
model_type = attention_model.AttentionModel

### Train Graph

In [11]:
class TrainGraph(collections.namedtuple(
        "TrainGraph",
        ["graph", "model", "iterator", "skip_count_placeholder"])):
    """Bundle of the training graph, its model, its iterator and the skip-count placeholder."""


def create_train_graph(scope=None):
    """Build the training graph and return it bundled with its model and inputs.

    Args:
        scope: forwarded unchanged to the model constructor.

    Returns:
        A TrainGraph holding the new tf.Graph, the model built in TRAIN mode,
        the dataset iterator and the placeholder used to feed ``skip_count``.
    """
    graph = tf.Graph()
    with graph.as_default():
        # Pass share_vocab exactly like the eval and infer graph builders do,
        # so all three graphs construct their vocabulary tables the same way.
        src_vocab_table, tgt_vocab_table = vocab_utils.create_vocab_tables(
            hparams.src_vocab_file,
            hparams.tgt_vocab_file,
            hparams.share_vocab)

        src_dataset = tf.contrib.data.TextLineDataset(hparams.src_file)
        tgt_dataset = tf.contrib.data.TextLineDataset(hparams.tgt_file)
        # Fed every time the training iterator is (re-)initialised.
        skip_count_placeholder = tf.placeholder(shape=(), dtype=tf.int64)

        # NOTE(review): the eval graph passes hparams.random_seed here; None is
        # kept as in the original -- confirm whether that is intentional.
        iterator = iterator_utils.get_iterator(
            src_dataset,
            tgt_dataset,
            src_vocab_table,
            tgt_vocab_table,
            batch_size=hparams.batch_size,
            sos=hparams.sos,
            eos=hparams.eos,
            source_reverse=hparams.source_reverse,
            random_seed=None,
            num_buckets=hparams.num_buckets,
            src_max_len=hparams.src_max_len,
            tgt_max_len=hparams.tgt_max_len,
            skip_count=skip_count_placeholder)

        model = model_type(
            hparams,
            iterator=iterator,
            mode=tf.contrib.learn.ModeKeys.TRAIN,
            source_vocab_table=src_vocab_table,
            target_vocab_table=tgt_vocab_table,
            scope=scope)

    return TrainGraph(
        graph=graph,
        model=model,
        iterator=iterator,
        skip_count_placeholder=skip_count_placeholder)

In [12]:
# Build the training graph bundle.
train_graph = create_train_graph()

b'# creating train graph ...'
b'  num_bi_layers = 1, num_bi_residual_layers=0'
b'  cell 0'b'  LSTM, forget_bias=1'b'  DropoutWrapper, dropout=0.2 'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DropoutWrapper, dropout=0.2 'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DropoutWrapper, dropout=0.2 'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 1'b'  LSTM, forget_bias=1'b'  DropoutWrapper, dropout=0.2 'b'  DeviceWrapper, device=/gpu:0'b''
  start_decay_step=8000, learning_rate=1, decay_steps 1000,decay_factor 0.5
b'# Trainable variables'
b'  embeddings/encoder/embedding_encoder:0, (17191, 512), '
b'  embeddings/decoder/embedding_decoder:0, (7709, 512), '
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/kernel:0, (1024

### Eval Graph

In [13]:
class EvalGraph(collections.namedtuple(
        "EvalGraph",
        ["graph", "model", "src_file_placeholder", "tgt_file_placeholder", "iterator"])):
    """Bundle of the evaluation graph, its model, both file placeholders and its iterator."""

def create_eval_graph(scope=None):
    """Build the evaluation graph, with the data files supplied via placeholders.

    Args:
        scope: forwarded unchanged to the model constructor.

    Returns:
        An EvalGraph holding the new tf.Graph, the model built in EVAL mode,
        the source/target file-name placeholders and the dataset iterator.
    """
    eval_tf_graph = tf.Graph()

    with eval_tf_graph.as_default():
        vocab_tables = vocab_utils.create_vocab_tables(
            hparams.src_vocab_file, hparams.tgt_vocab_file, hparams.share_vocab)
        src_table, tgt_table = vocab_tables

        # File names are fed at run time rather than fixed in the graph.
        src_file_ph = tf.placeholder(shape=(), dtype=tf.string)
        tgt_file_ph = tf.placeholder(shape=(), dtype=tf.string)
        src_lines = tf.contrib.data.TextLineDataset(src_file_ph)
        tgt_lines = tf.contrib.data.TextLineDataset(tgt_file_ph)

        batch_iterator = iterator_utils.get_iterator(
            src_lines, tgt_lines, src_table, tgt_table, hparams.batch_size,
            sos=hparams.sos,
            eos=hparams.eos,
            source_reverse=hparams.source_reverse,
            random_seed=hparams.random_seed,
            num_buckets=hparams.num_buckets,
            src_max_len=hparams.src_max_len_infer,
            tgt_max_len=hparams.tgt_max_len_infer)

        eval_model = model_type(
            hparams,
            iterator=batch_iterator,
            mode=tf.contrib.learn.ModeKeys.EVAL,
            source_vocab_table=src_table,
            target_vocab_table=tgt_table,
            scope=scope)

    return EvalGraph(
        graph=eval_tf_graph,
        model=eval_model,
        src_file_placeholder=src_file_ph,
        tgt_file_placeholder=tgt_file_ph,
        iterator=batch_iterator)

In [14]:
# Build the evaluation graph bundle (building it prints the graph summary).
eval_graph = create_eval_graph()

b'# creating eval graph ...'
b'  num_bi_layers = 1, num_bi_residual_layers=0'
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 1'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
  start_decay_step=8000, learning_rate=1, decay_steps 1000,decay_factor 0.5
b'# Trainable variables'
b'  embeddings/encoder/embedding_encoder:0, (17191, 512), '
b'  embeddings/decoder/embedding_decoder:0, (7709, 512), '
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic_

### Inference Graph

In [15]:
from tensorflow.python.ops import lookup_ops

class InferGraph(collections.namedtuple(
        "InferGraph",
        ["graph", "model", "src_placeholder", "batch_size_placeholder", "iterator"])):
    """Bundle of the inference graph, its model, its two input placeholders and its iterator."""

def create_infer_graph(scope=None):
    """Build the inference graph, fed with source sentences through a placeholder.

    Args:
        scope: forwarded unchanged to the model constructor.

    Returns:
        An InferGraph holding the new tf.Graph, the model built in INFER mode,
        the source-sentence and batch-size placeholders and the iterator.
    """
    infer_tf_graph = tf.Graph()
    with infer_tf_graph.as_default():
        vocab_tables = vocab_utils.create_vocab_tables(
            hparams.src_vocab_file, hparams.tgt_vocab_file, hparams.share_vocab)
        src_table, tgt_table = vocab_tables
        # Index -> string table over the target vocabulary file; indices that
        # are not found map to vocab_utils.UNK.
        id_to_tgt_word = lookup_ops.index_to_string_table_from_file(
            hparams.tgt_vocab_file, default_value=vocab_utils.UNK)

        src_ph = tf.placeholder(shape=[None], dtype=tf.string)
        batch_size_ph = tf.placeholder(shape=[], dtype=tf.int64)
        src_sentences = tf.contrib.data.Dataset.from_tensor_slices(src_ph)

        infer_iterator = iterator_utils.get_infer_iterator(
            src_sentences,
            src_table,
            batch_size=batch_size_ph,
            eos=hparams.eos,
            source_reverse=hparams.source_reverse,
            src_max_len=hparams.src_max_len_infer)

        infer_model = model_type(
            hparams,
            iterator=infer_iterator,
            mode=tf.contrib.learn.ModeKeys.INFER,
            source_vocab_table=src_table,
            target_vocab_table=tgt_table,
            reverse_target_vocab_table=id_to_tgt_word,
            scope=scope)

    return InferGraph(
        graph=infer_tf_graph,
        model=infer_model,
        src_placeholder=src_ph,
        batch_size_placeholder=batch_size_ph,
        iterator=infer_iterator)

In [16]:
# Build the inference graph bundle (building it prints the graph summary).
infer_graph = create_infer_graph()

b'# creating infer graph ...'
b'  num_bi_layers = 1, num_bi_residual_layers=0'
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 1'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
  start_decay_step=8000, learning_rate=1, decay_steps 1000,decay_factor 0.5
b'# Trainable variables'
b'  embeddings/encoder/embedding_encoder:0, (17191, 512), '
b'  embeddings/decoder/embedding_decoder:0, (7709, 512), '
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic

## Data loader

In [17]:
import codecs

def load_data(inference_input_file):
    """Read a UTF-8 text file and return its lines.

    Args:
        inference_input_file: path of the file to read.

    Returns:
        A list with one entry per line. When ``hparams.inference_indices`` is
        truthy, only the lines at those indices are returned, in that order.
    """
    utf8_reader = codecs.getreader("utf-8")
    with utf8_reader(tf.gfile.GFile(inference_input_file, mode="rb")) as stream:
        lines = stream.read().splitlines()
    if not (hparams and hparams.inference_indices):
        return lines
    return [lines[idx] for idx in hparams.inference_indices]

### Loading a sample from Eval set

In [18]:
# Source and target sides of the dev set, used for the sample decodes.
sample_src_data = load_data(
    hparams.dev_src_file)
sample_tgt_data = load_data(
    hparams.dev_tgt_file)

### Assign sessions to graphs

In [19]:
# One shared session configuration, with device-placement logging switched on.
config_proto = utils.get_config_proto(log_device_placement=True)

# A dedicated session for each of the three graphs.
train_sess = tf.Session(graph=train_graph.graph, config=config_proto)
eval_sess = tf.Session(graph=eval_graph.graph, config=config_proto)
infer_sess = tf.Session(graph=infer_graph.graph, config=config_proto)

### Load Train Graph to Process

In [20]:
# Create the training model, or load it from hparams.out_dir, inside the
# training graph; the call also returns the current global step.
with train_graph.graph.as_default():
    loaded_train_model, global_step = model_helper.create_or_load_model(
        train_graph.model, hparams.out_dir, train_sess, "train")

b'  created train model with fresh parameters, time 2.06s'


### Write Graph to TensorBoard

In [21]:
import os
# Summary writer for the training graph, writing under <out_dir>/Training.
summary_writer = tf.summary.FileWriter(
    os.path.join(hparams.out_dir, 'Training'),
    train_graph.graph)

# Train

In [22]:
# For every metric, register a "best_<metric>" score starting at 0 and a
# "best_<metric>_dir" directory, and create that directory.
for metric in hparams.metrics:
    best_metric_dir = os.path.join(hparams.out_dir, "best_" + metric)
    hparams.add_hparam("best_" + metric, 0)
    hparams.add_hparam("best_" + metric + "_dir", best_metric_dir)
    tf.gfile.MakeDirs(best_metric_dir)

In [23]:
import Train

# Run one full evaluation before any training step.
eval_results, _ = Train.run_full_eval(
    hparams.out_dir,
    infer_graph,
    infer_sess,
    eval_graph,
    eval_sess,
    hparams,
    summary_writer,
    sample_src_data,
    sample_tgt_data)


b'  created infer model with fresh parameters, time 1.20s'
b'  # 1501'
    src: This is Bjorn Sundin .
    ref: Bjorn Sundin .
    nmt: 100 đúc đúc đúc đúc Hirshhorn Hirshhorn Lai Lai Lai
b'  created eval model with fresh parameters, time 0.68s'
  eval dev: perplexity 8363.03, time 3s, Thu Jul 27 19:31:46 2017.
b'  created infer model with fresh parameters, time 0.30s'


## Initialization

In [24]:
import time

In [25]:
# The three "last time we did X" counters all start at the current global step.
last_stats_step = last_eval_step = last_external_eval_step = global_step

# Evaluation cadence, derived from the statistics cadence.
steps_per_eval = 10 * hparams.steps_per_stats
steps_per_external_eval = 5 * steps_per_eval

# Running statistics, reset after every statistics report.
avg_step_time = 0.0
step_time = checkpoint_loss = checkpoint_predict_count = 0.0
checkpoint_total_count = 0.0
speed = train_ppl = 0.0
start_train_time = time.time()

current_lr = loaded_train_model.learning_rate.eval(session=train_sess)
utils.print_out(
    "# Start step %d, lr %g, %s" % (global_step, current_lr, time.ctime()))

# Skip the elements covered by the steps already recorded for this epoch.
skip_count = hparams.batch_size * hparams.epoch_step
utils.print_out("# Init train iterator, skipping %d elements" % skip_count)

train_sess.run(
    train_graph.iterator.initializer,
    feed_dict={train_graph.skip_count_placeholder: skip_count})
del current_lr

b'# Start step 0, lr 1, Thu Jul 27 19:31:56 2017'
b'# Init train iterator, skipping 0 elements'


## Training loop

In [26]:
import math

# Main training loop: run training steps until global_step reaches
# hparams.num_train_steps, with periodic statistics, checkpoints and evaluation.
while global_step < hparams.num_train_steps:
    start_time = time.time()
    try:
        # One training step; its result tuple is unpacked into seven fields,
        # including the updated global_step.
        step_result = loaded_train_model.train(train_sess)
        (_, step_loss, step_predict_count, step_summary, global_step, step_word_count, 
        batch_size) = step_result
        hparams.epoch_step += 1
    except tf.errors.OutOfRangeError:
        # The training iterator ran out of data, i.e. an epoch has finished:
        # reset the in-epoch step counter, decode a sample, run the external
        # evaluation, then restart the iterator without skipping any elements.
        hparams.epoch_step = 0
        utils.print_out("# Finished an epoch, step %d. Perform external evaluation" %global_step)
        Train.run_sample_decode(infer_graph, 
                          infer_sess, 
                          hparams.out_dir, 
                          hparams, 
                          summary_writer, 
                          sample_src_data,
                          sample_tgt_data)
        dev_scores, test_scores, _ = Train.run_external_eval(infer_graph, 
                                                             infer_sess, 
                                                             hparams.out_dir,
                                                             hparams, 
                                                             summary_writer)
        train_sess.run(train_graph.iterator.initializer,feed_dict={train_graph.skip_count_placeholder: 0})
        # No step was executed in this iteration, so skip the bookkeeping below.
        continue

    # Record the summary returned by this training step.
    summary_writer.add_summary(step_summary, global_step)
    
    # Accumulate statistics since the last report: elapsed time, loss weighted
    # by batch size, and the predict/word counts returned by the step.
    step_time += (time.time() - start_time)
    checkpoint_loss += (step_loss * batch_size)
    checkpoint_predict_count += step_predict_count
    checkpoint_total_count += float(step_word_count)

    # Print statistics once at least hparams.steps_per_stats steps have passed
    # since the previous report.
    if global_step - last_stats_step >= hparams.steps_per_stats:
        last_stats_step = global_step
        avg_step_time = step_time / hparams.steps_per_stats
        train_ppl = utils.safe_exp(checkpoint_loss / checkpoint_predict_count)
        # Reported as "wps ...K": accumulated word count per second, in thousands.
        speed = checkpoint_total_count / (1000 * step_time)
        
        utils.print_out("  global step %d lr %g "
          "step-time %.2fs wps %.2fK ppl %.2f %s" %
          (global_step,
           loaded_train_model.learning_rate.eval(session=train_sess),
           avg_step_time, speed, train_ppl, Train._get_best_results(hparams)))
        
        # Stop training altogether when the training perplexity is NaN.
        if math.isnan(train_ppl):
            break

        # Reset timer and loss accumulators for the next reporting window.
        step_time, checkpoint_loss, checkpoint_predict_count = 0.0, 0.0, 0.0
        checkpoint_total_count = 0.0
        

    # Every steps_per_eval steps: log train_ppl, save a checkpoint, decode a
    # sample and run the internal evaluation.
    if global_step - last_eval_step >= steps_per_eval:
        last_eval_step = global_step
        utils.print_out("# Save eval, global step %d" % global_step)
        utils.add_summary(summary_writer, global_step, "train_ppl", train_ppl)

        # Save checkpoint
        loaded_train_model.saver.save(train_sess,os.path.join(hparams.out_dir, "translate.ckpt"),
                                      global_step=global_step)

        # Evaluate on dev/test
        Train.run_sample_decode(infer_graph, 
                                infer_sess, 
                                out_dir, 
                                hparams, 
                                summary_writer, 
                                sample_src_data,
                                sample_tgt_data)
        dev_ppl, test_ppl = Train.run_internal_eval(eval_graph, 
                                                    eval_sess, 
                                                    out_dir, 
                                                    hparams, 
                                                    summary_writer)

    # Every steps_per_external_eval steps: save a checkpoint, decode a sample
    # and run the external evaluation.
    if global_step - last_external_eval_step >= steps_per_external_eval:
        last_external_eval_step = global_step
        
        # Save checkpoint
        loaded_train_model.saver.save(train_sess,os.path.join(hparams.out_dir, "translate.ckpt"),
                                      global_step=global_step)
        
        Train.run_sample_decode(infer_graph, 
                                infer_sess,
                                out_dir, 
                                hparams, 
                                summary_writer, 
                                sample_src_data,
                                sample_tgt_data)
        dev_scores, test_scores, _ = Train.run_external_eval(infer_graph, 
                                                             infer_sess, 
                                                             out_dir,
                                                             hparams, 
                                                             summary_writer)

b'  global step 100 lr 1 step-time 0.56s wps 9.97K ppl 598274.04 bleu 0.00'
b'  global step 200 lr 1 step-time 0.51s wps 10.93K ppl 63224.74 bleu 0.00'
b'  global step 300 lr 1 step-time 0.52s wps 10.88K ppl 5383.52 bleu 0.00'
b'  global step 400 lr 1 step-time 0.51s wps 10.95K ppl 976.09 bleu 0.00'
b'  global step 500 lr 1 step-time 0.52s wps 10.92K ppl 569.79 bleu 0.00'
b'  global step 600 lr 1 step-time 0.52s wps 10.96K ppl 433.69 bleu 0.00'
b'  global step 700 lr 1 step-time 0.51s wps 11.00K ppl 335.05 bleu 0.00'
b'  global step 800 lr 1 step-time 0.51s wps 11.04K ppl 252.46 bleu 0.00'
b'  global step 900 lr 1 step-time 0.51s wps 11.04K ppl 201.56 bleu 0.00'
b'  global step 1000 lr 1 step-time 0.51s wps 11.00K ppl 156.57 bleu 0.00'
b'# Save eval, global step 1000'
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-1000
b'  loaded infer model parameters from /data/manni/mt_model/translate.ckpt-1000, time 0.12s'
b'  # 95'
    src: &quot; It can &apos;t be d

b'  loaded infer model parameters from /data/manni/mt_model/translate.ckpt-4000, time 0.23s'
b'  # 1358'
    src: The other thing we would like to ask is of companies also all over the world that will be able to help us validate these AEDs .
    ref: Một điều khác nữa chúng tôi xin rất mong được giúp đỡ là các công ty trên khắp thế giới có thể giúp chúng tôi duy trì các thiết bị AED này .
    nmt: Một điều khác mà chúng tôi muốn đặt ra là các công ty cũng có thể giúp chúng ta có thể giúp chúng ta vượt qua những điều đó . </s> <unk> <unk> <unk> <unk> <unk> <unk> <unk>
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-4000
b'  loaded eval model parameters from /data/manni/mt_model/translate.ckpt-4000, time 0.20s'
  eval dev: perplexity 14.41, time 3s, Thu Jul 27 20:10:50 2017.
b'  global step 4100 lr 1 step-time 0.50s wps 11.09K ppl 11.58 bleu 0.00'
b'# Finished an epoch, step 4172. Perform external evaluation'
INFO:tensorflow:Restoring parameters from /data/m

b'# External evaluation, global step 6000'
b'  decoding to output /data/manni/mt_model/output_dev.'
  done, num sentences 1553, time 82s, Thu Jul 27 20:36:10 2017.
b'  bleu dev: 0.1'
b'  saving hparams to /data/manni/mt_model/hparams'
b'  global step 6300 lr 1 step-time 0.55s wps 10.04K ppl 7.84 bleu 0.15'
b'  global step 6400 lr 1 step-time 0.54s wps 10.57K ppl 7.23 bleu 0.15'
b'  global step 6500 lr 1 step-time 0.50s wps 11.07K ppl 7.30 bleu 0.15'
b'  global step 6600 lr 1 step-time 0.50s wps 11.18K ppl 7.49 bleu 0.15'
b'  global step 6700 lr 1 step-time 0.51s wps 11.19K ppl 7.50 bleu 0.15'
b'  global step 6800 lr 1 step-time 0.50s wps 11.13K ppl 7.48 bleu 0.15'
b'  global step 6900 lr 1 step-time 0.51s wps 10.98K ppl 7.62 bleu 0.15'
b'  global step 7000 lr 1 step-time 0.51s wps 11.08K ppl 7.74 bleu 0.15'
b'# Save eval, global step 7000'
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-7000
b'  loaded infer model parameters from /data/manni/mt_model/trans

b'  global step 9900 lr 0.5 step-time 0.51s wps 11.10K ppl 5.16 bleu 0.16'
b'  global step 10000 lr 0.25 step-time 0.51s wps 11.18K ppl 5.26 bleu 0.16'
b'# Save eval, global step 10000'
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-10000
b'  loaded infer model parameters from /data/manni/mt_model/translate.ckpt-10000, time 0.12s'
b'  # 1268'
    src: And it &apos;s also a great example of government getting in on the crowd-sourcing game .
    ref: Đây cũng là một ví dụ điển hình khác về việc chính phủ triển khai trò chơi cộng đồng .
    nmt: Và nó cũng là một ví dụ tuyệt vời về chính phủ đang tham gia vào trò chơi <unk> . </s>
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-10000
b'  loaded eval model parameters from /data/manni/mt_model/translate.ckpt-10000, time 0.32s'
  eval dev: perplexity 11.80, time 3s, Thu Jul 27 21:13:14 2017.
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-10000
b'  loa

In [27]:
# Save a final checkpoint of the training session at the current global step.
loaded_train_model.saver.save(
    train_sess,
    os.path.join(out_dir, "translate.ckpt"),
    global_step=global_step)

'/data/manni/mt_model/translate.ckpt-12000'

### Evaluate

In [28]:
import evaluation_utils

# Full evaluation of the model stored in out_dir.
eval_results, _ = Train.run_full_eval(
    out_dir, infer_graph, infer_sess, eval_graph, eval_sess,
    hparams, summary_writer, sample_src_data, sample_tgt_data)

# Print the final training statistics together with the evaluation results.
utils.print_out(
    "# Final, step %d lr %g "
    "step-time %.2f wps %.2fK ppl %.2f, %s, %s" % (
        global_step,
        loaded_train_model.learning_rate.eval(session=train_sess),
        avg_step_time,
        speed,
        train_ppl,
        eval_results,
        time.ctime()))
utils.print_time("# Done training!", start_train_time)

utils.print_out("# Start evaluating saved best models.")

# Re-run the full evaluation from each metric's "best_<metric>_dir" directory.
for metric in hparams.metrics:
    best_model_dir = getattr(hparams, "best_" + metric + "_dir")
    eval_results, best_global_step = Train.run_full_eval(
        best_model_dir, infer_graph, infer_sess, eval_graph, eval_sess,
        hparams, summary_writer, sample_src_data, sample_tgt_data)
    utils.print_out(
        "# Best %s, step %d "
        "step-time %.2f wps %.2fK, %s, %s" % (
            metric,
            best_global_step,
            avg_step_time,
            speed,
            eval_results,
            time.ctime()))

INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-12000
b'  loaded infer model parameters from /data/manni/mt_model/translate.ckpt-12000, time 0.20s'
b'  # 94'
    src: With no support from her husband , she caused a sensation by taking him to court and prosecuting her own case , and a far greater sensation when she won .
    ref: Và dầu vấp phải sự không ủng hộ của chồng , bà gây chấn động khi kiện ông ta ra toà và tự khởi tố , và còn gây chấn động mạnh hơn khi bà thắng kiện .
    nmt: Không có sự ủng hộ từ chồng cô ấy , cô đã tạo ra một cảm giác bằng cách đưa ông đến phiên toà và làm cho chính trường hợp của cô ấy một cảm giác lớn hơn khi cô ấy thắng . </s> </s> <unk>
INFO:tensorflow:Restoring parameters from /data/manni/mt_model/translate.ckpt-12000
b'  loaded eval model parameters from /data/manni/mt_model/translate.ckpt-12000, time 0.20s'
  eval dev: perplexity 11.97, time 3s, Thu Jul 27 21:35:11 2017.
INFO:tensorflow:Restoring parameters from /data/man

# Inference / Test

In [None]:
# Files used by the inference cell: input sentences, where the translations
# are written, and the reference translations used for scoring.
hparams.add_hparam("inference_input_file", "/data/manni/mt_data/tst2013.en")
hparams.add_hparam("inference_output_file", "/data/manni/mt_data/tst2013_pred.vi")
hparams.add_hparam("inference_ref_file", "/data/manni/mt_data/tst2013.vi")

# Worker settings passed to inference.inference: job 0 of 1 worker.
hparams.add_hparam("jobid", 0)
hparams.add_hparam("num_workers", 1)

In [36]:
import inference

# Translate the inference input file and, when a reference file is available,
# score the produced translations with every configured metric.
trans_file = hparams.inference_output_file
ckpt = hparams.ckpt
if not ckpt:
    # No explicit checkpoint configured: fall back to the latest one in out_dir.
    ckpt = tf.train.latest_checkpoint(out_dir)

# Decoding and scoring must run whichever way the checkpoint was chosen;
# nesting them under the `if` above skipped them whenever hparams.ckpt was set.
inference.inference(
    ckpt,
    hparams.inference_input_file,
    trans_file,
    hparams,
    hparams.num_workers,
    hparams.jobid)

ref_file = hparams.inference_ref_file
# Only score when a reference is configured and the translation file exists.
if ref_file and tf.gfile.Exists(trans_file):
    for metric in hparams.metrics:
        score = evaluation_utils.evaluate(ref_file,
                                          trans_file,
                                          metric,
                                          hparams.bpe_delimiter)
        utils.print_out("  %s: %.1f" % (metric, score))

b'# creating infer graph ...'
b'  num_bi_layers = 1, num_bi_residual_layers=0'
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 0'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
b'  cell 1'b'  LSTM, forget_bias=1'b'  DeviceWrapper, device=/gpu:0'b''
  start_decay_step=8000, learning_rate=1, decay_steps 1000,decay_factor 0.5
b'# Trainable variables'
b'  embeddings/encoder/embedding_encoder:0, (17191, 512), '
b'  embeddings/decoder/embedding_decoder:0, (7709, 512), '
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/fw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/kernel:0, (1024, 2048), /device:GPU:0'
b'  dynamic_seq2seq/encoder/bidirectional_rnn/bw/basic_lstm_cell/bias:0, (2048,), /device:GPU:0'
b'  dynamic