In [1]:
import sys; sys.path.append('..')

import os
import json
import argparse
from time import time
from datetime import timedelta

import numpy as np
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt
from pandas import ewma
from IPython.display import clear_output

from vocab import Vocab
from src.training_utils import *
from lib.tensor_utils import infer_mask, initialize_uninitialized_variables, all_shapes_equal

from models.transformer_fused import Model
from models.transformer_lm import TransformerLM

  return f(*args, **kwds)
Using TensorFlow backend.


In [2]:
from os import path

# Name of the translation model; it is used as the TF variable scope when the
# trainable weights are collected and as the checkpoint sub-directory name.
model_name = 'transformer'

# Training-run settings. Every value is read with `config.get(...)` further
# down in the notebook, so a missing key yields None rather than a KeyError.
config = dict(
    # --- inputs ---
    data_path='../data_small',                        # directory with the BPE corpora and the *.voc files
    src_lm_path='../trained_models/lm1/model.npz',    # weights copied into the encoder
    target_lm_path='../trained_models/lm2/model.npz', # weights for the target LM and the decoder
    hp_file_path='../hp_files/trans_default.json',    # JSON with model hyper-parameters
    # --- stopping criteria ---
    use_early_stopping=True,
    early_stopping_last_n=10,       # handed to should_stop_early() together with the BLEU history
    max_epochs=1000,
    max_time_seconds=1200,          # wall-clock budget checked after every training step
    # --- validation / inference ---
    batch_size_for_inference=16,    # NOTE(review): presumably consumed by compute_bleu_for_model via `config` -- confirm
    max_len=200,                    # token id matrices are truncated to this many columns
    validate_every_num_epochs=5,
    #'warm_up_num_epochs': 10,
    # --- resources / checkpointing ---
    gpu_memory_fraction=0.5,        # NOTE(review): presumably read by create_gpu_options -- confirm
    min_interval_between_saves=3,   # minimum number of epochs between two checkpoints
    max_num_models=4,               # checkpoint files are rotated modulo this number
)

In [3]:
# Directory that receives the checkpoints written by save_model().
# os.makedirs creates the missing parents as well; the previous code created
# a stray 'trained_models' directory in the current working directory and
# then failed on os.mkdir(model_path) whenever '../trained_models' was absent.
model_path = '../trained_models/{}'.format(model_name)
os.makedirs(model_path, exist_ok=True)


def _read_lines(file_path):
    """Return all lines of a UTF-8 text file (without line terminators).

    The file handle is closed before returning.
    """
    with open(file_path, 'r', encoding='utf-8') as text_file:
        return text_file.read().splitlines()


# Parallel corpora: files ending in 1 hold the source side, files ending in 2
# hold the target side (they are tokenized with inp_voc / out_voc respectively).
src_train_path = '{}/bpe_parallel_train1.txt'.format(config.get('data_path'))
dst_train_path = '{}/bpe_parallel_train2.txt'.format(config.get('data_path'))
src_val_path = '{}/bpe_parallel_val1.txt'.format(config.get('data_path'))
dst_val_path = '{}/bpe_parallel_val2.txt'.format(config.get('data_path'))

src_train = _read_lines(src_train_path)
dst_train = _read_lines(dst_train_path)
src_val = _read_lines(src_val_path)
dst_val = _read_lines(dst_val_path)

# Vocabularies for the source (1.voc) and target (2.voc) languages.
inp_voc = Vocab.from_file('{}/1.voc'.format(config.get('data_path')))
out_voc = Vocab.from_file('{}/2.voc'.format(config.get('data_path')))
max_len = config.get('max_len', 200)

max_num_models = config.get('max_num_models', 4)

# Hyper-parameters are optional: without 'hp_file_path' the models are built
# with their own defaults.
if config.get('hp_file_path'):
    with open(config.get('hp_file_path'), 'r', encoding='utf-8') as hp_file:
        hp = json.load(hp_file)
else:
    hp = {}

In [4]:
# Build the training graph: a target-side language model ('lm2'), the fused
# translation model on top of it, the masked cross-entropy loss and the
# optimizer step. Afterwards the pre-trained LM weights are copied into the
# translation model's encoder and decoder, and the training-loop state is reset.
gpu_options = create_gpu_options(config)

sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))
lm = TransformerLM('lm2', out_voc, **hp)
if config.get('target_lm_path'):
    # Restore the LM: every trainable variable under the LM's scope is looked
    # up in the archive by its full variable name; missing ones are reported.
    lm_weights = np.load(config.get('target_lm_path'))
    ops = []
    for w in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, lm.name):
        if w.name in lm_weights:
            ops.append(tf.assign(w, lm_weights[w.name]))
        else:
            print(w.name, 'not initialized')

    sess.run(ops)
else:
    raise ValueError("Must specify LM path!")
model = Model(model_name, inp_voc, out_voc, lm, **hp)

# Token id matrices fed by the training loop (source and target batch).
inp = tf.placeholder(tf.int32, [None, None])
out = tf.placeholder(tf.int32, [None, None])
# The model output is cropped along axis 1 to the width of `out`.
logprobs = model.symbolic_score(inp, out, is_train=True)[:,:tf.shape(out)[1]]

# Per-token cross-entropy, multiplied by the mask returned by infer_mask
# (presumably zeroing positions after EOS -- TODO confirm), summed along
# axis 1 and then averaged.
nll = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logprobs, labels=out)
loss = nll * infer_mask(out, out_voc.eos, dtype=tf.float32)
loss = tf.reduce_sum(loss, axis=1)
loss = tf.reduce_mean(loss)

# Only variables collected under the `model_name` scope are optimized (and
# later saved by save_model); the LM variables live under the 'lm2' scope.
weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, model_name)

# Every global variable that is not in `weights`. Not referenced again in the
# cells visible here.
all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
non_trainable_vars = list(set(all_vars).difference(set(weights)))

# Gradients are clipped to a global norm of 100 before being applied.
grads = tf.gradients(loss, weights)
grads = tf.clip_by_global_norm(grads, 100)[0]
optimizer = create_optimizer(hp)
train_step = optimizer.apply_gradients(zip(grads, weights))

# Must run after the optimizer is created so that its variables exist too.
initialize_uninitialized_variables(sess)

# Collect assign ops that overwrite translation-model weights with LM weights.
# Keys are the variable names with the leading '<model_name>/' stripped.
assigns = []
weights_by_common_name = {w.name[len(model_name)+1:]: w for w in weights}

# Decoder initialisation from the target LM: drop the first path component
# (the LM scope), drop 'main/', and rename enc->dec / inp->out so that the LM
# names line up with the decoder variable names. A name without a counterpart
# raises KeyError.
with np.load(config.get('target_lm_path')) as dic:
    for key in dic: # decoder_init
        print(key)
        w_lm = dic[key]
        weights_key = '/'.join(key.split('/')[1:]).replace('main/','').replace("enc",'dec').replace("inp","out")
        if "emb_out_bias" in weights_key: # no such thing
            continue

        w_var = weights_by_common_name[weights_key]

        # Shape check in 'assert' mode before scheduling the assignment.
        all_shapes_equal(w_lm, w_var, session=sess, mode='assert')

        assigns.append(tf.assign(w_var,w_lm))


# Encoder initialisation from the source LM: same name mapping, but without
# the enc->dec / inp->out renaming.
with np.load(config.get("src_lm_path")) as dic:
    for key in dic: # encoder_init
        w_lm = dic[key]
        print(key)
        weights_key = '/'.join(key.split('/')[1:]).replace('main/','')
        if "logits" in weights_key: # encoder has no 'logits' layer for the logits to be initialised
            continue
        w_var = weights_by_common_name[weights_key]

        all_shapes_equal(w_lm, w_var, session=sess, mode='assert')
        assigns.append(tf.assign(w_var,w_lm))

sess.run(assigns)

# Mutable state shared between the training loop, validate() and save_model().
batch_size = hp.get('batch_size', 32)
epoch = 0
training_start_time = time()
loss_history = []
val_scores = []

num_iters_done = 0
should_start_next_epoch = True # We need this var to break outer loop

lm2/emb_inp_bias:0
lm2/main/emb_inp/mat:0
lm2/main/enc_attn-0/mem_conv/W:0
lm2/main/enc_attn-0/mem_conv/b:0
lm2/main/enc_attn-0/out_conv/W:0
lm2/main/enc_attn-0/out_conv/b:0
lm2/main/enc_attn-0/layer_norm/scale:0
lm2/main/enc_attn-0/layer_norm/bias:0
lm2/main/enc_attn-1/mem_conv/W:0
lm2/main/enc_attn-1/mem_conv/b:0
lm2/main/enc_attn-1/out_conv/W:0
lm2/main/enc_attn-1/out_conv/b:0
lm2/main/enc_attn-1/layer_norm/scale:0
lm2/main/enc_attn-1/layer_norm/bias:0
lm2/main/enc_attn-2/mem_conv/W:0
lm2/main/enc_attn-2/mem_conv/b:0
lm2/main/enc_attn-2/out_conv/W:0
lm2/main/enc_attn-2/out_conv/b:0
lm2/main/enc_attn-2/layer_norm/scale:0
lm2/main/enc_attn-2/layer_norm/bias:0
lm2/main/enc_attn-3/mem_conv/W:0
lm2/main/enc_attn-3/mem_conv/b:0
lm2/main/enc_attn-3/out_conv/W:0
lm2/main/enc_attn-3/out_conv/b:0
lm2/main/enc_attn-3/layer_norm/scale:0
lm2/main/enc_attn-3/layer_norm/bias:0
lm2/main/enc_ffn-0/conv1/W:0
lm2/main/enc_ffn-0/conv1/b:0
lm2/main/enc_ffn-0/conv2/W:0
lm2/main/enc_ffn-0/conv2/b:0
lm2/ma

In [5]:
# Checkpoint bookkeeping shared with save_model().
# The counter selects the rotating checkpoint slot; last_save_epoch starts one
# full interval in the past so that the very first save request is accepted.
num_model_saves_counter = 0
last_save_epoch = -config.get('min_interval_between_saves')

def save_model():
    """Store the current values of `weights` in a rotating .npz checkpoint.

    Nothing is written when fewer than config['min_interval_between_saves']
    epochs have passed since the previous save. Otherwise the weights are
    written to '<model_path>/model_<k>.npz', where k cycles through
    0 .. max_num_models - 1, so older checkpoints are eventually overwritten.

    Reads the module-level names epoch, config, model_path, max_num_models,
    sess and weights; updates num_model_saves_counter and last_save_epoch.
    """
    global num_model_saves_counter, last_save_epoch

    earliest_allowed_epoch = last_save_epoch + config.get('min_interval_between_saves')
    if epoch < earliest_allowed_epoch:
        print('Do not save the model, because not enough epochs passed')
        return

    slot = num_model_saves_counter % max_num_models
    save_path = '{}/model_{}.npz'.format(model_path, slot)
    print('Saving the model into %s' %save_path)

    # Archive entries are keyed by the full variable name.
    current_values = sess.run(weights)
    weights_dict = {var.name: value for var, value in zip(weights, current_values)}
    np.savez(save_path, **weights_dict)

    last_save_epoch = epoch
    num_model_saves_counter += 1

def validate():
    """Score the model on the validation set and decide whether to keep training.

    While config['warm_up_num_epochs'] is set and larger than the current
    `epoch`, validation is skipped. Otherwise the validation BLEU is computed
    with compute_bleu_for_model and appended to `val_scores`; the model is
    saved when np.argmax picks the new score (i.e. it is strictly higher than
    every earlier one), and early stopping is evaluated when
    config['use_early_stopping'] is truthy.

    Returns:
        bool: True if training should continue, False if it should stop early.
    """
    should_continue = True

    warm_up_num_epochs = config.get('warm_up_num_epochs')
    if warm_up_num_epochs and warm_up_num_epochs > epoch:
        print('Skipping validation, because the model is not warmed up yet')
        return should_continue
    elif warm_up_num_epochs:
        # Previously this case also claimed that warm_up_num_epochs "is not set".
        print('Cool, I will validate, because the warm-up period is over')
    else:
        print('Cool, I will validate, because warm_up_num_epochs is not set')

    print('Validating')
    val_score = compute_bleu_for_model(model, sess, inp_voc, out_voc, src_val, dst_val,
                                        model_name, config, max_len=max_len)
    val_scores.append(val_score)
    print('Validation BLEU: {:0.3f}'.format(val_score))

    # Save model if this is our best model.
    # np.argmax returns the FIRST maximum, so a tie with an earlier score does
    # not trigger a save.
    if np.argmax(val_scores) == len(val_scores)-1:
        print('Saving model because it has the highest validation BLEU.')
        save_model()
    else:
        print('I will not save the model because of its low val_score')

    if config.get('use_early_stopping') and should_stop_early(val_scores, config.get('early_stopping_last_n')):
        print('Model did not improve for last %s steps. Early stopping.' % config.get('early_stopping_last_n'))
        should_continue = False
    else:
        print('Cool, we will not stop early')

    return should_continue

In [6]:
# Main training loop: one pass of the outer loop is one epoch over the
# training set. The loop ends when the time budget is exhausted, validate()
# asks for early stopping, or config['max_epochs'] epochs have been completed.
while should_start_next_epoch:
    batches = batch_generator_over_dataset(src_train, dst_train, batch_size, batches_per_epoch=None)
    with tqdm(batches) as t:
        for batch_src, batch_dst in t:
            # Note: we don't use voc.tokenize_many(batch, max_len=max_len)
            # cuz it forces batch length to be that long and we often get away with much less
            batch_src_ix = inp_voc.tokenize_many(batch_src)[:, :max_len]
            batch_dst_ix = out_voc.tokenize_many(batch_dst)[:, :max_len]

            feed_dict = {inp: batch_src_ix, out: batch_dst_ix}

            # Run the optimizer step and fetch the loss of this batch.
            loss_t = sess.run([train_step, loss], feed_dict)[1]
            loss_history.append(np.mean(loss_t))

            # Redraw the loss curve (raw and exponentially smoothed) every 10 steps.
            if num_iters_done % 10 == 0:
                clear_output(True)
                plt.figure(figsize=[15,8])
                plt.title('Batch loss')
                plt.plot(loss_history)
                plt.plot(ewma(np.array(loss_history),span=50))
                plt.grid()
                plt.show()

            num_iters_done += 1

            if config.get('max_time_seconds'):
                seconds_elapsed = time()-training_start_time

                if seconds_elapsed > config.get('max_time_seconds'):
                    print('Maximum allowed training time reached. Training took %s. Stopping.' % seconds_elapsed)
                    should_start_next_epoch = False
                    break

        # Validation is keyed on the 0-based index of the epoch that just
        # finished; it is skipped when the time limit interrupted the epoch.
        if epoch % config.get('validate_every_num_epochs') == 0 and should_start_next_epoch:
            should_start_next_epoch = validate()

        # Count the finished epoch BEFORE comparing with max_epochs. The old
        # order (compare with ==, then increment) ran max_epochs + 1 epochs.
        epoch +=1

        if config.get('max_epochs') and epoch >= config.get('max_epochs'):
            print('Maximum amount of epochs reached. Stopping.')
            should_start_next_epoch = False
            break

0it [00:00, ?it/s]


ResourceExhaustedError: OOM when allocating tensor with shape[16,91,512]
	 [[Node: transformer_dec/dec_ffn-0/conv2/add = Add[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](transformer_dec/dec_ffn-0/conv2/Reshape_1, transformer/dec_ffn-0/conv2/b/read)]]
	 [[Node: Mean/_2403 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_16917_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'transformer_dec/dec_ffn-0/conv2/add', defined at:
  File "/home/ubuntu/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/ubuntu/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-94ab2ce8263d>", line 21, in <module>
    logprobs = model.symbolic_score(inp, out, is_train=True)[:,:tf.shape(out)[1]]
  File "../models/transformer_fused.py", line 60, in symbolic_score
    rdo = self.transformer.decode(out, out_len, out_reverse, enc_out, enc_attn_mask, is_train)
  File "../models/transformer_other.py", line 412, in decode
    dec_inp = self.dec_ffn[layer](dec_inp)
  File "../lib/layers.py", line 368, in __call__
    out = self.wrapped_layer(out, *args, **kwargs)
  File "../models/transformer_other.py", line 45, in __call__
    outputs = self.second_conv(hidden)
  File "../lib/layers.py", line 46, in __call__
    out = self.activ(dot(inp, self.W) + self.b)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 894, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 183, in add
    "Add", x=x, y=y, name=name)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[16,91,512]
	 [[Node: transformer_dec/dec_ffn-0/conv2/add = Add[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](transformer_dec/dec_ffn-0/conv2/Reshape_1, transformer/dec_ffn-0/conv2/b/read)]]
	 [[Node: Mean/_2403 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_16917_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [7]:
# Show the BLEU history gathered by validate() during training.
print('Validation scores:')
print(val_scores)

# Training is done: score the final weights once more and checkpoint them
# when they are at least as good as anything seen during training (or when
# no validation happened at all).
print('Computing final validation score.')
val_score = compute_bleu_for_model(
    model, sess, inp_voc, out_voc, src_val, dst_val,
    model_name, config, max_len=max_len,
)
print('Final validation BLEU is: {:0.3f}'.format(val_score))

final_is_best = not val_scores or val_score >= max(val_scores)
if final_is_best:
    save_model()

Validation scores:
[0.005051021781026407, 0.005051021781026407, 0.005051021781026407, 0.005051021781026407, 0.005051021781026407, 0.005051021781026407, 0.005051021781026407]
Computing final validation score.
Final validation BLEU is: 0.005
Saving the model into trained_models/transformer/model_1.npz


In [8]:
# Ok, we have trained the thing, let's run inference?
tf.reset_default_graph()

model_name = 'transformer'
data_path = '../en-fr-10'

config = {
    'data_path': data_path,
    'models_dir': '../trained_models/transformer',
    'input_path': os.path.join(data_path, 'bpe_input.txt'),
    'output_path': os.path.join(data_path, 'output.tok.txt'),
    'hp_file_path': '../hp_files/trans_default.json',
    'batch_size_for_inference': 16,
    'target_lm_path': '../trained_models/lm2/model.npz',
    'gpu_memory_fraction': 0.5,
}

In [None]:
# Prepare inference: read the source sentences, rebuild the target LM and
# create one translation model per checkpoint found in config['models_dir'],
# loading each checkpoint into its own variable scope.
input_path = config.get('input_path')
output_path = config.get('output_path')

with open(input_path, 'r', encoding='utf-8') as input_file:
    src_data = input_file.read().splitlines()

inp_voc = Vocab.from_file('{}/1.voc'.format(config.get('data_path')))
out_voc = Vocab.from_file('{}/2.voc'.format(config.get('data_path')))

# We get paths to trained models via this argument
# We do not save optimizer state, so we can read all files from dir.
# Only .npz archives are considered (stray files would break np.load below),
# and the listing is sorted because os.listdir returns an arbitrary order.
models_dir = config.get('models_dir')
paths_to_models = ['{}/{}'.format(models_dir, m)
                   for m in sorted(os.listdir(models_dir)) if m.endswith('.npz')]
if not paths_to_models:
    raise ValueError('No .npz checkpoints found in {}'.format(models_dir))
print('Found models to ensemble:', paths_to_models)

if config.get('hp_file_path'):
    with open(config.get('hp_file_path'), 'r', encoding='utf-8') as hp_file:
        hp = json.load(hp_file)
else:
    hp = {}
gpu_options = create_gpu_options(config)
# The inference config defines no 'max_input_len', so the default applies
# unless the key is added.
max_len = config.get('max_input_len', 200)


sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

lm = TransformerLM('lm2', out_voc, **hp)
if config.get('target_lm_path'):
    ops = []

    # The arrays are turned into assign ops while the archive is open; the
    # archive is closed again before the ops are executed.
    with np.load(config.get('target_lm_path')) as lm_weights:
        for w in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, lm.name):
            if w.name in lm_weights:
                ops.append(tf.assign(w, lm_weights[w.name]))
            else:
                print(w.name, 'not initialized')

    sess.run(ops);
else:
    raise ValueError("Must specify LM path!")

models = []
assigns = []
print('Loading models')
# The loop variable is deliberately NOT called `model_path`: that global is
# the checkpoint directory used by save_model() and must not be overwritten.
for i, checkpoint_path in enumerate(paths_to_models):
    print('Loading model from', checkpoint_path)
    # Loading weights is not an easy task:
    # They were saved in transformer/ scope,
    # but now we should rename them into transformer_i/ to avoid collision
    curr_model_name = 'transformer_' + str(i)
    curr_model = Model(curr_model_name, inp_voc, out_voc, lm, **hp)
    models.append(curr_model)

    curr_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, curr_model_name)
    # Exact-name lookup table instead of a linear scan for every saved tensor.
    curr_weights_by_name = {v.name: v for v in curr_weights}

    # Loading current model state
    with np.load(checkpoint_path) as saved_weights:
        for key, value in saved_weights.items():
            desired_key = curr_model_name + '/' + '/'.join(key.split('/')[1:]).replace('transformer/', '')
            print('Renaming {} => {}'.format(key, desired_key))

            weight_var = curr_weights_by_name.get(desired_key)

            # Identity check: comparing a TF variable with `!= None` relies on
            # operator overloading and is not a reliable None test.
            assert weight_var is not None, \
                'No variable named {} (from {} in {})'.format(desired_key, key, checkpoint_path)
            assigns.append(tf.assign(weight_var, value))

sess.run(assigns)

In [11]:
import math
import numpy as np
import tensorflow as tf
from lib.layers import *
from lib.tensor_utils import *
from collections import namedtuple
from models import TranslateModel


class TransformerEnsemble(TranslateModel):
    """Wraps several translation models so they can be decoded as one.

    The ensemble state is a namedtuple with one field per member model
    (`state_of_model_0`, `state_of_model_1`, ...). `encode` and `decode` fan
    out to every member; `get_logits` returns the arithmetic mean of the
    members' `get_logits` outputs.
    """

    def __init__(self, name, models, inp_voc, out_voc, lm, **hp):
        """Store the member models, vocabularies, LM and hyper-parameters."""
        self.name = name
        self.models = models # Here we keep our models
        self.inp_voc, self.out_voc = inp_voc, out_voc
        self.lm = lm
        self.hp = hp
        self.debug = hp.get('debug')

        # One state slot per member model, in the order of `models`.
        field_names = ['state_of_model_%i' % idx for idx, _ in enumerate(models)]
        self.DecState = namedtuple("transformer_state", field_names)

    def encode(self, batch, **kwargs):
        """Encode `batch` with every member and bundle the resulting states."""
        member_states = []
        for member in self.models:
            member_states.append(member.encode(batch, **kwargs))

        return self.DecState(*member_states)

    def decode(self, states, words=None, is_train=False, **kwargs):
        """Advance every member's state with `words` and bundle the new states."""
        next_states = []
        for member_state, member in zip(states, self.models):
            next_states.append(member.decode(member_state, words, is_train, **kwargs))

        return self.DecState(*next_states)

    def get_rdo(self, dec_states, **kwargs):
        """Return (rdo, out_seq) taken from the FIRST member's state only."""
        first_state = dec_states[0]
        return first_state.rdo, first_state.out_seq

    def get_attnP(self, dec_states, **kwargs):
        """Return attnP taken from the FIRST member's state only."""
        return dec_states[0].attnP

    def get_logits(self, dec_states, **flags):
        """Average the members' logits (sum of get_logits outputs / number of models)."""
        per_model_logits = [
            member.get_logits(member_state, **flags)
            for member_state, member in zip(dec_states, self.models)
        ]

        return sum(per_model_logits) / len(self.models)

In [12]:
# Translate every line of src_data with the ensemble and write the result to
# output_path. A failing batch is retried row by row; a failing row falls back
# to the source text. Failures are reported instead of being silently dropped.
ensemble = TransformerEnsemble(model_name, models, inp_voc, out_voc, lm, **hp)
initialize_uninitialized_variables(sess)

print('Generating translations')
inp = tf.placeholder(tf.int32, [None, None])
sy_translations = ensemble.symbolic_translate(inp, back_prop=False, swap_memory=True).best_out


def _translate_rows(rows):
    """Tokenize `rows`, run the translation op and detokenize the result.

    Args:
        rows: sequence of source strings.
    Returns:
        Whatever out_voc.detokenize_many returns for the translated ids
        (indexed per input row by the caller).
    """
    rows_ix = inp_voc.tokenize_many(rows)[:, :max_len]
    trans_ix = sess.run([sy_translations], feed_dict={inp: rows_ix})[0]
    # deprocess = True gets rid of BOS and EOS
    return out_voc.detokenize_many(trans_ix, unbpe=True, deprocess=True)


translations = []
num_failed_batches = 0
num_failed_rows = 0

for batch in tqdm(iterate_minibatches(src_data, batchsize=config.get('batch_size_for_inference'))):
    src_rows = batch[0]
    try:
        trans = _translate_rows(src_rows)

    except Exception as batch_error:
        # we failed this batch. At least one sample is broken
        num_failed_batches += 1
        print('Batch failed, retrying row by row: {}'.format(repr(batch_error)[:200]))
        trans = []
        for row in src_rows:
            try:
                trans.append(_translate_rows([row])[0])
            except Exception as row_error:
                # we failed this very row. Use src as fallback
                num_failed_rows += 1
                print('Row failed, using source text as translation: {}'.format(repr(row_error)[:200]))
                trans.append(str(row).replace('\n', ''))  # cast to str just in case

    translations.extend(trans)

if num_failed_batches:
    print('Failed batches: {}; rows that fell back to the source text: {}'.format(
        num_failed_batches, num_failed_rows))

print('Saving the results into %s' % output_path)

with open(output_path, 'wb') as output_file:
    output_file.write('\n'.join(translations).encode('utf-8'))

Generating translations
Preparing BEAM SEARCH translate with params: {'swap_memory': True, 'back_prop': False, 'if_no_eos': 'last', 'force_bos': True, 'beam_spread': 3, 'beam_size': 3, 'max_len': None, 'min_len': None, 'batch_placeholder': {'inp': <tf.Tensor 'Placeholder:0' shape=(?, ?) dtype=int32>}, 'self': <lib.inference.BeamSearchInference object at 0x7f94e6b8cda0>, 'model': <__main__.TransformerEnsemble object at 0x7f94f7a0a400>, 'flags': {'len_alpha': 0.9, 'sampling_strategy': 'greedy'}}


313it [10:41,  2.05s/it]

Saving the results into ../en-fr-10/output.tok.txt





In [28]:
import nltk
from nltk import word_tokenize
from bleu import compute_bleu

# Corpus-level BLEU of the ensemble translations against ground_truth.txt.
nltk.download('punkt')

with open(os.path.join(data_path, 'ground_truth.txt'), 'r', encoding='utf-8') as targets_file:
    targets = targets_file.read().splitlines()

# References and hypotheses are paired by position, so a different number of
# lines means the score below compares misaligned sentences.
if len(targets) != len(translations):
    print('WARNING: {} reference lines but {} translations; BLEU may be meaningless'.format(
        len(targets), len(translations)))

print('Tokenizing')
# One reference per sentence, hence the extra list level around each target.
tokenized_refs = [[word_tokenize(t)] for t in targets]
tokenized_tranlations = [word_tokenize(s) for s in translations]

# The first element of compute_bleu's result is reported as the BLEU score.
bleu = compute_bleu(tokenized_refs, tokenized_tranlations)[0]
print('BLEU: {:.4f}'.format(bleu))

[nltk_data] Downloading package punkt to /home/universome/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Tokenizing
BLEU: 0.0032
