In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Show the installed TensorFlow version. The saved output of this cell is
# '1.2.0'; the tf.contrib APIs used further down belong to the 1.x series.
tf.__version__

'1.2.0'

In [3]:
from seq2seq.rnn_seq2seq import create_seq2seq_model, create_seq2seq_experiment_fn

In [4]:
# Start from an empty default graph so that re-running the notebook in the
# same kernel does not pile new ops on top of ones left by an earlier run.
tf.reset_default_graph()

In [5]:
! rm -r ./logs

In [6]:
# Model configuration. Every name defined in this cell is forwarded to
# tf.contrib.training.HParams further down in the notebook.

# ---- Vocabulary and embedding ----
vocab_size = 10
emb_size = 5

# ---- Recurrent network ----
# NOTE(review): the exact meaning of `cell_num` is defined by the seq2seq
# package, not by this notebook -- confirm before tuning it.
cell = "LSTMCell"
cell_num = 2
num_units = 16
num_layers = 2

# ---- Architecture switches ----
attention = True
bidirectional = True
residual_dense = True
residual_connections = True

# ---- Decoding during training ----
training_mode = "scheduled_sampling_embedding"
scheduled_sampling_probability = 0.5

# ---- Decoding at inference ----
beam_width = 3
inference_mode = "beam"

In [7]:
def random_sequences(length_from, length_to,
                     vocab_lower, vocab_upper,
                     batch_size):
    """Return an endless generator of batches of random integer sequences.

    Every item produced by the generator is a list of ``batch_size`` Python
    lists of ints.

    Args:
        length_from: minimum sequence length (inclusive).
        length_to: maximum sequence length (inclusive).
        vocab_lower: smallest token id that can be drawn (inclusive).
        vocab_upper: upper bound for token ids (EXCLUSIVE, because it is
            passed as ``high`` to ``np.random.randint``).
        batch_size: number of sequences in each batch.

    Returns:
        A generator that never terminates; call ``next()`` on it to obtain
        one batch.

    Raises:
        ValueError: if ``length_from > length_to``. The check runs when this
            function is called, not on the first ``next()``.
    """
    # Validate eagerly: this outer function is a plain function (the `yield`
    # lives in the inner generator), so a bad range is reported at the call
    # site instead of at the first iteration.
    if length_from > length_to:
        raise ValueError('length_from > length_to')

    def random_length():
        # Fixed-length case: no random draw is needed.
        if length_from == length_to:
            return length_from
        # randint's upper bound is exclusive, hence the +1.
        return np.random.randint(length_from, length_to + 1)

    def batches():
        while True:
            yield [
                np.random.randint(low=vocab_lower,
                                  high=vocab_upper,
                                  size=random_length()).tolist()
                for _ in range(batch_size)]

    return batches()

In [8]:
# from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn
from seq2seq.input.generator_io import generator_input_fn

In [9]:
def data_generator_py():
    """Build a zero-argument generator function producing copy-task samples.

    Each call of the returned function yields 1000 dicts. Every dict holds
    one random length-9 sequence (drawn via ``random_sequences(9, 9, 2, 9, 1)``)
    as an int32 array under both "inputs" and "targets", and its length as an
    int32 scalar array under both "inputs_length" and "targets_length".
    """
    def generator():
        batches = random_sequences(9, 9, 2, 9, 1)
        produced = 0
        while produced < 1000:
            # Batches have size 1, so the only sequence sits at index 0.
            tokens = np.array(next(batches)[0], dtype=np.int32)
            length = np.array(len(tokens), dtype=np.int32)

            # Targets are the inputs themselves: the model learns to copy.
            yield {
                "inputs": tokens,
                "inputs_length": length,
                "targets": tokens,
                "targets_length": length
            }
            produced += 1
    return generator

In [10]:
# Queue/batching settings that the training and evaluation pipelines share.
_common_input_kwargs = dict(
    batch_size=16,
    shuffle=False,
    num_epochs=None,
    queue_capacity=128,
    pad_data=True)

# Training pipeline: fed by two reader threads.
dg_train_input_fn = generator_input_fn(
    x=data_generator_py(),
    target_key=["targets", "targets_length"],
    num_threads=2,
    **_common_input_kwargs)

# Evaluation pipeline: same settings, single reader thread.
dg_test_input_fn = generator_input_fn(
    x=data_generator_py(),
    target_key=["targets", "targets_length"],
    num_threads=1,
    **_common_input_kwargs)

In [11]:
# Build the experiment factory from the train/eval input functions:
# 20,000 training steps, 100 evaluation steps, min_eval_frequency of 100.
# NOTE(review): these arguments are presumably forwarded to
# tf.contrib.learn.Experiment by the seq2seq package -- confirm there.
experiment_fn = create_seq2seq_experiment_fn(
    dg_train_input_fn, dg_test_input_fn, 
    train_steps=int(2e4), eval_steps=int(1e2), min_eval_frequency=int(1e2))

In [12]:
# Fraction of GPU memory this process may claim; it reaches the Estimator
# through the session config attached to RunConfig below.
gpu_option = 0.5
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_option)

# Checkpoints and summaries are written to ./logs (the directory wiped at
# the top of the notebook).
run_config = tf.contrib.learn.RunConfig(
    session_config=tf.ConfigProto(gpu_options=gpu_options),
    model_dir="./logs")

# Hyper-parameters handed to the model. Architecture and decoding values come
# from the configuration cell; the optimisation settings (learning rate,
# decay, gradient clipping) are set inline here.
hparams = tf.contrib.training.HParams(
    cell_num=cell_num,
    vocab_size=vocab_size, embedding_size=emb_size,
    cell=cell, num_layers=num_layers, num_units=num_units,
    bidirectional=bidirectional, attention=attention,
    residual_connections=residual_connections, 
    residual_dense=residual_dense,
    training_mode=training_mode,
    learning_rate=1e-4,
    lr_decay_steps=100000,
    lr_decay_koef=0.99,
    gradient_clip=10.0,
    scheduled_sampling_probability=scheduled_sampling_probability,
    inference_mode=inference_mode,
    beam_width=beam_width)

In [13]:
# Limit TensorFlow logging to errors so the training cell's output stays short.
tf.logging.set_verbosity(tf.logging.ERROR)
# Run the "train_and_evaluate" schedule; the value returned by run() is the
# cell's displayed output.
tf.contrib.learn.learn_runner.run(
    experiment_fn=experiment_fn,
    run_config=run_config,
    schedule="train_and_evaluate",
    hparams=hparams)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


({'global_step': 20001, 'loss': 1.1041856}, [])

In [14]:
# Input pipeline for prediction: same generator and settings as the
# evaluation pipeline, but with batch_size=1 and a single reader thread.
dg_pred_input_fn = generator_input_fn(
    x=data_generator_py(), 
    target_key=["targets", "targets_length"], 
    batch_size=1, shuffle=False, num_epochs=None, 
    queue_capacity=128, num_threads=1, 
    pad_data=True)

In [15]:
# Build the model with the same run_config (model_dir="./logs") and hparams
# that were used for training.
# NOTE(review): presumably this picks up the latest checkpoint from
# model_dir when predicting -- confirm in the seq2seq package.
model = create_seq2seq_model(config=run_config, hparams=hparams)

In [16]:
# Obtain an iterator of predictions; items are pulled from it with next().
predictions = model.predict(dg_pred_input_fn)

In [17]:
# Take the first prediction from the iterator.
pred = next(predictions)

In [18]:
# Display the prediction: in the saved output it is a dict with a
# 'prediction' array of token ids and a 'score' array, three columns each.
# NOTE(review): the three columns presumably correspond to beam_width=3 --
# confirm against the decoder.
pred

{'prediction': array([[4, 4, 4],
        [4, 4, 4],
        [4, 4, 4],
        [8, 8, 8],
        [4, 4, 4],
        [3, 3, 3],
        [4, 4, 4],
        [4, 3, 4],
        [4, 4, 3],
        [1, 1, 1]], dtype=int32),
 'score': array([[-0.23443063, -2.6063211 , -3.04360557],
        [-0.56695133, -2.49730635, -2.89267826],
        [-1.11435843, -2.39209199, -2.48673224],
        [-2.16065478, -2.32451248, -2.53673363],
        [-2.94408631, -3.16454864, -3.27134466],
        [-3.68032479, -3.80859733, -4.13525438],
        [-3.97157574, -4.36352539, -4.97107124],
        [-4.51236534, -5.02003574, -5.15970612],
        [-5.23004818, -5.45817852, -5.61370182],
        [-5.23004818, -5.45817852, -5.6137023 ]], dtype=float32)}