TensorFlow 1.4.0 is required.
This notebook is based on the [NMT Tutorial](https://github.com/tensorflow/nmt).

In [None]:
import platform


def colab():
    """Return True unless the current platform reports itself as "Darwin".

    The notebook treats every non-macOS host as a Colab runtime.
    """
    running_on_mac = platform.system() == "Darwin"
    return not running_on_mac


In [None]:
from __future__ import print_function

import copy
import datetime
import os
from random import randint

import numpy as np
import tensorflow as tf
from tensorflow.python.layers import core as layers_core
from tensorflow.python.platform import gfile

# matplotlib is only imported when colab() is true, i.e. when the host is not macOS.
if colab():
    import matplotlib.pyplot as plt

# Note to self: Seq2Seq is summarized at http://d.hatena.ne.jp/higepon/20171210/1512887715.

# Show the installed TensorFlow version.
print(tf.__version__)

# IPython shell escapes: create the checkpoint directories and list one of them.
# The formatter markers keep an IDE formatter away from the "!" lines.
#@formatter:off
!mkdir "./saved_model2"
!mkdir "./saved_model"
!ls -la ./saved_model2
#@formatter:on

# TODO
# Use GRU instead of LSTM

# Tiny hyper-parameter set (small vocabulary, short sequences).
test_hparams = tf.contrib.training.HParams(
    batch_size=3,
    encoder_length=5,
    decoder_length=5,
    num_units=6,
    vocab_size=9,
    embedding_size=8,
    learning_rate=0.01,
    max_gradient_norm=5.0,
    beam_width=9,
    use_attention=False,
    num_train_steps=100,
    debug_verbose=False
)

# Same as test_hparams, but with attention enabled.
test_attention_hparams = copy.deepcopy(test_hparams)
test_attention_hparams.use_attention = True

# Medium-sized hyper-parameter set.
real_hparams = tf.contrib.training.HParams(
    batch_size=25,  # The number of tweets should be divisible by batch_size.
    encoder_length=20,
    decoder_length=20,
    num_units=1024,
    vocab_size=500,
    embedding_size=256,
    learning_rate=0.01,
    max_gradient_norm=5.0,
    beam_width=9,
    use_attention=False,
    num_train_steps=16,
    debug_verbose=False
)

# Large hyper-parameter set (50000-word vocabulary, 30-step sequences).
large_hparams = tf.contrib.training.HParams(
    batch_size=50,  # The number of tweets should be divisible by batch_size.
    encoder_length=30,
    decoder_length=30,
    num_units=1024,
    vocab_size=50000,
    embedding_size=1024,
    learning_rate=0.01,
    max_gradient_norm=5.0,
    beam_width=2,  # Kept small for faster iteration; this should be 10.
    use_attention=False,
    num_train_steps=1000000,
    debug_verbose=False
)

# Model path
model_path = "./saved_model/twitter"

# Word id that marks the start of decoding.
tgt_sos_id = 0

# Word id that marks the end of decoding.
tgt_eos_id = 1

# Word id used to pad sequences up to a fixed length.
pad_id = 2

# Presumably the id for out-of-vocabulary words; not used in this part of the file.
unk_id = 3


def info(message, hparams):
    """Print ``message`` only when ``hparams.debug_verbose`` is truthy."""
    if not hparams.debug_verbose:
        return
    print(message)


In [None]:
# Debugging aid: reset TensorFlow's default graph before the model classes
# below are defined and instantiated.
tf.reset_default_graph()


class ChatbotModel:
    def __init__(self, sess, hparams, model_path, scope='ChatbotModel'):
        """Build the training graph: encoder, training decoder and optimizer.

        Args:
            sess: session used by every method that runs the graph.
            hparams: hyper-parameters handed to the graph builders.
            model_path: directory used for checkpoints by restore()/save().
            scope: variable scope name; also used as the checkpoint file prefix.
        """
        self.sess = sess
        # todo remove
        self.hparams = hparams

        # todo
        self.model_path = model_path
        self.name = scope

        # Encoder first: the decoder consumes its state, outputs and embedding.
        encoder_parts = self._build_encoder(hparams, scope)
        (self.encoder_inputs, self.encoder_inputs_lengths,
         encoder_outputs, encoder_state, embedding_encoder) = encoder_parts

        decoder_parts = self._build_decoder(
            hparams, self.encoder_inputs_lengths, embedding_encoder,
            encoder_state, encoder_outputs, scope)
        self.decoder_inputs, self.decoder_target_lengths, logits = decoder_parts

        # The reward placeholder must exist before the optimizer is built,
        # because _build_optimizer scales the loss by self.reward.
        self.reward = tf.placeholder(tf.float32, name="reward")
        optimizer_parts = self._build_optimizer(hparams, logits)
        (self.target_labels, self.loss,
         self.global_step, self.train_op) = optimizer_parts

        # Initialize saver after model created
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    def restore(self):
        """Load the latest checkpoint found under ``self.model_path``.

        Returns:
            True when a checkpoint was found and restored into ``self.sess``,
            False (after printing a notice) when there is none.
        """
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        if not ckpt:
            print("Created fresh model.")
            return False
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        return True

    def train(self, encoder_inputs, encoder_inputs_lengths, target_labels,
              decoder_inputs, decoder_target_lengths, reward=1.0):
        feed_dict = {
            self.encoder_inputs: encoder_inputs,
            self.encoder_inputs_lengths: encoder_inputs_lengths,
            self.target_labels: target_labels,
            self.decoder_inputs: decoder_inputs,
            self.decoder_target_lengths: decoder_target_lengths,
            # For normal Seq2Seq reward is always 1.
            self.reward: reward
        }
        _, loss_value, global_step = self.sess.run(
            [self.train_op, self.loss, self.global_step], feed_dict=feed_dict)
        return loss_value, global_step

    def batch_loss(self, encoder_inputs, encoder_inputs_lengths, target_labels,
                   decoder_inputs, decoder_target_lengths):
        feed_dict = {
            self.encoder_inputs: encoder_inputs,
            self.encoder_inputs_lengths: encoder_inputs_lengths,
            self.target_labels: target_labels,
            self.decoder_inputs: decoder_inputs,
            self.decoder_target_lengths: decoder_target_lengths,
            # For normal Seq2Seq reward is always 1.
            self.reward: 1
        }
        return self.sess.run(self.loss, feed_dict=feed_dict)

    def train_with_reward(self, infer_model, standard_seq2seq_model,
                          encoder_inputs, encoder_inputs_lengths, target_labels,
                          decoder_inputs, decoder_target_lengths,
                          dull_responses):
        infered_replies = infer_model.infer(encoder_inputs,
                                            encoder_inputs_lengths)
        standard_seq2seq_encoder_inputs = []
        standard_seq2seq_encoder_inputs_lengths = []
        for reply in infered_replies:
            standard_seq2seq_encoder_inputs_lengths.append(len(reply))
            if len(reply) <= self.hparams.encoder_length:
                standard_seq2seq_encoder_inputs.append(np.append(reply, (
                    [pad_id] * (self.hparams.encoder_length - len(reply)))))
            else:
                raise Exception(
                    "Infered reply is not suppose to be longer than encoder_input")
        standard_seq2seq_encoder_inputs = np.transpose(
            np.array(standard_seq2seq_encoder_inputs))
        reward1 = standard_seq2seq_model.reward_ease_of_answering(
            standard_seq2seq_encoder_inputs,
            standard_seq2seq_encoder_inputs_lengths, dull_responses)
        reward2 = 0  # todo
        reward3 = 0  # todo
        reward = 0.25 * reward1 + 0.25 * reward2 + 0.5 * reward3
        return self.train(encoder_inputs, encoder_inputs_lengths, target_labels,
                          decoder_inputs, decoder_target_lengths, reward)

    def save(self, model_path=None):
        if model_path is None:
            model_path = self.model_path
        model_dir = "{}/{}".format(model_path, self.name)
        self.saver.save(self.sess, model_dir, global_step=self.global_step)

    @staticmethod
    def _softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def _build_optimizer(self, hparams, logits):
        """Build the loss and the Adam training op for the given logits.

        Reads ``self.reward``, so that placeholder must exist before this
        method is called (``__init__`` creates it just before the call).

        Returns:
            ``(target_labels, loss, global_step, train_op)``.
        """
        # Target labels
        #   As described in the docs for sparse_softmax_cross_entropy_with_logits,
        #   labels should be [batch_size, decoder_target_lengths] instead of
        #   [batch_size, decoder_target_lengths, vocab_size].
        #   So labels hold word indices rather than vocab_size-wide class vectors.
        target_labels = tf.placeholder(tf.int32, shape=(
            hparams.batch_size, hparams.decoder_length), name="target_labels")

        # Loss
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=target_labels, logits=logits)

        # Sum over every time step of every row, divided by batch_size.
        # NOTE(review): no mask is applied here, so padded positions also
        # contribute to the loss - confirm this is intended.
        loss = tf.reduce_sum(crossent / tf.to_float(hparams.batch_size))
        # Adjust loss with reward.
        loss = tf.multiply(loss, self.reward)

        # Train
        #   Non-trainable step counter; apply_gradients below receives it as
        #   global_step.
        global_step = tf.get_variable(name="global_step", shape=[],
                                      dtype=tf.int32,
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Calculate and clip gradients
        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, hparams.max_gradient_norm)

        # Optimization
        optimizer = tf.train.AdamOptimizer(hparams.learning_rate)
        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, params), global_step=global_step)
        return target_labels, loss, global_step, train_op

    def _build_encoder(self, hparams, scope):
        """Build the encoder with fixed-size placeholders for training.

        Returns:
            ``(encoder_inputs, encoder_inputs_lengths, encoder_outputs,
            encoder_state, embedding_encoder)``.
        """
        # Encoder
        #   encoder_inputs: [encoder_length, batch_size]
        #   This is time major: encoder_length comes first instead of batch_size.
        #   encoder_inputs_lengths: [batch_size]
        encoder_inputs = tf.placeholder(tf.int32, shape=(
            hparams.encoder_length, hparams.batch_size), name="encoder_inputs")
        # NOTE: the op name below is spelled "encoder_inputs_lengtsh". It is a
        # runtime name, so it is left untouched here.
        encoder_inputs_lengths = tf.placeholder(tf.int32,
                                                shape=(hparams.batch_size),
                                                name="encoder_inputs_lengtsh")

        # Embedding
        #   We originally didn't share the embedding between encoder and decoder,
        #   but now we share it. It makes calculating rewards much easier.
        #   Matrix for embedding: [vocab_size, embedding_size]
        #   Should be shared between training and inference.
        with tf.variable_scope(scope):
            embedding_encoder = tf.get_variable("embedding_encoder",
                                                [hparams.vocab_size,
                                                 hparams.embedding_size])

        # Look up embedding:
        #   encoder_inputs: [encoder_length, batch_size]
        #   encoder_emb_inputs: [encoder_length, batch_size, embedding_size]
        encoder_emb_inputs = tf.nn.embedding_lookup(embedding_encoder,
                                                    encoder_inputs)

        # LSTM cell.
        with tf.variable_scope(scope):
            # Should be shared between training and inference.
            encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hparams.num_units)

        # Run Dynamic RNN
        #   encoder_outputs: [encoder_length, batch_size, num_units]
        #   encoder_state: final state of the cell for each batch entry
        #   (presumably an LSTM (c, h) pair of [batch_size, num_units] - TODO confirm).
        with tf.variable_scope(scope):
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell,
                                                               encoder_emb_inputs,
                                                               time_major=True,
                                                               dtype=tf.float32,
                                                               sequence_length=encoder_inputs_lengths)

        return encoder_inputs, encoder_inputs_lengths, encoder_outputs, encoder_state, embedding_encoder


    def _build_training_decoder(self, hparams, encoder_inputs_lengths,
                                encoder_state, encoder_outputs, decoder_cell,
                                decoder_emb_inputs, decoder_target_lengths,
                                projection_layer):
        """Build the teacher-forced decoder used for training.

        When ``hparams.use_attention`` is set, ``decoder_cell`` is wrapped with
        Luong attention over the encoder outputs. Otherwise the cell is used
        as-is and starts from ``encoder_state``.

        Returns:
            ``(logits, wrapped_decoder_cell, initial_state)`` where ``logits``
            is the decoder's ``rnn_output``.
        """
        # Decoder with helper:
        #   decoder_emb_inputs: [decoder_length, batch_size, embedding_size]
        #   decoder_target_lengths: [batch_size] vector holding each target sequence length.
        training_helper = tf.contrib.seq2seq.TrainingHelper(decoder_emb_inputs,
                                                            decoder_target_lengths,
                                                            time_major=True)

        # See https://github.com/tensorflow/tensorflow/issues/11904
        if hparams.use_attention:
            # Attention
            # encoder_outputs is time major, so transpose it to batch major.
            # attention_encoder_outputs: [batch_size, encoder_length, num_units]
            attention_encoder_outputs = tf.transpose(encoder_outputs, [1, 0, 2])

            # Create an attention mechanism
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                hparams.num_units,
                attention_encoder_outputs,
                memory_sequence_length=encoder_inputs_lengths)

            wrapped_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                decoder_cell, attention_mechanism,
                attention_layer_size=hparams.num_units)

            # Start from the wrapper's zero state, with its cell state replaced
            # by the encoder's final state.
            initial_state = wrapped_decoder_cell.zero_state(hparams.batch_size,
                                                            tf.float32).clone(
                cell_state=encoder_state)
        else:
            wrapped_decoder_cell = decoder_cell
            initial_state = encoder_state

        # Decoder and decode
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            wrapped_decoder_cell, training_helper, initial_state,
            output_layer=projection_layer)

        # Dynamic decoding
        #   final_outputs.rnn_output: [batch_size, decoder_length, vocab_size], the projected outputs.
        #   final_outputs.sample_id: [batch_size, decoder_length], argmax of rnn_output.
        #   final_state: [batch_size, num_units], final state of the RNN after decoding.
        #   final_sequence_lengths: [batch_size], length of each decoded sequence.
        final_outputs, _final_state, _final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
            training_decoder)

        if hparams.debug_verbose:
            print("rnn_output.shape=", final_outputs.rnn_output.shape)
            print("sample_id.shape=", final_outputs.sample_id.shape)
            print("final_state=", _final_state)
            print("final_sequence_lengths.shape=",
                  _final_sequence_lengths.shape)

        logits = final_outputs.rnn_output
        return logits, wrapped_decoder_cell, initial_state

    def _build_decoder(self, hparams, encoder_inputs_lengths, embedding_encoder,
                       encoder_state, encoder_outputs, scope):
        """Create the decoder placeholders, cell and projection; build the training decoder.

        ``scope`` is accepted but not used inside this method.

        Returns:
            ``(decoder_inputs, decoder_target_lengths, logits)``.
        """
        # Decoder input
        #   decoder_inputs: [decoder_length, batch_size]
        #   decoder_target_lengths: [batch_size]
        #   These are the ground-truth target inputs for training.
        decoder_inputs = tf.placeholder(tf.int32, shape=(
            hparams.decoder_length, hparams.batch_size), name="decoder_inputs")
        decoder_target_lengths = tf.placeholder(tf.int32,
                                                shape=(hparams.batch_size),
                                                name="decoder_target_lengths")

        # Look up embedding (the encoder's embedding matrix is reused):
        #   decoder_inputs: [decoder_length, batch_size]
        #   decoder_emb_inputs: [decoder_length, batch_size, embedding_size]
        decoder_emb_inputs = tf.nn.embedding_lookup(embedding_encoder,
                                                    decoder_inputs)

        # https://stackoverflow.com/questions/39573188/output-projection-in-seq2seq-model-tensorflow
        # Internally, a neural network operates on dense vectors of some size,
        # often 256, 512 or 1024 floats (let's say 512 here).
        # But at the end it needs to predict a word from the vocabulary, which is often much larger,
        # e.g., 40000 words. Output projection is the final linear layer that converts (projects)
        # from the internal representation to the larger one.
        # So, for example, it can consist of a 512 x 40000 parameter matrix and a 40000-parameter
        # bias vector (no bias is used here: use_bias=False).
        projection_layer = layers_core.Dense(hparams.vocab_size, use_bias=False)

        # We share this between training and inference.
        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hparams.num_units)

        # Training graph
        #   Only logits is used afterwards; the other two return values are ignored.
        logits, wrapped_decoder_cell, initial_state = self._build_training_decoder(
            hparams, encoder_inputs_lengths, encoder_state, encoder_outputs,
            decoder_cell, decoder_emb_inputs, decoder_target_lengths,
            projection_layer)

        return decoder_inputs, decoder_target_lengths, logits


In [None]:
class ChatbotInferenceModel:
    def __init__(self, sess, hparams, model_path, scope='ChatbotModel'):
        """Build the inference graph: encoder plus greedy and beam-search decoders.

        Args:
            sess: session used by every method that runs the graph.
            hparams: hyper-parameters handed to the graph builders.
            model_path: directory searched for checkpoints by restore().
            scope: variable scope name.
        """
        self.sess = sess
        # todo remove
        self.hparams = hparams

        # todo
        self.model_path = model_path
        self.name = scope

        encoder_parts = self._build_encoder(hparams, scope)
        (self.encoder_inputs, self.encoder_inputs_lengths,
         encoder_outputs, encoder_state, embedding_encoder) = encoder_parts

        decoder_parts = self._build_decoder(
            hparams, self.encoder_inputs_lengths, embedding_encoder,
            encoder_state, encoder_outputs, scope)
        (self.decoder_inputs, self.decoder_target_lengths, self.replies,
         self.beam_replies, self.infer_logits) = decoder_parts

        self.reward = tf.placeholder(tf.float32, name="reward")

        # we can't use variable length here,  because tiled_batch requires constant length.
        self.batch_size = 1

        # Initialize saver after model created
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    def restore(self):
        """Load the latest checkpoint found under ``self.model_path``.

        Returns:
            True when a checkpoint was found and restored into ``self.sess``,
            False (after printing a notice) when there is none.
        """
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        if not ckpt:
            print("Created fresh model.")
            return False
        self.saver.restore(self.sess, ckpt.model_checkpoint_path)
        return True

    def infer(self, encoder_inputs, encoder_inputs_lengths):
        inference_feed_dict = {
            self.encoder_inputs: encoder_inputs,
            self.encoder_inputs_lengths: encoder_inputs_lengths,
        }
        replies = self.sess.run(self.replies, feed_dict=inference_feed_dict)
        return replies

    def infer_beam_search(self, encoder_inputs, encoder_inputs_lengths):
        inference_feed_dict = {
            self.encoder_inputs: encoder_inputs,
            self.encoder_inputs_lengths: encoder_inputs_lengths,
        }
        replies = self.sess.run(self.beam_replies,
                                feed_dict=inference_feed_dict)
        return replies


    def log_prob(self, encoder_inputs, encoder_inputs_lengths, expected_output):
        """Return sum of log probability of given one specific expected_output for sencoder_inputs.
    
        Args:
            encoder_inputs: [encoder_length, batch_size], eg) tweets
            expected_output: [1, decoder_length or less than decoder_length], eg) One reply.
    
        Returns:
            Return log probablity of expected output for given encoder inputs.
            eg) sum of log probability of reply "Good" when given ["How are you?", "What's up?"]
        """
        inference_feed_dict = {
            self.encoder_inputs: encoder_inputs,
            self.encoder_inputs_lengths: encoder_inputs_lengths
        }

        # Logits
        #   logits_value: [batch_size, actual_decoder_length, vocab_size]
        logits_batch_value = self.sess.run(self.infer_logits,
                                           feed_dict=inference_feed_dict)

        sum_p = []
        # For each batch: [actual_decoder_length, vocab_size]
        for logits in logits_batch_value:
            p = 1
            # Note that expected_output and logits don't always have same length, but zip takes care of the case.
            for word_id, logit in zip(expected_output, logits):
                # Apply softmax first, see definition of softmax.
                norm = (self._softmax(logit))[word_id]
                p *= norm
            p = np.log(p)
            sum_p.append(p)
        ret = np.sum(sum_p) / len(sum_p)
        return ret

    def reward_ease_of_answering(self, encoder_inputs, encoder_inputs_lengths,
                                 expected_outputs):
        """ Return reward for ease of answering. See Deep Reinforcement Learning for Dialogue Generation for more details.
    
        Args:
            encoder_inputs: [encoder_length, batch_size], eg) tweets
            expected_outputs: [number of pre-defined dull responses, decoder_length or less than decoder_length]. eg) [["I'm", "Good"], ["fine"]]
    
        Returns:
            Return reward for ease of answering.
            Note that this can be calcualated by calling log_prob function for each dull response,
            but this function is more efficient because this calculated the reward at once.
        """
        inference_feed_dict = {
            self.encoder_inputs: encoder_inputs,
            self.encoder_inputs_lengths: encoder_inputs_lengths
        }

        # Logits
        #   logits_value: [batch_size, actual_decoder_length, vocab_size]
        logits_batch_value = self.sess.run(self.infer_logits,
                                           feed_dict=inference_feed_dict)

        batch_sum_p = []
        # For each batch: [actual_decoder_length, vocab_size]
        for logits in logits_batch_value:
            sum_p = []
            for expected_output in expected_outputs:
                p = 1
                # Note that expected_output and logits don't always have same length, but zip takes care of the case.
                for word_id, logit in zip(expected_output, logits):
                    # Apply softmax first, see definition of softmax.
                    norm = (self._softmax(logit))[word_id]
                    p *= norm
                p = np.log(p) / len(expected_output)
                sum_p.append(p)
            one_batch_p = np.sum(sum_p)
            batch_sum_p.append(one_batch_p)
        ret = np.sum(batch_sum_p) / len(batch_sum_p)
        return -ret


    @staticmethod
    def _softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def _build_encoder(self, hparams, scope):
        """Build the encoder with a variable batch dimension for inference.

        Returns:
            ``(encoder_inputs, encoder_inputs_lengths, encoder_outputs,
            encoder_state, embedding_encoder)``.
        """
        # Encoder
        #   encoder_inputs: [encoder_length, batch_size]
        #   This is time major: encoder_length comes first instead of batch_size.
        #   encoder_inputs_lengths: [batch_size]
        #   The batch dimension is left as None in both placeholders.
        encoder_inputs = tf.placeholder(tf.int32,
                                        shape=[hparams.encoder_length, None],
                                        name="encoder_inputs")
        # NOTE: the op name below is spelled "encoder_inputs_lengtsh". It is a
        # runtime name, so it is left untouched here.
        encoder_inputs_lengths = tf.placeholder(tf.int32, shape=[None],
                                                name="encoder_inputs_lengtsh")

        # Embedding
        #   We originally didn't share the embedding between encoder and decoder,
        #   but now we share it. It makes calculating rewards much easier.
        #   Matrix for embedding: [vocab_size, embedding_size]
        #   Should be shared between training and inference.
        with tf.variable_scope(scope):
            embedding_encoder = tf.get_variable("embedding_encoder",
                                                [hparams.vocab_size,
                                                 hparams.embedding_size])

        # Look up embedding:
        #   encoder_inputs: [encoder_length, batch_size]
        #   encoder_emb_inputs: [encoder_length, batch_size, embedding_size]
        encoder_emb_inputs = tf.nn.embedding_lookup(embedding_encoder,
                                                    encoder_inputs)

        # LSTM cell.
        with tf.variable_scope(scope):
            # Should be shared between training and inference.
            encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hparams.num_units)

        # Run Dynamic RNN
        #   encoder_outputs: [encoder_length, batch_size, num_units]
        #   encoder_state: final state of the cell for each batch entry
        #   (presumably an LSTM (c, h) pair of [batch_size, num_units] - TODO confirm).
        with tf.variable_scope(scope):
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell,
                                                               encoder_emb_inputs,
                                                               time_major=True,
                                                               dtype=tf.float32,
                                                               sequence_length=encoder_inputs_lengths)

        return encoder_inputs, encoder_inputs_lengths, encoder_outputs, encoder_state, embedding_encoder

    def _build_greedy_inference(self, hparams, embedding_encoder, encoder_state,
                                encoder_inputs_lengths, encoder_outputs,
                                decoder_cell, projection_layer):
        """Build the greedy decoder.

        Decoding starts from ``tgt_sos_id`` for every batch entry, uses
        ``tgt_eos_id`` as the end token, and runs for at most
        ``hparams.encoder_length`` steps.

        Returns:
            ``(infer_logits, replies)``: the decoder's ``rnn_output`` and
            ``sample_id``.
        """
        # Greedy decoder
        #   The batch size is read from the lengths tensor at run time.
        dynamic_batch_size = tf.shape(encoder_inputs_lengths)[0]
        inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding_encoder,
            tf.fill([dynamic_batch_size], tgt_sos_id), tgt_eos_id)

        # See https://github.com/tensorflow/tensorflow/issues/11904
        if hparams.use_attention:
            # Attention
            # encoder_outputs is time major, so transpose it to batch major.
            # attention_encoder_outputs: [batch_size, encoder_length, num_units]
            attention_encoder_outputs = tf.transpose(encoder_outputs, [1, 0, 2])

            # Create an attention mechanism
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                hparams.num_units,
                attention_encoder_outputs,
                memory_sequence_length=encoder_inputs_lengths)

            wrapped_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                decoder_cell, attention_mechanism,
                attention_layer_size=hparams.num_units)

            # Start from the wrapper's zero state, with its cell state replaced
            # by the encoder's final state.
            initial_state = wrapped_decoder_cell.zero_state(dynamic_batch_size,
                                                            tf.float32).clone(
                cell_state=encoder_state)
        else:
            wrapped_decoder_cell = decoder_cell
            initial_state = encoder_state

        inference_decoder = tf.contrib.seq2seq.BasicDecoder(
            wrapped_decoder_cell, inference_helper, initial_state,
            output_layer=projection_layer)

        # len(inferred_reply) is <= encoder_length, because we are targeting tweets (140 characters each).
        # This also lets us pass the reply to another seq2seq model without shortening it.
        maximum_iterations = hparams.encoder_length

        # Dynamic decoding
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            inference_decoder, maximum_iterations=maximum_iterations)
        replies = outputs.sample_id

        # We use infer_logits instead of the training logits when calculating log_prob,
        # because infer_logits doesn't require the decoder_target_lengths input.
        infer_logits = outputs.rnn_output
        return infer_logits, replies


    def _build_beam_search_inference(self, hparams, encoder_inputs_lengths,
                                     embedding_encoder, encoder_state,
                                     encoder_outputs, decoder_cell,
                                     projection_layer):
        """Build the beam-search decoder.

        Encoder tensors are tiled ``hparams.beam_width`` times with
        ``tile_batch`` before they are handed to the decoder. Decoding runs for
        at most ``hparams.encoder_length`` steps.

        Returns:
            ``beam_replies``: the decoder's ``predicted_ids``.
        """
        # The batch size is read from the lengths tensor at run time.
        dynamic_batch_size = tf.shape(encoder_inputs_lengths)[0]
        # https://github.com/tensorflow/tensorflow/issues/11904
        if hparams.use_attention:
            # Attention
            # encoder_outputs is time major, so transpose it to batch major.
            # attention_encoder_outputs: [batch_size, encoder_length, num_units]
            attention_encoder_outputs = tf.transpose(encoder_outputs, [1, 0, 2])

            # Tile the attention memory, the final state and the lengths.
            tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch(
                attention_encoder_outputs, multiplier=hparams.beam_width)
            tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
                encoder_state, multiplier=hparams.beam_width)
            tiled_encoder_inputs_lengths = tf.contrib.seq2seq.tile_batch(
                encoder_inputs_lengths, multiplier=hparams.beam_width)

            # Create an attention mechanism
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                hparams.num_units, tiled_encoder_outputs,
                memory_sequence_length=tiled_encoder_inputs_lengths)

            wrapped_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                decoder_cell, attention_mechanism,
                attention_layer_size=hparams.num_units)

            # Zero state sized batch_size * beam_width, with its cell state
            # replaced by the tiled encoder state.
            decoder_initial_state = wrapped_decoder_cell.zero_state(
                dtype=tf.float32,
                batch_size=dynamic_batch_size * hparams.beam_width)
            decoder_initial_state = decoder_initial_state.clone(
                cell_state=tiled_encoder_final_state)

            # todo (the lengths are already tiled above - confirm and remove)
            #    X_seq_len = tf.contrib.seq2seq.tile_batch(X_seq_len, multiplier=BEAM_WIDTH)

        else:
            wrapped_decoder_cell = decoder_cell
            decoder_initial_state = tf.contrib.seq2seq.tile_batch(encoder_state,
                                                                  multiplier=hparams.beam_width)

        # len(inferred_reply) is <= encoder_length, because we are targeting tweets (140 characters each).
        # This also lets us pass the reply to another seq2seq model without shortening it.
        maximum_iterations = hparams.encoder_length

        inference_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
            cell=wrapped_decoder_cell,
            embedding=embedding_encoder,
            start_tokens=tf.fill([dynamic_batch_size], tgt_sos_id),
            end_token=tgt_eos_id,
            initial_state=decoder_initial_state,
            beam_width=hparams.beam_width,
            output_layer=projection_layer,
            length_penalty_weight=0.0)

        # Dynamic decoding
        beam_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            inference_decoder, maximum_iterations=maximum_iterations)
        beam_replies = beam_outputs.predicted_ids
        return beam_replies

    def _build_decoder(self, hparams, encoder_inputs_lengths, embedding_encoder,
                       encoder_state, encoder_outputs, scope):
        """Create the decoder cell and projection; build both inference decoders.

        The greedy and beam-search decoders receive the same ``decoder_cell``
        and ``projection_layer`` objects. ``scope`` is accepted but not used
        inside this method.

        Returns:
            ``(decoder_inputs, decoder_target_lengths, replies, beam_replies,
            infer_logits)``.
        """
        # Decoder input
        #   decoder_inputs: [decoder_length, batch_size]
        #   decoder_target_lengths: [batch_size]
        #   These are the ground-truth target inputs used for training.
        decoder_inputs = tf.placeholder(tf.int32,
                                        shape=[hparams.decoder_length, None],
                                        name="decoder_inputs")
        decoder_target_lengths = tf.placeholder(tf.int32, shape=[None],
                                                name="decoder_target_lengths")

        # Look up embedding (the encoder's embedding matrix is reused):
        #   decoder_inputs: [decoder_length, batch_size]
        #   decoder_emb_inputs: [decoder_length, batch_size, embedding_size]
        #   Neither decoder built below takes decoder_emb_inputs.
        decoder_emb_inputs = tf.nn.embedding_lookup(embedding_encoder,
                                                    decoder_inputs)

        # https://stackoverflow.com/questions/39573188/output-projection-in-seq2seq-model-tensorflow
        # Internally, a neural network operates on dense vectors of some size,
        # often 256, 512 or 1024 floats (let's say 512 here).
        # But at the end it needs to predict a word from the vocabulary, which is often much larger,
        # e.g., 40000 words. Output projection is the final linear layer that converts (projects)
        # from the internal representation to the larger one.
        # So, for example, it can consist of a 512 x 40000 parameter matrix and a 40000-parameter
        # bias vector (no bias is used here: use_bias=False).
        projection_layer = layers_core.Dense(hparams.vocab_size, use_bias=False)

        # We share this between training and inference.
        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(hparams.num_units)

        # Greedy Inference graph
        infer_logits, replies = self._build_greedy_inference(hparams,
                                                             embedding_encoder,
                                                             encoder_state,
                                                             encoder_inputs_lengths,
                                                             encoder_outputs,
                                                             decoder_cell,
                                                             projection_layer)

        # Beam Search Inference graph
        beam_replies = self._build_beam_search_inference(hparams,
                                                         encoder_inputs_lengths,
                                                         embedding_encoder,
                                                         encoder_state,
                                                         encoder_outputs,
                                                         decoder_cell,
                                                         projection_layer)

        return decoder_inputs, decoder_target_lengths, replies, beam_replies, infer_logits


In [None]:
#@formater:off
def clear_saved_model():
    # Reset the three local checkpoint directories (./saved_model,
    # ./saved_model2, ./saved_model3): each one is removed recursively and
    # then recreated empty.
    # The body uses IPython "!" shell escapes, so this function only works
    # when the cell is executed by IPython/Jupyter, not by plain Python.
    ! rm -rf ./saved_model 
    ! mkdir ./saved_model 
    ! rm -rf ./saved_model2 
    ! mkdir ./saved_model2 
    ! rm -rf ./saved_model3 
    ! mkdir ./saved_model3
#@formatter:on

In [None]:
# Helper functions to test
def make_test_training_data(hparams):
    """Build one random batch of (tweet, reply) id sequences for the tests.

    Args:
        hparams: object providing batch_size, encoder_length,
            decoder_length and vocab_size.

    Returns:
        A tuple (first_tweet, train_encoder_inputs,
        train_encoder_inputs_lengths, training_target_labels,
        training_decoder_inputs):
        - first_tweet: the tweet stored in batch entry 0 (None when
          batch_size is 0);
        - train_encoder_inputs: int array, shape
          (encoder_length, batch_size);
        - train_encoder_inputs_lengths: int array, shape (batch_size,);
        - training_target_labels: int array, shape
          (batch_size, decoder_length), each row ends with tgt_eos_id;
        - training_decoder_inputs: int array, shape
          (decoder_length, batch_size), each column starts with tgt_sos_id.
    """
    # The builtin int is used as dtype: the np.int alias it replaces was
    # deprecated and later removed from NumPy.
    train_encoder_inputs = np.empty(
        (hparams.encoder_length, hparams.batch_size), dtype=int)
    train_encoder_inputs_lengths = np.empty(hparams.batch_size, dtype=int)
    training_target_labels = np.empty(
        (hparams.batch_size, hparams.decoder_length), dtype=int)
    training_decoder_inputs = np.empty(
        (hparams.decoder_length, hparams.batch_size), dtype=int)

    # We keep first tweet to validate inference.
    first_tweet = None

    for i in range(hparams.batch_size):
        # Tweet
        tweet = np.random.randint(low=0, high=hparams.vocab_size,
                                  size=hparams.encoder_length)
        train_encoder_inputs[:, i] = tweet
        train_encoder_inputs_lengths[i] = len(tweet)
        # Reply
        #   Note that low = 2, as 0 and 1 are reserved.
        reply = np.random.randint(low=2, high=hparams.vocab_size,
                                  size=hparams.decoder_length - 1)

        training_target_label = np.concatenate((reply, np.array([tgt_eos_id])))
        training_target_labels[i] = training_target_label

        training_decoder_input = np.concatenate(([tgt_sos_id], reply))
        training_decoder_inputs[:, i] = training_decoder_input

        if i == 0:
            first_tweet = tweet
            info("0th tweet={}".format(tweet), hparams)
            info("0th reply_with_eos_suffix={}".format(training_target_label),
                 hparams)
            info("0th reply_with_sos_prefix={}".format(training_decoder_input),
                 hparams)

    # Dump the batch once, after every column/row has been filled; doing it
    # inside the loop would log still-uninitialised np.empty() contents.
    info("Tweets", hparams)
    info(train_encoder_inputs, hparams)
    info("Replies", hparams)
    info(training_target_labels, hparams)
    info(training_decoder_inputs, hparams)
    return first_tweet, train_encoder_inputs, train_encoder_inputs_lengths, training_target_labels, training_decoder_inputs

def test_training(test_hparams, model, infer_model):
    """Train `model` on one random batch, then exercise `infer_model`.

    The same batch from make_test_training_data is fed to model.train for
    test_hparams.num_train_steps steps; every 15th step the model is saved
    and (global_step, loss) is recorded. Afterwards the first tweet of the
    batch is used as a single-example input for log_prob,
    reward_ease_of_answering, greedy inference and beam-search inference,
    and the results are printed.

    Args:
        test_hparams: hyper parameters; reads use_attention, num_train_steps,
            batch_size, encoder_length, decoder_length and debug_verbose.
        model: training model exposing train(...) and save().
        infer_model: inference model exposing log_prob,
            reward_ease_of_answering, infer and infer_beam_search.
    """
    if test_hparams.use_attention:
        print("==== training model[attention] ====")
    else:
        print("==== training model ====")
    first_tweet, train_encoder_inputs, train_encoder_inputs_lengths, training_target_labels, training_decoder_inputs = make_test_training_data(
        test_hparams)

    # Every reply in the test batch has the full decoder length.
    decoder_lengths = np.ones(test_hparams.batch_size,
                              dtype=int) * test_hparams.decoder_length

    # Train
    x = []
    y = []
    for i in range(test_hparams.num_train_steps):
        loss_value, global_step = model.train(train_encoder_inputs,
                                              train_encoder_inputs_lengths,
                                              training_target_labels,
                                              training_decoder_inputs,
                                              decoder_lengths)
        if i % 5 == 0 and test_hparams.debug_verbose:
            print('.', end='')

        if i % 15 == 0:
            model.save()
            x.append(global_step)
            y.append(loss_value)
            if test_hparams.debug_verbose:
                print("loss={} step={}".format(loss_value, global_step))

    # Single-example inference batch built from the first training tweet.
    # The builtin int replaces the np.int alias removed from modern NumPy.
    inference_encoder_inputs = np.empty((test_hparams.encoder_length, 1),
                                        dtype=int)
    inference_encoder_inputs_lengths = np.empty(1, dtype=int)
    inference_encoder_inputs[:, 0] = first_tweet
    inference_encoder_inputs_lengths[0] = len(first_tweet)

    # testing
    log_prob54 = infer_model.log_prob(inference_encoder_inputs,
                                      inference_encoder_inputs_lengths,
                                      np.array([5, 4]))
    log_prob65 = infer_model.log_prob(inference_encoder_inputs,
                                      inference_encoder_inputs_lengths,
                                      np.array([6, 5]))
    print("log_prob for 54", log_prob54)
    print("log_prob for 65", log_prob65)

    reward = infer_model.reward_ease_of_answering(inference_encoder_inputs,
                                                  inference_encoder_inputs_lengths,
                                                  np.array([[5], [6]]))
    print("reward=", reward)

    if test_hparams.debug_verbose:
        print(inference_encoder_inputs)
    replies = infer_model.infer(inference_encoder_inputs,
                                inference_encoder_inputs_lengths)
    print("Infered replies", replies[0])
    print("Expected replies", training_target_labels[0])

    beam_replies = infer_model.infer_beam_search(inference_encoder_inputs,
                                                 inference_encoder_inputs_lengths)
    print("Infered replies candidate0", beam_replies[0][:, 0])
    print("Infered replies candidate1", beam_replies[0][:, 1])

    if test_hparams.debug_verbose:
        # x holds global steps and y holds losses, so steps go on the x axis.
        plt.plot(x, y, label="Loss")
        plt.xlabel("steps")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()

def create_train_infer_models(graph, sess, hparams, model_path,
                              force_restore=False):
    """Create a training model and an inference model in one graph/session.

    Both models are built under the 'root' variable scope; the inference
    model is created with reuse=True so it uses the training model's
    variables. The training model then tries to restore itself; if that
    fails, the variables are freshly initialised unless force_restore is set.

    Args:
        graph: tf.Graph in which both models are built.
        sess: tf.Session passed to both models and used for initialisation.
        hparams: hyper parameters forwarded to both model constructors.
        model_path: checkpoint path forwarded to both model constructors.
        force_restore: when true, a failed restore is an error instead of
            falling back to fresh initialisation.

    Returns:
        (model, infer_model)

    Raises:
        RuntimeError: if force_restore is true and model.restore() reports
            failure.
    """
    with graph.as_default():
        with tf.variable_scope('root'):
            model = ChatbotModel(sess, hparams, model_path=model_path)

        with tf.variable_scope('root', reuse=True):
            infer_model = ChatbotInferenceModel(sess, hparams,
                                                model_path=model_path)
            restored = model.restore()
            if not restored:
                if force_restore:
                    # A bare string cannot be raised; use a real exception
                    # so the intended message reaches the caller.
                    raise RuntimeError("Oops, couldn't restore")
                sess.run(tf.global_variables_initializer())
        return model, infer_model

def create_train_infer_models_in_graphs(train_graph, train_sess, infer_graph,
                                        infer_sess, hparams, model_path):
    """Create the training and inference models in two separate graphs.

    The training model lives in train_graph/train_sess and is restored from
    its checkpoint, or freshly initialised when restore() reports failure.
    The inference model lives in infer_graph/infer_sess and is neither
    restored nor initialised here.

    Returns:
        (model, infer_model)
    """
    with train_graph.as_default(), tf.variable_scope('root'):
        model = ChatbotModel(train_sess, hparams, model_path=model_path)
        restored = model.restore()
        if not restored:
            train_sess.run(tf.global_variables_initializer())

    # The inference model is built in its own graph, so it does not share
    # variables with the training model.
    with infer_graph.as_default(), tf.variable_scope('root'):
        infer_model = ChatbotInferenceModel(infer_sess, hparams,
                                            model_path=model_path)

    return model, infer_model

def test_multiple_models_training():
    """Train two independent model pairs, then run one reward-based step.

    Each pair gets its own graph and session and its own checkpoint prefix
    (./saved_model/hige and ./saved_model2/hige). After both pairs went
    through test_training, the second training model runs train_with_reward
    with its own inference model and the first pair's inference model.
    """
    (_, enc_inputs, enc_lengths,
     target_labels, dec_inputs) = make_test_training_data(test_hparams)

    # First model pair.
    first_graph = tf.Graph()
    first_sess = tf.Session(graph=first_graph)
    model, infer_model = create_train_infer_models(
        first_graph, first_sess, test_hparams, "./saved_model/hige")
    test_training(test_hparams, model, infer_model)

    # Second model pair, isolated in its own graph and session.
    second_graph = tf.Graph()
    second_sess = tf.Session(graph=second_graph)
    model2, infer_model2 = create_train_infer_models(
        second_graph, second_sess, test_hparams, "./saved_model2/hige")
    test_training(test_hparams, model2, infer_model2)

    dull_responses = [[4, 6, 6], [5, 5]]
    decoder_lengths = np.ones((test_hparams.batch_size),
                              dtype=int) * test_hparams.decoder_length
    model2.train_with_reward(infer_model2, infer_model, enc_inputs,
                             enc_lengths, target_labels, dec_inputs,
                             decoder_lengths, dull_responses)

    # The attention variant stays commented out until
    # https://github.com/tensorflow/tensorflow/issues/10731 is fixed; only
    # its graph and session are created for now.
    graph3 = tf.Graph()
    sess3 = tf.Session(graph=graph3)
    #  model3, infer_model3 = create_train_infer_models(graph3, sess3, test_attention_hparams, "./saved_model3/hige")
    #  test_training(test_attention_hparams, model3, infer_model3)


def test_save_restore_multiple_models_training():
    """Run the multi-model training test twice from clean directories.

    The checkpoint directories are cleared first, so the first pass starts
    with fresh models and the second pass runs against whatever the first
    pass saved.
    """
    clear_saved_model()

    for _ in range(2):
        test_multiple_models_training()


# todo support multiple models and attention.
# todo fix save and restore functions
# todo have that pattern for large.
# This is a test based on "Building Training, Eval, and Inference Graphs" in tensorflow/nmt.
def test_distributed_pattern(hparams):
    """Train in one graph, then restore and infer in a separate graph.

    The checkpoint directories are cleared, a training model and an
    inference model are created in separate graphs/sessions, the training
    model is trained on one random batch for hparams.num_train_steps steps
    and saved, and the inference model is restored from that checkpoint
    before greedy and beam-search inference on the first tweet.

    Args:
        hparams: hyper parameters; reads use_attention, num_train_steps,
            batch_size, encoder_length and decoder_length.
    """
    clear_saved_model()

    if hparams.use_attention:
        print("==== test_distributed_pattern[attention] ====")
    else:
        print("==== test_distributed_pattern ====")
    first_tweet, train_encoder_inputs, train_encoder_inputs_lengths, training_target_labels, training_decoder_inputs = make_test_training_data(
        hparams)

    train_graph = tf.Graph()
    infer_graph = tf.Graph()
    train_sess = tf.Session(graph=train_graph)
    infer_sess = tf.Session(graph=infer_graph)

    model, infer_model = create_train_infer_models_in_graphs(train_graph,
                                                             train_sess,
                                                             infer_graph,
                                                             infer_sess,
                                                             hparams,
                                                             "./saved_model/hige")

    # Every reply in the test batch has the full decoder length; the array
    # does not change between steps, so it is built once.
    decoder_lengths = np.ones(hparams.batch_size,
                              dtype=int) * hparams.decoder_length
    for _ in range(hparams.num_train_steps):
        model.train(train_encoder_inputs,
                    train_encoder_inputs_lengths,
                    training_target_labels,
                    training_decoder_inputs,
                    decoder_lengths)

    model.save()

    # Single-example inference batch built from the first training tweet.
    # The builtin int replaces the np.int alias removed from modern NumPy.
    inference_encoder_inputs = np.empty((hparams.encoder_length, 1),
                                        dtype=int)
    inference_encoder_inputs_lengths = np.empty(1, dtype=int)

    inference_encoder_inputs[:, 0] = first_tweet
    inference_encoder_inputs_lengths[0] = len(first_tweet)

    # The inference graph has its own variables; load them from the
    # checkpoint that was just written.
    infer_model.restore()
    replies = infer_model.infer(inference_encoder_inputs,
                                inference_encoder_inputs_lengths)
    print("Infered replies", replies[0])
    print("Expected replies", training_target_labels[0])

    beam_replies = infer_model.infer_beam_search(inference_encoder_inputs,
                                                 inference_encoder_inputs_lengths)
    print("Infered replies candidate0", beam_replies[0][:, 0])
    print("Infered replies candidate1", beam_replies[0][:, 1])


In [None]:
# Run the multi-model training test twice (fresh, then with saved
# checkpoints present).
test_save_restore_multiple_models_training()

# Run the separate train/inference graph test with the non-attention
# test hyper parameters.
test_distributed_pattern(test_hparams)

# todo
# test_distributed_pattern(test_attention_hparams)


In [None]:
def download_file_if_necessary(file_name):
    """Download `file_name` into the working directory unless it exists.

    When ./<file_name> is already present nothing happens. Otherwise the
    content returned by read_file_from_drive(file_name) is written to that
    path.

    Args:
        file_name: name of the file, relative to the working directory.
    """
    path = "./{}".format(file_name)
    if os.path.exists(path):
        return
    print("downloading {}...".format(file_name))
    content = read_file_from_drive(file_name)
    # The context manager closes the handle even when the write fails.
    with open(path, 'w') as f:
        f.write(content)
    print("downloaded")


def read_file_from_drive(file_name):
    """Return the text content of `file_name` from the seq2seq Drive folder.

    Uses the module-level `drive` client to list the non-trashed entries of
    the data folder and downloads the first entry whose title equals
    file_name.

    Raises:
        ValueError: if no entry with that title is listed.
    """
    seq2seq_data_dir_id = "146ZLldWXLDH0l9WbSUNFKi3nVK_HV0Sz"
    query = "'{}' in parents and trashed=false".format(seq2seq_data_dir_id)
    candidates = drive.ListFile({'q': query}).GetList()

    matches = [entry for entry in candidates if entry['title'] == file_name]
    if not matches:
        raise ValueError("file {} not found.".format(file_name))

    downloaded = drive.CreateFile({'id': matches[0]['id']})
    return downloaded.GetContentString()


def read_vocabulary(vocabulary_path):
    """Load the vocabulary file, downloading it first when it is missing.

    Returns:
        (vocab, rev_vocab): rev_vocab is the list of stripped lines of the
        file, vocab maps each of those words to its index in rev_vocab
        (the last index wins when a word occurs more than once).
    """
    download_file_if_necessary(vocabulary_path)
    raw_lines = read_file(vocabulary_path).splitlines()
    rev_vocab = [entry.strip() for entry in raw_lines]
    vocab = {word: index for index, word in enumerate(rev_vocab)}
    return vocab, rev_vocab

def read_file(file_name):
    """Return the whole text content of ./<file_name>.

    Args:
        file_name: name of the file, relative to the working directory.
    """
    # The context manager closes the handle even when read() raises.
    with open("./{}".format(file_name)) as f:
        return f.read()


In [None]:
def download_model_data_if_necessary(drive, model_path):
    """Copy the model files from the Drive model folder into `model_path`.

    Does nothing when `drive` is None. Otherwise model_path is created if
    needed and every non-trashed entry of the Drive folder that does not
    exist locally yet is downloaded into it. A progress line is printed
    for every listed entry, whether or not it had to be downloaded.
    """
    if drive is None:
        return

    model_folder_in_drive = "18lYBgKvX3AG1zhwJqP1tRYJU688U1N95"
    query = "'{}' in parents and trashed=false".format(model_folder_in_drive)
    remote_files = drive.ListFile({'q': query}).GetList()

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    for remote in remote_files:
        title = remote['title']
        print("Downloading ", title, "...", end='')
        local_path = "{}/{}".format(model_path, title)
        if not os.path.exists(local_path):
            remote.GetContentFile(local_path)
        print("done")

def create_inference_input(hparams, vocab):
    """Build the fixed single-tweet input used to sample replies.

    The hard-coded tweet is converted to ids with words_to_ids and padded
    with pad_id up to hparams.encoder_length.

    Args:
        hparams: reads encoder_length.
        vocab: word -> id mapping passed to words_to_ids.

    Returns:
        (inference_encoder_inputs, inference_encoder_inputs_lengths): an int
        array of shape (encoder_length, 1) holding the padded ids and an int
        array of shape (1,) holding the unpadded tweet length.
    """
    tweet = ["フォロー", "ありがとう", "メッセージ", "😢", "www"]
    #  tweet = ["おはよう"]
    tweet_ids = words_to_ids(tweet, vocab)
    len_tweet = len(tweet_ids)
    tweet_ids.extend([pad_id] * (hparams.encoder_length - len_tweet))

    # The builtin int replaces the np.int alias removed from modern NumPy.
    inference_encoder_inputs = np.empty((hparams.encoder_length, 1),
                                        dtype=int)
    inference_encoder_inputs_lengths = np.empty(1, dtype=int)
    inference_encoder_inputs[:, 0] = np.array(tweet_ids, dtype=int)
    inference_encoder_inputs_lengths[0] = len_tweet
    return inference_encoder_inputs, inference_encoder_inputs_lengths

def infer(infer_model, inference_encoder_inputs,
          inference_encoder_inputs_lengths, global_step, rev_vocab):
    """Print the greedy reply and the first two beam-search candidates.

    Args:
        infer_model: model exposing infer(...) and infer_beam_search(...).
        inference_encoder_inputs: encoder input ids passed to the model.
        inference_encoder_inputs_lengths: lengths passed to the model.
        global_step: step number shown in the printed prefix.
        rev_vocab: id -> word list used to turn ids into text.
    """
    greedy = infer_model.infer(inference_encoder_inputs,
                               inference_encoder_inputs_lengths)
    greedy_ids = greedy[0].tolist()
    print("  [step-{}] Infered reply".format(global_step),
          ids_to_words(greedy_ids, rev_vocab))

    beams = infer_model.infer_beam_search(inference_encoder_inputs,
                                          inference_encoder_inputs_lengths)
    first_example = beams[0]
    print("  [step-{}] Infered replies candidate0".format(global_step),
          ids_to_words(first_example[:, 0], rev_vocab))
    print("  [step-{}] Infered replies candidate1".format(global_step),
          ids_to_words(first_example[:, 1], rev_vocab))

def generic_train_loop(train_feed_data, val_feed_data, vocab, rev_vocab,
                       model_path, hparams, generate_models_func,
                       inference_hook_func, drive=None, short_loop=False):
    """Run the training loop shared by the one-graph and two-graph setups.

    Every step feeds one batch from train_feed_data to model.train. On a
    reporting step (index 2 when short_loop is set, otherwise every 15th
    step after the first) the loss is printed, the model is saved,
    inference_hook_func is called and a sample reply is inferred; when
    val_feed_data is given, the loss of one validation batch is printed too.
    With a drive client the model directory is uploaded roughly once per
    hour, and every 100th step the recorded losses are plotted.

    Args:
        train_feed_data: dataset whose elements are the five values passed,
            in order, to model.train.
        val_feed_data: dataset with the same element layout, used for
            model.batch_loss, or None to skip validation.
        vocab: word -> id mapping used to build the sample inference input.
        rev_vocab: id -> word list used to print inferred replies.
        model_path: checkpoint location passed to generate_models_func and
            to the Drive download/upload helpers.
        hparams: hyper parameters; reads num_train_steps.
        generate_models_func: callable (hparams, model_path) returning
            (graph, sess, model, infer_model).
        inference_hook_func: callable (infer_model) invoked before each
            sample inference.
        drive: optional Drive client; None disables download and upload.
        short_loop: when true, report once at step index 2 and stop.
    """
    x = []
    y = []

    download_model_data_if_necessary(drive, model_path)

    inference_encoder_inputs, inference_encoder_inputs_lengths = create_inference_input(
        hparams, vocab)

    graph, sess, model, infer_model = generate_models_func(hparams, model_path)

    with graph.as_default():
        # Build each get_next() op exactly once and re-run it; calling
        # get_next() inside the loop would add a new op to the graph on
        # every step.
        next_train_batch = train_feed_data.make_one_shot_iterator().get_next()
        next_val_batch = None
        if val_feed_data is not None:
            next_val_batch = val_feed_data.make_one_shot_iterator().get_next()

        def report(loss_value, global_step):
            # Print the loss, save, sample a reply and, when validation data
            # is available, return the loss of one validation batch
            # (None otherwise).
            print("loss={:.2f}".format(loss_value))
            model.save()
            inference_hook_func(infer_model)
            infer(infer_model, inference_encoder_inputs,
                  inference_encoder_inputs_lengths, global_step, rev_vocab)
            if next_val_batch is None:
                return None
            val_data = sess.run(next_val_batch)
            val_loss = model.batch_loss(val_data[0], val_data[1],
                                        val_data[2], val_data[3],
                                        val_data[4])
            print("validation loss", val_loss)
            return val_loss

        last_saved_time = datetime.datetime.now()
        for i in range(hparams.num_train_steps):
            train_data = sess.run(next_train_batch)
            loss_value, global_step = model.train(train_data[0], train_data[1],
                                                  train_data[2], train_data[3],
                                                  train_data[4])

            if short_loop and i == 2:
                x.append(global_step)
                y.append(loss_value)
                report(loss_value, global_step)
                break
            elif i != 0 and i % 15 == 0:
                val_loss = report(loss_value, global_step)
                if next_val_batch is not None:
                    x.append(global_step)
                    y.append(val_loss)
            else:
                print('.', end='')

            now = datetime.datetime.now()
            if (now - last_saved_time).total_seconds() > 3600 and drive is not None:
                # A new client is created before each upload.
                drive = make_drive()
                last_saved_time = datetime.datetime.now()
                save_model_in_drive(drive, model_path)

            if i != 0 and i % 100 == 0:
                plt.plot(x, y, label="Validation Loss")
                plt.ylabel("Validation Loss")
                plt.xlabel("steps")
                plt.legend()
                plt.show()

def train_loop(train_feed_data, val_feed_data, vocab, rev_vocab, model_path,
               hparams, drive=None, short_loop=False):
    """Run generic_train_loop with both models living in a single graph.

    The training and inference models are created by
    create_train_infer_models in one graph/session, so no extra work is
    needed before inference and the inference hook does nothing.
    """
    def build_models(hparams, model_path):
        shared_graph = tf.Graph()
        shared_sess = tf.Session(graph=shared_graph)
        model, infer_model = create_train_infer_models(shared_graph,
                                                       shared_sess, hparams,
                                                       model_path)
        return shared_graph, shared_sess, model, infer_model

    def no_op_hook(infer_model):
        pass

    generic_train_loop(train_feed_data, val_feed_data, vocab, rev_vocab,
                       model_path, hparams,
                       generate_models_func=build_models,
                       inference_hook_func=no_op_hook,
                       drive=drive, short_loop=short_loop)

def train_loop_distributed_pattern(train_feed_data, val_feed_data, vocab,
                                   rev_vocab, model_path, hparams, drive=None,
                                   short_loop=False):
    """Run generic_train_loop with training and inference in separate graphs.

    The models are created by create_train_infer_models_in_graphs; the
    training graph and session are the ones handed to the loop. Before each
    sample inference the inference model is restored from the checkpoint.
    """
    def build_models(hparams, model_path):
        train_graph, infer_graph = tf.Graph(), tf.Graph()
        train_sess = tf.Session(graph=train_graph)
        infer_sess = tf.Session(graph=infer_graph)
        model, infer_model = create_train_infer_models_in_graphs(
            train_graph, train_sess, infer_graph, infer_sess, hparams,
            model_path)
        return train_graph, train_sess, model, infer_model

    def restore_before_inference(infer_model):
        # The inference model lives in a different graph, so it is always
        # reloaded from file.
        ok = infer_model.restore()
        assert ok

    generic_train_loop(train_feed_data, val_feed_data, vocab, rev_vocab,
                       model_path, hparams,
                       generate_models_func=build_models,
                       inference_hook_func=restore_before_inference,
                       drive=drive, short_loop=short_loop)


In [None]:
def train_rl_loop_distributed_pattern(train_feed_data, vocab, rev_vocab,
                                      src_model_path, dst_model_path, hparams,
                                      drive=None, short_loop=False):
    """Run reward-based training starting from a saved seq2seq model.

    A seq2seq inference model is restored from src_model_path in its own
    graph. A second train/inference pair is restored from the same path in
    another graph and trained with model.train_with_reward, which receives
    both inference models and the dull-response id lists.

    Args:
        train_feed_data: dataset whose elements are the five values passed,
            in order, to model.train_with_reward after the two inference
            models.
        vocab: word -> id mapping.
        rev_vocab: id -> word list used to print inferred replies.
        src_model_path: checkpoint location both graphs are restored from.
        dst_model_path: location passed to model.save on periodic saves and
            to the Drive upload.
        hparams: hyper parameters; reads num_train_steps.
        drive: optional Drive client; None disables download and upload.
        short_loop: when true, report once at step index 2 and stop.
    """
    x = []
    y = []

    # Built as a list: map() yields a one-shot iterator on Python 3, but the
    # responses are printed here and then reused on every training step.
    dull_responses = [words_to_ids(words, vocab)
                      for words in [["おはよう"], ["おつかれ"]]]
    print(dull_responses)

    download_model_data_if_necessary(drive, src_model_path)

    inference_encoder_inputs, inference_encoder_inputs_lengths = create_inference_input(
        hparams, vocab)

    seq2seq_graph = tf.Graph()
    rl_graph = tf.Graph()

    seq2seq_sess = tf.Session(graph=seq2seq_graph)
    rl_sess = tf.Session(graph=rl_graph)

    with seq2seq_graph.as_default():
        with tf.variable_scope('root'):
            seq2seq_infer_model = ChatbotInferenceModel(seq2seq_sess, hparams,
                                                        model_path=src_model_path)
            restored = seq2seq_infer_model.restore()
            assert (restored)

    model, infer_model = create_train_infer_models(rl_graph, rl_sess, hparams,
                                                   src_model_path,
                                                   force_restore=True)
    with rl_graph.as_default():
        # Build the get_next() op exactly once and re-run it; calling
        # get_next() inside the loop would add a new op to the graph on
        # every step.
        next_train_batch = train_feed_data.make_one_shot_iterator().get_next()

    last_saved_time = datetime.datetime.now()
    for i in range(hparams.num_train_steps):
        train_data = rl_sess.run(next_train_batch)
        loss_value, global_step = model.train_with_reward(infer_model,
                                                          seq2seq_infer_model,
                                                          train_data[0],
                                                          train_data[1],
                                                          train_data[2],
                                                          train_data[3],
                                                          train_data[4],
                                                          dull_responses)
        print('.', end='')

        if short_loop and i == 2:
            x.append(global_step)
            y.append(loss_value)
            print("loss={:.2f}".format(loss_value))
            # NOTE(review): the short loop saves without dst_model_path,
            # unlike the periodic branch below - confirm this is intended.
            model.save()
            infer(infer_model, inference_encoder_inputs,
                  inference_encoder_inputs_lengths, global_step, rev_vocab)
            break
        elif i != 0 and i % 15 == 0:
            x.append(global_step)
            y.append(loss_value)
            print("loss={:.2f}".format(loss_value))
            model.save(dst_model_path)
            infer(infer_model, inference_encoder_inputs,
                  inference_encoder_inputs_lengths, global_step, rev_vocab)

            now = datetime.datetime.now()
            if (now - last_saved_time).total_seconds() > 7200 and drive is not None:
                save_model_in_drive(drive, dst_model_path, is_rl=True)
                last_saved_time = datetime.datetime.now()

            # NOTE(review): nested in the every-15-steps branch, so the plot
            # only appears when i is a multiple of both 15 and 100 - confirm.
            if i != 0 and i % 100 == 0:
                plt.plot(x, y, label="Loss")
                plt.ylabel("Loss")
                plt.xlabel("steps")
                plt.legend()
                plt.show()


In [None]:
def create_encoder_idx_padded(src_file, dst_file, dst_length_file,
                              max_line_len):
    """Write fixed-width encoder id lines and their original lengths.

    Each line of src_file is read as whitespace-separated integers and cut
    to at most max_line_len ids. The resulting id count goes to
    dst_length_file (one number per line); the ids, padded with pad_id up
    to max_line_len, go to dst_file (space-separated, one line per input
    line).
    """
    with open(src_file) as fin, open(dst_file, "w") as fout, open(
            dst_length_file, "w") as flen:
        for raw in fin:
            tokens = [int(tok) for tok in raw.split()][:max_line_len]
            # The length is recorded before padding is added.
            flen.write("{}\n".format(len(tokens)))
            shortfall = max_line_len - len(tokens)
            if shortfall > 0:
                tokens.extend([pad_id] * shortfall)
            fout.write(" ".join(map(str, tokens)) + "\n")

# read decoder_idx file and append eos at the end of idx list.
def create_decoder_idx_eos(src_file, dst_file, max_line_len):
    """Write decoder id lines that end with the EOS id, padded to fixed width.

    Each line of src_file is read as whitespace-separated integers, cut to
    at most max_line_len - 1 ids, extended with tgt_eos_id and then padded
    with pad_id up to max_line_len before being written to dst_file.
    """
    with open(src_file) as fin, open(dst_file, "w") as fout:
        for raw in fin:
            parsed = [int(tok) for tok in raw.split()]
            # One slot is kept free for the EOS marker.
            tokens = parsed[:max_line_len - 1]
            tokens.append(tgt_eos_id)
            shortfall = max_line_len - len(tokens)
            if shortfall > 0:
                tokens.extend([pad_id] * shortfall)
            fout.write(" ".join(map(str, tokens)) + "\n")

# read decoder_idx file and put sos at the beginning of the idx list.
# also write out the length of the index list.
def create_decoder_idx_sos(src_file, dst_file, dst_length_file, max_line_len):
    """Write decoder id lines that start with the SOS id, plus their lengths.

    Each line of src_file is read as whitespace-separated integers,
    prefixed with tgt_sos_id and cut to at most max_line_len ids. The
    resulting id count goes to dst_length_file (one number per line); the
    ids, padded with pad_id up to max_line_len, go to dst_file.
    """
    with open(src_file) as fin, open(dst_file, "w") as fout, open(
            dst_length_file, "w") as flen:
        for raw in fin:
            prefixed = [tgt_sos_id] + [int(tok) for tok in raw.split()]
            tokens = prefixed[:max_line_len]
            # The length is recorded before padding is added.
            flen.write("{}\n".format(len(tokens)))
            shortfall = max_line_len - len(tokens)
            if shortfall > 0:
                tokens.extend([pad_id] * shortfall)
            fout.write(" ".join(map(str, tokens)) + "\n")


In [None]:
def split_to_int_values(x):
    """Split the string tensor `x` and convert the pieces to int32."""
    pieces = tf.string_split([x]).values
    return tf.string_to_number(pieces, tf.int32)

def textLineSplitDataset(filename):
    """Return a dataset of ./<filename> lines mapped through split_to_int_values."""
    lines = tf.data.TextLineDataset("./{}".format(filename))
    return lines.map(split_to_int_values)

def words_to_ids(words, vocab):
    """Map each word to its id in `vocab`; words not in vocab get unk_id.

    Returns:
        A new list with one id per input word, in input order.
    """
    return [vocab[word] if word in vocab else unk_id for word in words]

def ids_to_words(ids, rev_vocab):
    """Concatenate, without separator, the rev_vocab entry of every id."""
    return "".join(rev_vocab[token_id] for token_id in ids)

def make_train_dataset(tweets_enc_idx_file, tweets_dec_idx_file, vocab_file,
                       hparams):
    """Prepare the id files and build the batched training dataset.

    The encoder and decoder id files are downloaded if missing and turned
    into a padded encoder file (+ lengths), an EOS-suffixed decoder file and
    an SOS-prefixed decoder file (+ lengths). These are read as datasets,
    batched by hparams.batch_size and zipped together.

    Args:
        tweets_enc_idx_file: encoder (tweet) id file name.
        tweets_dec_idx_file: decoder (reply) id file name.
        vocab_file: vocabulary file name passed to read_vocabulary.
        hparams: hyper parameters; reads encoder_length, decoder_length and
            batch_size.

    Returns:
        (train_feed_data, vocab, rev_vocab): train_feed_data yields tuples
        (tweets transposed, tweet lengths, replies with EOS suffix,
        replies with SOS prefix transposed, SOS-prefixed reply lengths).
    """
    # todo: skip if already exists
    tweets_enc_idx_padded_file = "{}.padded".format(tweets_enc_idx_file)
    tweets_enc_idx_len_file = "{}.len".format(tweets_enc_idx_file)

    tweets_dec_idx_eos_file = "{}.eos".format(tweets_dec_idx_file)
    tweets_dec_idx_sos_file = "{}.sos".format(tweets_dec_idx_file)
    tweets_dec_idx_len_file = "{}.len".format(tweets_dec_idx_file)

    download_file_if_necessary(tweets_enc_idx_file)
    create_encoder_idx_padded(tweets_enc_idx_file, tweets_enc_idx_padded_file,
                              tweets_enc_idx_len_file, hparams.encoder_length)
    print(tweets_enc_idx_padded_file, " created")

    download_file_if_necessary(tweets_dec_idx_file)
    create_decoder_idx_eos(tweets_dec_idx_file, tweets_dec_idx_eos_file,
                           hparams.decoder_length)
    print(tweets_dec_idx_eos_file, " created")

    create_decoder_idx_sos(tweets_dec_idx_file, tweets_dec_idx_sos_file,
                           tweets_dec_idx_len_file, hparams.decoder_length)
    print(tweets_dec_idx_sos_file, " created")

    tweets_dataset = textLineSplitDataset(tweets_enc_idx_padded_file)
    tweets_lengths_dataset = tf.data.TextLineDataset(tweets_enc_idx_len_file)

    replies_sos_dataset = textLineSplitDataset(tweets_dec_idx_sos_file)
    replies_eos_dataset = textLineSplitDataset(tweets_dec_idx_eos_file)
    replies_sos_lengths_dataset = tf.data.TextLineDataset(
        tweets_dec_idx_len_file)

    tweets_transposed = tweets_dataset.batch(hparams.batch_size).map(
        lambda x: tf.transpose(x))
    tweets_lengths = tweets_lengths_dataset.batch(hparams.batch_size)

    replies_with_eos_suffix = replies_eos_dataset.batch(hparams.batch_size)
    replies_with_sos_prefix = replies_sos_dataset.batch(hparams.batch_size).map(
        lambda x: tf.transpose(x))
    replies_with_sos_suffix_lengths = replies_sos_lengths_dataset.batch(
        hparams.batch_size)

    # The session only serves the debug previews below, so it is closed
    # before returning instead of being left open.
    sess = tf.Session()
    try:
        info("tweets_example: {}".format(
            sess.run(tweets_transposed.make_one_shot_iterator().get_next())),
             hparams)
        info("tweets_lengths_example:{}".format(
            sess.run(tweets_lengths.make_one_shot_iterator().get_next())),
             hparams)
        info("reply_with_eos_suffix_example:{}".format(
            sess.run(
                replies_with_eos_suffix.make_one_shot_iterator().get_next())),
             hparams)
        info("reply_with_sos_prefix_example:{}".format(
            sess.run(
                replies_with_sos_prefix.make_one_shot_iterator().get_next())),
             hparams)
        info("reply_with_sos_lengths_prefix_example:{}".format(sess.run(
            replies_with_sos_suffix_lengths.make_one_shot_iterator().get_next())),
             hparams)

        # Merge all using zip
        train_feed_data = tf.data.Dataset.zip((tweets_transposed,
                                               tweets_lengths,
                                               replies_with_eos_suffix,
                                               replies_with_sos_prefix,
                                               replies_with_sos_suffix_lengths))
        train_feed_data_value = sess.run(
            train_feed_data.make_one_shot_iterator().get_next())
        info("train_feed_data={}".format(train_feed_data_value[0]), hparams)
        info("train_feed_data={}".format(train_feed_data_value[1]), hparams)
        info("train_feed_data={}".format(train_feed_data_value[2]), hparams)
        info("train_feed_data={}".format(train_feed_data_value[3]), hparams)
    finally:
        sess.close()

    print("Dataset created")

    vocab, rev_vocab = read_vocabulary(vocab_file)
    return train_feed_data, vocab, rev_vocab

def save_model_in_drive(drive, model_path, is_rl=False):
    """Upload every file in `model_path` to Drive, replacing older uploads.

    The target is the RL model folder when is_rl is true, otherwise the
    normal model folder. The entries present in the folder before the
    upload are listed first and deleted only after all new files have been
    uploaded.
    """
    normal_model_folder_in_drive = "18lYBgKvX3AG1zhwJqP1tRYJU688U1N95"
    rl_model_folder_in_drive = "1pHnOuT_7JjD1TS8VQ4KN9oUiblBIABXJ"
    folder_id = rl_model_folder_in_drive if is_rl else normal_model_folder_in_drive

    query = "'{}' in parents and trashed=false".format(folder_id)
    previous_uploads = drive.ListFile({'q': query}).GetList()

    for name in os.listdir(model_path):
        upload = drive.CreateFile({'title': name, "parents": [
            {"kind": "drive#fileLink", "id": folder_id}]})
        upload.SetContentFile("{}/{}".format(model_path, name))
        print("Uploading ", name, "...", end="")
        upload.Upload()
        print("done")

    for old_entry in previous_uploads:
        drive.CreateFile({'id': old_entry['id']}).Delete()


In [None]:
# Google Drive access is only set up when colab() is true, i.e. when the
# platform is not "Darwin".
if colab():
    # IPython shell escape: installs PyDrive into the notebook environment.
    !pip install pydrive
    from pydrive.auth import GoogleAuth
    from pydrive.drive import GoogleDrive
    from google.colab import auth
    from oauth2client.client import GoogleCredentials

    # Returns a new GoogleDrive client that uses the application default
    # credentials obtained after authenticating the user.
    def make_drive():
        # 1. Authenticate and create the PyDrive client.
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        drive = GoogleDrive(gauth)
        return drive
    # Module-level client; read_file_from_drive reads this global.
    drive = make_drive()


In [None]:
# Fetch the raw decoder/encoder id files; nothing is downloaded when they
# already exist locally.
download_file_if_necessary("tweets_train_dec_idx.txt")
download_file_if_necessary("tweets_train_enc_idx.txt")

# Derive the EOS-suffixed decoder file, the SOS-prefixed decoder file (plus
# its length file) and the padded encoder file (plus its length file), using
# the sequence lengths from real_hparams.
create_decoder_idx_eos("./tweets_train_dec_idx.txt",
                       "./tweets_train_dec_eos_idx.txt",
                       real_hparams.decoder_length)
create_decoder_idx_sos("./tweets_train_dec_idx.txt",
                       "./tweets_train_dec_sos_idx.txt",
                       "./tweets_train_dec_sos_idx_len.txt",
                       real_hparams.decoder_length)
create_encoder_idx_padded("./tweets_train_enc_idx.txt",
                          "./tweets_train_enc_idx_padded.txt",
                          "./tweets_train_enc_idx_len.txt",
                          real_hparams.encoder_length)


In [None]:
def print_header(name):
    """Print `name` as a section banner framed by '=' characters."""
    banner = "==========   {}   ========".format(name)
    print(banner)

def test_small_train_loops():
    """Run each training loop briefly on the small tweet dataset.

    Builds the dataset from the small tweet index files and ``vocab.txt``
    using ``real_hparams``, then calls ``train_loop``,
    ``train_loop_distributed_pattern`` and
    ``train_rl_loop_distributed_pattern`` in that order. Every loop gets its
    own ``repeat(10)`` of the training data and ``short_loop=True``; the
    first two are given ``None`` as their second argument.
    """
    base_model_dir = "./saved_model/real"
    rl_model_dir = "./saved_model/real_rl"

    print_header("make train dataset")
    dataset, vocab, rev_vocab = make_train_dataset(
        "tweets_train_enc_idx.txt",
        "tweets_train_dec_idx.txt",
        "vocab.txt",
        real_hparams)

    print_header("train_loop")
    train_loop(
        dataset.repeat(10), None, vocab, rev_vocab,
        base_model_dir, real_hparams, short_loop=True)

    print_header("train_loop_distributed_pattern")
    train_loop_distributed_pattern(
        dataset.repeat(10), None, vocab, rev_vocab,
        base_model_dir, real_hparams, short_loop=True)

    print_header("train_rl_loop_distributed_pattern")
    # The RL loop takes both the base model directory and a separate RL one.
    train_rl_loop_distributed_pattern(
        dataset.repeat(10), vocab, rev_vocab,
        base_model_dir, rl_model_dir, real_hparams, short_loop=True)


In [None]:
# Run the short-loop check of all three training loops defined in the
# previous cell.
test_small_train_loops()

In [None]:
# Switches for the large training jobs; all of them are off here.
should_run_large_train = False
should_run_large_train_distributed = False
should_run_large_train_rl = False

# The large datasets are only built when at least one job is switched on.
if any((should_run_large_train,
        should_run_large_train_distributed,
        should_run_large_train_rl)):
    train_feed_data, vocab, rev_vocab = make_train_dataset(
        "tweets_train_enc_idx_large.txt",
        "tweets_train_dec_idx_large.txt",
        "vocab_large.txt",
        large_hparams)
    # Built from the same vocab file, so vocab / rev_vocab are re-bound here.
    val_feed_data, vocab, rev_vocab = make_train_dataset(
        "tweets_val_enc_idx_large.txt",
        "tweets_val_dec_idx_large.txt",
        "vocab_large.txt",
        large_hparams)
    # Shuffle with a 4096-element buffer, then repeat without a count.
    val_feed_data = val_feed_data.shuffle(4096)
    val_feed_data = val_feed_data.repeat()


In [None]:
# Launch whichever large training jobs were enabled by the should_run_* flags.
# The datasets and vocab used here are only created when at least one flag is
# set (see the flag cell).
# NOTE(review): `drive` appears to be bound only inside the `if colab():`
# PyDrive cell, so enabling a flag off Colab would raise NameError here.
if should_run_large_train:
    # Pass the validation dataset as the second positional argument, matching
    # the other train_loop call site (which passes None in that position) and
    # the train_loop_distributed_pattern call below. Omitting it shifts every
    # following argument by one position.
    train_loop(train_feed_data, val_feed_data, vocab, rev_vocab,
               "./saved_model/large", large_hparams, drive=drive)
if should_run_large_train_distributed:
    train_loop_distributed_pattern(train_feed_data, val_feed_data, vocab,
                                   rev_vocab, "./saved_model/large",
                                   large_hparams, drive=drive)
if should_run_large_train_rl:
    # The RL loop takes no validation dataset; it receives the base model
    # directory followed by a separate RL model directory.
    train_rl_loop_distributed_pattern(train_feed_data, vocab, rev_vocab,
                                      "./saved_model/large",
                                      "./saved_model/large_rl", large_hparams,
                                      drive=drive)


In [None]:
#drive=make_drive()
#save_model_in_drive(drive, "./saved_model/large")

In [None]:
# List the files currently in the large model's directory. The commented-out
# command below would delete that directory; it is left disabled on purpose.
!ls saved_model/large
#!rm -rf  saved_model/large