In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import inspect
import pickle

## Old classes and functions

In [2]:
def _load_pickle(path):
    """Return the object unpickled from the file at `path`."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Vocabulary lookups, one pickle file per direction
word_to_id = _load_pickle('word_to_id.pickle')
id_to_word = _load_pickle('id_to_word.pickle')

In [9]:
class config(object):
    """Hyperparameters for the PTB language model.

    Attributes:
        vocab_size: size of the vocabulary.
        batch_size: number of sequences per batch.
        num_steps: sequence length; the number of unrolls.
        hidden_size: number of hidden units in the LSTM; also the embedding size.
        keep_prob: 1 - dropout rate.
        num_layers: number of LSTM layers.
        max_grad_norm: max gradient norm (to prevent exploding gradients).
        init_scale: the initial scale of the weights.
        max_epoch: the number of epochs trained with the initial learning rate.
        max_max_epoch: the total number of epochs for training.
        learning_rate: the initial value of the learning rate.
        lr_decay: the decay of the learning rate for each epoch after `max_epoch`.
    """

    # Data and batching
    vocab_size = 10000
    batch_size = 20
    num_steps = 20

    # Network
    hidden_size = 200
    keep_prob = 0.5
    num_layers = 2

    # Optimization
    max_grad_norm = 5
    init_scale = 0.1
    max_epoch = 4
    max_max_epoch = 13
    learning_rate = 1.0
    lr_decay = 0.5

# Settings for evaluation / text generation: the class defaults, overridden
# on this instance so that a single word is processed per step.
eval_config = config()
eval_config.batch_size = eval_config.num_steps = 1

In [10]:
class PTBModel(object):
    """The PTB model.

    Builds the graph of a multi-layer LSTM language model: an embedding
    lookup, an unrolled stack of `config.num_layers` LSTM cells, a softmax
    projection, a sequence loss and (only when `is_training`) a gradient
    descent training op with global-norm gradient clipping.

    Args:
        is_training: if True, dropout is applied (when `config.keep_prob < 1`)
            and the optimizer ops are built.
        config: object providing `batch_size`, `num_steps`, `hidden_size`,
            `vocab_size`, `keep_prob`, `num_layers` and `max_grad_norm`.
        input_: optional object providing `input_data` and `targets`.
            When None, int32 placeholders of shape [batch_size, num_steps]
            are created instead (used for text generation).
    """

    def __init__(self, is_training, config, input_=None):
        batch_size = config.batch_size
        num_steps = config.num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        # Always record the input object (None in text-generation mode) so the
        # `input` property never reads an attribute that was not set.
        self._input = input_

        if input_ is not None:
            # For normal training and validation
            self._input_data = input_.input_data
            self._targets = input_.targets
        else:
            # For text generations
            self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
            self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        def lstm_cell():
            # With the latest TensorFlow source code (as of Mar 27, 2017),
            # the BasicLSTMCell will need a reuse parameter which is unfortunately not
            # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
            # an argument check here.
            #
            # Note because we set `state_is_tuple=True`, the states are 2-tuples of the `c_state` and `h_state`
            # `c_state` is the cell state
            # `h_state` is the hidden state
            # See this SO thread: https://stackoverflow.com/questions/41789133/c-state-and-m-state-in-tensorflow-lstm

            # `inspect.getargspec` is deprecated on Python 3 (and removed in 3.11);
            # use `getfullargspec` when it exists and fall back otherwise (Python 2).
            get_argspec = (inspect.getfullargspec
                           if hasattr(inspect, 'getfullargspec')
                           else inspect.getargspec)

            if 'reuse' in get_argspec(
                    tf.contrib.rnn.BasicLSTMCell.__init__).args:
                return tf.contrib.rnn.BasicLSTMCell(
                    hidden_size,
                    forget_bias=0.0,
                    state_is_tuple=True,
                    reuse=tf.get_variable_scope().reuse)
            else:
                return tf.contrib.rnn.BasicLSTMCell(
                    hidden_size,
                    forget_bias=0.0,
                    state_is_tuple=True)

        attn_cell = lstm_cell

        # Implement dropout (for training only)
        if is_training and config.keep_prob < 1:

            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)

        # Stacking multiple LSTMs
        attn_cells = [attn_cell() for _ in range(config.num_layers)]
        stacked_lstm = tf.contrib.rnn.MultiRNNCell(attn_cells, state_is_tuple=True)

        # Initialize states with zeros
        # `_initial_state` is a list of `num_layers` tensors
        # Each is a tuple of (`c_state`, `h_state`),
        # and both `c_state` and `h_state` are shaped [batch_size, hidden_size]
        self._initial_state = stacked_lstm.zero_state(batch_size, tf.float32)

        # The word IDs will be embedded into a dense representation before feeding to the LSTM.
        # This allows the model to efficiently represent the knowledge about particular words.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [vocab_size, hidden_size], dtype=tf.float32)
            input_embeddings = tf.nn.embedding_lookup(embedding, self.input_data)
            # The shape of `input_embeddings` is [batch_size, num_steps, hidden_size]

        # Implement dropout (for training only)
        if is_training and config.keep_prob < 1:
            input_embeddings = tf.nn.dropout(input_embeddings, config.keep_prob)

        # Simplified version of models/tutorials/rnn/rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        # outputs, state = tf.contrib.rnn.static_rnn(
        #     cell, inputs, initial_state=self._initial_state)

        # Unroll LSTM loop
        outputs = []
        state = self._initial_state

        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                # Share the cell weights across all time steps after the first
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()

                (cell_output, state) = stacked_lstm(input_embeddings[:, time_step, :], state)
                outputs.append(cell_output)
                # `outputs` is a list of `num_steps` tensors, each shaped [batch_size, hidden_size]

        # Resize the output into a [batch_size * num_steps, hidden_size] matrix.
        # Note axis=1 because we want to group words together according to its original sequence
        # in order to compare with `targets` to compute loss later.
        output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, hidden_size])

        # Compute logits
        softmax_w = tf.get_variable(
            "softmax_w", [hidden_size, vocab_size], dtype=tf.float32)
        softmax_b = tf.get_variable(
            "softmax_b", [vocab_size], dtype=tf.float32)

        self._logits = logits = tf.matmul(output, softmax_w) + softmax_b
        # The shape of `logits` =
        # [batch_size * num_steps, hidden_size] x [hidden_size, vocab_size] + [vocab_size] =
        # [batch_size * num_steps, vocab_size]

        # Draw one sample per row of `logits` (used for text generation)
        self._logits_sample = tf.multinomial(logits, 1)

        # Reshape logits to be 3-D tensor for sequence loss
        logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])

        # Use the contrib sequence loss and average over the batches
        # Source code: https://github.com/tensorflow/tensorflow/blob/r1.2/tensorflow/contrib/seq2seq/python/ops/loss.py#L30
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,  # shape: [batch_size, num_steps, vocab_size]
            self._targets,  # shape: [batch_size, num_steps]
            tf.ones([batch_size, num_steps], dtype=tf.float32),
            average_across_timesteps=False,
            average_across_batch=True)

        # Update the cost variables
        self._cost = cost = tf.reduce_sum(loss)
        self._final_state = state

        # Everything below builds the optimizer; skip it for evaluation/generation models
        if not is_training:
            return

        # Optimizer
        self._lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(cost, tvars), config.max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        # The learning rate is changed by feeding a new value into this
        # placeholder and running the assign op (see `assign_lr`)
        self._new_lr = tf.placeholder(
            tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)


    # To update learning rate
    def assign_lr(self, session, lr_value):
        """Run the learning-rate assign op in `session`, feeding `lr_value`.

        Relies on ops that are only built when `is_training` is True.
        """
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    @property
    def input(self):
        """The `input_` object passed to the constructor (None if omitted)."""
        return self._input

    @property
    def input_data(self):
        """Input word IDs: `input_.input_data` or the generation placeholder."""
        return self._input_data

    @property
    def targets(self):
        """Target word IDs: `input_.targets` or the generation placeholder."""
        return self._targets

    @property
    def initial_state(self):
        """Zero state of the stacked LSTM."""
        return self._initial_state

    @property
    def cost(self):
        """Sum of the sequence loss."""
        return self._cost

    @property
    def final_state(self):
        """LSTM state after the last unrolled time step."""
        return self._final_state

    @property
    def lr(self):
        """Learning-rate variable; only set when built with `is_training=True`."""
        return self._lr

    @property
    def train_op(self):
        """Training op; only set when built with `is_training=True`."""
        return self._train_op

    @property
    def logits_sample(self):
        """Op that samples from the logits (used for text generation)."""
        return self._logits_sample

In [11]:
# Seed the generated sentence with the ID of the word "it", shaped [1, 1]
feed = np.reshape(np.array(word_to_id['it']), (1, 1))

# Number of sampling steps to run after the seed word
text_length = 200

def generate_text(session, model, feed, text_length):
    """Sample from `model` step by step, feeding each sample back as input.

    Args:
        session: object whose `run(fetches, feed_dict)` evaluates the model.
        model: object exposing `initial_state`, `final_state`, `input_data`
            and `logits_sample`.
        feed: seed value fed to `model.input_data` on the first step.
        text_length: number of sampling steps to run.

    Returns:
        A list of `text_length + 1` items: the seed `feed` followed by the
        value fetched from `model.logits_sample` at each step.
    """
    fetches = {
        "final_state": model.final_state,
        "logits": model.logits_sample
    }
    lstm_state = session.run(model.initial_state)

    samples = [feed]
    current = feed

    for _ in range(text_length):
        inputs = {model.input_data: current}

        # Carry the LSTM state over from the previous step, layer by layer
        for layer, (c_tensor, h_tensor) in enumerate(model.initial_state):
            inputs[c_tensor] = lstm_state[layer].c
            inputs[h_tensor] = lstm_state[layer].h

        step = session.run(fetches, inputs)

        # The fetched state and sample become the state and input of the next step
        lstm_state, current = step["final_state"], step["logits"]
        samples.append(current)

    return samples

## Restore

In [12]:
# Directory that is searched for the saved checkpoint when restoring the model
model_path = 'model_output_2017-07-25-19-15'

In [14]:
with tf.Graph().as_default():
    # Define model for text generations
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
    # NOTE(review): the Saver looks variables up by name, so the "Model" scope
    # presumably has to match the scope used when the checkpoint was written.
    with tf.name_scope("Feed"):
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            mfeed = PTBModel(is_training=False, config=eval_config)

    with tf.Session() as session:
        # Initialize variables
        init = tf.global_variables_initializer()
        session.run(init)

        # Restore model weights from previously saved model
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(model_path)
        # `get_checkpoint_state` returns None when no checkpoint state is found;
        # fail with a clear message instead of an AttributeError on `ckpt`.
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise IOError("No checkpoint found in directory: %s" % model_path)
        saver.restore(session, ckpt.model_checkpoint_path)
        print("Model restored from file: %s" % ckpt.model_checkpoint_path)

        # Sample word IDs, then map each [1, 1] sample back to its word
        generated_ids = generate_text(session, mfeed, np.array(feed).reshape(1, 1), text_length)
        generated_text = ' '.join([id_to_word[text[0, 0]] for text in generated_ids])
        # %-formatting prints the sentence itself on both Python 2 and 3
        # (a two-argument print shows a tuple repr on Python 2).
        print("Sample text generation: %s" % generated_text)

Model restored from file: model_output_2017-07-25-19-15/model.ckpt-29975
('Sample text generation:', u"it 's won in material and capability and large marketing institutional investors <eos> for capital rooms for instance that and the first decline at the major period in july of the october N rate account was low from the same year <eos> but but some analysts say that which pencil shows completed leaving societe louis 's turnaround <eos> cash exploration are insured around N last year <eos> <unk> employed by N issues <unk> asian up N <eos> commission <unk> funds <eos> we 're wind for responsibility at least this year when britain tax-loss <unk> all shrinking results in singapore <eos> no grab could still broaden inflation for the debt bill later they say is a lot of money and not use there is a hurry to happen ahead and on others <eos> in addition to which mr. davis N years old widened a ban for clothing as by <unk> author last week as much as a tokyo-based and voice of operations <eos>