In [None]:
#!wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
#!tar xvf simple-examples.tgz

In [1]:
"""Utilities for parsing PTB text files."""
from __future__ import absolute_import
from __future__ import print_function

import collections
import os
import sys
import time

import tensorflow.python.platform

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.models.rnn import rnn

from tensorflow.python.platform import gfile


def _read_words(filename):
  """Read `filename` and return its contents as a list of tokens.

  Every newline character is replaced by the literal text "<eos>" before the
  text is split on whitespace.

  Args:
    filename: path of the text file to read.
  Returns:
    list of string tokens.
  """
  with gfile.GFile(filename, "r") as handle:
    text = handle.read()
  marked = text.replace("\n", "<eos>")
  return marked.split()


def _build_vocab(filename):
  """Build a word -> integer id mapping from the tokens of `filename`.

  Ids are assigned by descending frequency, so the most frequent token gets
  id 0. Tokens with the same count are ordered alphabetically; without that
  tie-break their relative order follows dict iteration order, which is not
  guaranteed to be the same from one interpreter run to the next.

  Args:
    filename: path of the text file the vocabulary is built from.
  Returns:
    dict mapping each distinct token to its id; empty if the file has no
    tokens.
  """
  data = _read_words(filename)

  counter = collections.Counter(data)
  # Sort key: highest count first, then the token itself for determinism.
  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

  # Enumerating the sorted pairs also handles an empty file, where unpacking
  # zip(*count_pairs) would fail.
  return {word: index for index, (word, _) in enumerate(count_pairs)}


def _file_to_word_ids(filename, word_to_id):
  """Convert the tokens of `filename` into a list of integer ids.

  Args:
    filename: path of the text file to convert.
    word_to_id: dict mapping token -> id.
  Returns:
    list of ids, one per token, in file order.
  Raises:
    KeyError: if a token of the file is not a key of `word_to_id`.
  """
  ids = []
  for token in _read_words(filename):
    ids.append(word_to_id[token])
  return ids


def ptb_raw_data(data_path='./simple-examples/data/'):
  """Load the PTB train/valid/test files and convert them to word ids.

  Reads "ptb.train.txt", "ptb.valid.txt" and "ptb.test.txt" from `data_path`.
  The vocabulary is built from the training file only and then used to
  convert all three files to lists of integer ids.

  The PTB dataset comes from Tomas Mikolov's webpage:
  http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

  Args:
    data_path: string path to the "data" directory of the extracted
      simple-examples.tgz archive. Defaults to the location used when the
      archive is extracted next to the notebook.
  Returns:
    tuple (train_data, valid_data, test_data, vocabulary)
    where each of the data objects is a list of word ids that can be passed
    to ptb_iterator, and vocabulary is the number of distinct training
    tokens.
  """

  train_path = os.path.join(data_path, "ptb.train.txt")
  valid_path = os.path.join(data_path, "ptb.valid.txt")
  test_path = os.path.join(data_path, "ptb.test.txt")

  word_to_id = _build_vocab(train_path)
  train_data = _file_to_word_ids(train_path, word_to_id)
  valid_data = _file_to_word_ids(valid_path, word_to_id)
  test_data = _file_to_word_ids(test_path, word_to_id)
  vocabulary = len(word_to_id)
  return train_data, valid_data, test_data, vocabulary


def ptb_iterator(raw_data, batch_size, num_steps):
  """Iterate on the raw PTB data.

  The data is cut into `batch_size` contiguous rows of equal length (any
  remainder at the end is dropped), and the rows are walked in parallel in
  windows of `num_steps` columns.

  Args:
    raw_data: one of the raw data outputs from ptb_raw_data (a flat sequence
      of integer word ids).
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.
  Yields:
    Pairs (x, y) of int32 matrices of shape [batch_size, num_steps].
    y is the same data as x shifted by one position, i.e. y[r, c] is the
    id that follows x[r, c] in the row.
  Raises:
    ValueError: if batch_size or num_steps are too high for the amount of
      data, so that not even one (x, y) pair can be produced. Because this
      is a generator, the error surfaces when iteration starts.
  """
  tokens = np.array(raw_data, dtype=np.int32)

  batch_len = len(tokens) // batch_size
  # Row i holds tokens[batch_len * i : batch_len * (i + 1)].
  data = tokens[:batch_size * batch_len].reshape(batch_size, batch_len)

  # One column is held back so that the shifted targets always exist.
  epoch_size = (batch_len - 1) // num_steps

  # "<= 0" rather than "== 0": when batch_size exceeds the data length,
  # batch_len is 0 and the floor division above yields -1.
  if epoch_size <= 0:
    raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

  for i in range(epoch_size):
    start = i * num_steps
    stop = start + num_steps
    x = data[:, start:stop]
    y = data[:, start + 1:stop + 1]
    yield (x, y)

In [None]:
from __future__ import absolute_import
from __future__ import print_function

import time

import tensorflow.python.platform

import numpy as np
import tensorflow as tf

from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import seq2seq

class PTBModel(object):
  """Stacked-LSTM language model built into the current default graph.

  Constructing an instance adds the input/target placeholders, the embedding
  lookup, the unrolled multi-layer LSTM, the softmax projection and the loss.
  When `is_training` is true it additionally adds gradient clipping, a
  gradient-descent update op and a learning-rate variable.

  Attributes:
    batch_size, num_steps: copied from `config`.
    input_data, targets: int32 placeholders of shape [batch_size, num_steps].
    initial_state: zero state of the stacked cell for `batch_size` rows.
    cost: sum of the per-position losses divided by batch_size.
    final_state: last element of the states returned by rnn.rnn.
    lr, train_op: only present when is_training is true.
  """

  def __init__(self, is_training, config):
    """Build the graph.

    Args:
      is_training: bool; enables dropout (when config.keep_prob < 1) and the
        optimizer ops.
      config: object exposing batch_size, num_steps, hidden_size, keep_prob,
        num_layers, vocab_size and max_grad_norm.
    """
    # batch_size controls how many sequences are processed at a time;
    # num_steps controls the degree of unrolling, i.e. where back
    # propagation through time is truncated.
    self.batch_size = config.batch_size
    self.num_steps = config.num_steps

    # Placeholders for the input word ids and the target word ids.
    self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])
    self.targets = tf.placeholder(tf.int32, [self.batch_size, self.num_steps])

    # --- Network structure ---
    # Basic LSTM cell with config.hidden_size units. forget_bias is passed
    # as 0.0 here rather than left at the library default.
    lstm_cell = rnn_cell.BasicLSTMCell(config.hidden_size, forget_bias=0.0)

    # During training, wrap the cell so that its outputs are kept with
    # probability config.keep_prob (keep_prob is a keep probability, not a
    # drop probability; 1.0 means no dropout).
    if is_training and config.keep_prob < 1:
      lstm_cell = rnn_cell.DropoutWrapper(
          lstm_cell, output_keep_prob=config.keep_prob)

    # Stack config.num_layers layers; the list repeats the same cell object.
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    # The recurrent state starts at zero.
    self.initial_state = cell.zero_state(self.batch_size, tf.float32)

    # The embedding variable and its lookup are pinned to the CPU.
    with tf.device("/cpu:0"):
      # get_variable creates "embedding" in the enclosing variable scope, or
      # looks it up when that scope was opened with reuse=True.
      embedding = tf.get_variable("embedding", [config.vocab_size, config.hidden_size])
      # Replaces every word id by its row of the embedding matrix.
      inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    # Dropout on the embedded inputs, training only.
    if is_training and config.keep_prob < 1:
      inputs = tf.nn.dropout(inputs, config.keep_prob)

    # tf.split cuts `inputs` along axis 1 into num_steps slices that each
    # keep a length-1 axis there; tf.squeeze removes that axis so rnn.rnn
    # receives one 2-D tensor per time step.
    inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, self.num_steps, inputs)]

    # NOTE(review): `rnn` is imported in the first code cell, not in this
    # cell's import list, so that cell has to run first.
    outputs, states = rnn.rnn(cell, inputs, initial_state=self.initial_state)

    # Concatenate the per-step outputs and flatten them to rows of
    # hidden_size, then project every row onto the vocabulary.
    output = tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size])
    logits = tf.nn.xw_plus_b(output,
                             tf.get_variable("softmax_w", [config.hidden_size, config.vocab_size]),
                             tf.get_variable("softmax_b", [config.vocab_size]))
    # All batch_size * num_steps positions are weighted equally (weights of 1).
    loss = seq2seq.sequence_loss_by_example([logits],
                                            [tf.reshape(self.targets, [-1])],
                                            [tf.ones([self.batch_size * self.num_steps])],
                                            config.vocab_size)
    self.cost = cost = tf.reduce_sum(loss) / self.batch_size
    self.final_state = states[-1]

    # Evaluation-only models stop here: no optimizer, no learning rate.
    if not is_training:
      return

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # Clip the gradients by their global norm before applying them.
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    # list(...) so the (gradient, variable) pairs can be iterated more than
    # once even where zip returns a one-shot iterator.
    self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))

    # Build the learning-rate update once. Creating a new tf.assign op on
    # every assign_lr call would add a node to the graph each epoch.
    self._new_lr = tf.placeholder(tf.float32, shape=[])
    self._lr_update = tf.assign(self.lr, self._new_lr)

  def assign_lr(self, session, lr_value):
    """Set the learning rate variable to `lr_value` using `session`.

    Only valid for models built with is_training=True.
    """
    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})


class Config(object):
  """Hyper-parameters read by PTBModel and by the training driver cell."""

  # Model shape.
  vocab_size = 10000
  hidden_size = 200
  num_layers = 2
  batch_size = 20
  num_steps = 2  # the value 20 is left commented out next to it in the original

  # Initialisation and regularisation.
  init_scale = 0.1   # variables are drawn uniformly from [-init_scale, init_scale]
  keep_prob = 1.0    # dropout is only added when this is below 1
  max_grad_norm = 5  # global-norm threshold for gradient clipping

  # Learning-rate schedule.
  learning_rate = 1.0
  lr_decay = 0.5      # raised to the power max(epoch - max_epoch, 0) by the driver
  max_epoch = 4
  max_max_epoch = 13  # epoch count of the driver's commented-out full loop


def run_epoch(session, m, data, eval_op, verbose=False):
  """Run model `m` once over `data` and return the resulting perplexity.

  The recurrent state produced by one batch is fed back in as the initial
  state of the next batch.

  Args:
    session: session used to run the graph; m.initial_state.eval() also
      requires a default session to be active.
    m: model exposing batch_size, num_steps, input_data, targets,
      initial_state, final_state and cost.
    data: flat sequence of word ids, as returned by ptb_raw_data.
    eval_op: extra op run with every batch (the train op for training, a
      no-op for evaluation).
    verbose: if true, print progress, running perplexity and words per
      second about ten times per epoch.
  Returns:
    exp(total cost / total number of unrolled steps).
  """
  epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps

  # Reporting interval. For epochs shorter than 10 batches epoch_size // 10
  # is 0, which would make the modulo below divide by zero, so clamp it to 1.
  # The offset stays at 10 for long epochs and shrinks for short intervals,
  # where "step % interval == 10" could never be true.
  report_every = max(epoch_size // 10, 1)
  report_at = min(10, report_every - 1)

  start_time = time.time()
  costs = 0.0
  iters = 0
  state = m.initial_state.eval()

  for step, (x, y) in enumerate(ptb_iterator(data, m.batch_size,
                                                    m.num_steps)):

    cost, state, _ = session.run([m.cost, m.final_state, eval_op],
                                 {m.input_data: x,
                                  m.targets: y,
                                  m.initial_state: state})

    costs += cost
    iters += m.num_steps

    if verbose and step % report_every == report_at:
      print("%.3f perplexity: %.3f speed: %.0f wps" %
            (step * 1.0 / epoch_size, np.exp(costs / iters),
             iters * m.batch_size / (time.time() - start_time)))

  return np.exp(costs / iters)

In [None]:
# Load the three PTB splits as lists of word ids; the vocabulary size
# (fourth element) is not used here.
raw_data = ptb_raw_data()
train_data, valid_data, test_data, _ = raw_data

# Settings for the training and validation models.
config = Config()

# The test model uses the same settings but processes one word at a time.
eval_config = Config()
eval_config.batch_size = 1
eval_config.num_steps = 1

with tf.Graph().as_default(), tf.Session() as session:
  # Initializer applied to every variable created inside the "model" scope.
  initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)
  
  # First pass through the scope creates the variables.
  with tf.variable_scope("model", reuse=None, initializer=initializer):
    m = PTBModel(is_training=True, config=config)
    
  # Second pass opens the same scope with reuse=True, so the validation and
  # test models look up the variables created for the training model.
  with tf.variable_scope("model", reuse=True, initializer=initializer):
    mvalid = PTBModel(is_training=False, config=config)
    mtest = PTBModel(is_training=False, config=eval_config)
    
  tf.initialize_all_variables().run()
  
  # The full schedule is the commented-out loop over config.max_max_epoch;
  # the active loop below runs a single epoch.
  #for i in range(config.max_max_epoch):
  for i in range(1):
    # Decay factor is 1 while i <= config.max_epoch, then shrinks by
    # config.lr_decay per additional epoch.
    lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
    m.assign_lr(session, config.learning_rate * lr_decay)

    print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
    # Training pass: m.train_op is run with every batch.
    train_perplexity = run_epoch(session, m, train_data, m.train_op, verbose=True)
    print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
    # Validation pass: tf.no_op() in place of a train op, so no update is applied.
    valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
    print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
    
  # Final evaluation on the test split, again without a train op.
  test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
  print("Test Perplexity: %.3f" % test_perplexity)

In [2]:
import tensorflow as tf
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import seq2seq

In [3]:
# Hyper-parameters for the inline (class-free) graph built in the next cell.

# Model shape.
vocab_size = 10000
hidden_size = 200
num_layers = 5
batch_size = 20
num_steps = 20

# Initialisation and regularisation.
init_scale = 0.1   # bound of the uniform initializer used for the embedding
keep_prob = 0.5    # keep probability passed to both dropout calls
max_grad_norm = 5  # global-norm threshold for gradient clipping

# Learning-rate schedule.
learning_rate = 1.0
lr_decay = 0.5
max_epoch = 4
max_max_epoch = 13

In [51]:
# Builds the same network as PTBModel inline, without the class, and fetches
# a few intermediate tensors for one batch so they can be inspected in the
# cells below. The actual training step is disabled.
with tf.Graph().as_default(), tf.Session() as session:

  # Input and target word ids.
  input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
  targets = tf.placeholder(tf.int32, [batch_size, num_steps])

  # Stacked LSTM; dropout is applied unconditionally in this cell.
  lstm_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)
  lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
  cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

  initial_state = cell.zero_state(batch_size, tf.float32)

  initializer = tf.random_uniform_initializer(-init_scale, init_scale)

  # NOTE(review): only the embedding receives `initializer` here; the other
  # variables are created without one, unlike PTBModel where the enclosing
  # variable scope supplies it. Confirm whether that difference is intended.
  embedding = tf.get_variable("embedding", [vocab_size, hidden_size], initializer=initializer)

  # Kept as separate names so each stage can be fetched below.
  inputs_tmp1 = tf.nn.embedding_lookup(embedding, input_data)
  inputs_tmp2 = tf.nn.dropout(inputs_tmp1, keep_prob)
  # One slice per time step, each still carrying a length-1 axis at index 1.
  split_inputs = tf.split(1, num_steps, inputs_tmp2)
  inputs = [tf.squeeze(input_, [1]) for input_ in split_inputs]

  outputs, states = rnn.rnn(cell, inputs, initial_state=initial_state)

  output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
  logits = tf.nn.xw_plus_b(output,
                           tf.get_variable("softmax_w", [hidden_size, vocab_size]),
                           tf.get_variable("softmax_b", [vocab_size]))
  loss = seq2seq.sequence_loss_by_example([logits],
                                          [tf.reshape(targets, [-1])],
                                          [tf.ones([batch_size * num_steps])],
                                          vocab_size)
  cost = tf.reduce_sum(loss) / batch_size
  final_state = states[-1]

  lr = tf.Variable(0.0, trainable=False)
  tvars = tf.trainable_variables()
  grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
  optimizer = tf.train.GradientDescentOptimizer(lr)
  train_op = optimizer.apply_gradients(list(zip(grads, tvars)))

  # Learning-rate update op, built once instead of once per epoch so the
  # graph does not grow inside the loop.
  new_lr = tf.placeholder(tf.float32, shape=[])
  lr_update = tf.assign(lr, new_lr)

  session.run(tf.initialize_all_variables())

  raw_data = ptb_raw_data()
  train_data, valid_data, test_data, _ = raw_data

  for i in range(max_max_epoch):

    # Use a separate name for the per-epoch factor. Assigning the result back
    # to `lr_decay` would overwrite the configured base with 1.0 on the first
    # pass (exponent 0), after which the rate could never decay.
    epoch_lr_decay = lr_decay ** max(i - max_epoch, 0.0)
    session.run(lr_update, {new_lr: learning_rate * epoch_lr_decay})

    epoch_size = ((len(train_data) // batch_size) - 1) // num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    loop_state = initial_state.eval()

    for step, (x, y) in enumerate(ptb_iterator(train_data, batch_size, num_steps)):

      # Fetch intermediate tensors for the first batch only:
      #   ret1 - embedded inputs, ret2 - the embedding matrix,
      #   ret3 - embedded inputs after dropout, ret4 - first time-step slice
      #   after squeeze, ret5 - the same slice before squeeze.
      # ret5 is fetched because the inspection cells below read it; their
      # recorded shape (20, 1, 200) matches the un-squeezed slice.
      ret1, ret2, ret3, ret4, ret5 = session.run([inputs_tmp1,
                                                  embedding,
                                                  inputs_tmp2,
                                                  inputs[0],
                                                  split_inputs[0]],
                                                 {input_data: x,
                                                  targets: y,
                                                  initial_state: loop_state})

      # Only one batch per epoch is inspected.
      break

      # Disabled training step, kept for reference (unreachable after the
      # break above):
      #
      # loop_cost, loop_state, _ = session.run([cost, final_state, train_op],
      #                              {input_data: x,
      #                               targets: y,
      #                               initial_state: loop_state})
      #
      # costs += loop_cost
      # iters += num_steps
      #
      # if step % (epoch_size // 10) == 10:
      #   print("%.3f perplexity: %.3f speed: %.0f wps" %
      #         (step * 1.0 / epoch_size, np.exp(costs / iters),
      #          iters * batch_size / (time.time() - start_time)))
      #
      # and, after the batch loop:
      # perplexity = np.exp(costs / iters)

In [29]:
# Print the shape of every fetched array, one per line.
print(*(fetched.shape for fetched in (ret1, ret2, ret3, ret4, ret5)), sep="\n")

(20, 20, 200)
(10000, 200)
(20, 20, 200)
(20, 200)
(20, 1, 200)


In [50]:
# Display the first batch entry of ret5; with the recorded shape
# (20, 1, 200) this is a 1 x 200 array.
ret5[0]

array([[ 0.        ,  0.06976767, -0.        , -0.        , -0.18383498,
         0.04760842,  0.        , -0.19403587, -0.        , -0.06934309,
        -0.        ,  0.        ,  0.00987712, -0.        , -0.        ,
         0.        , -0.        ,  0.        , -0.18004762,  0.04186821,
         0.        ,  0.17032333, -0.10798144,  0.13395394,  0.0044004 ,
         0.        , -0.        , -0.        , -0.        , -0.00241742,
        -0.06923652,  0.        , -0.04678969,  0.        , -0.0671256 ,
        -0.        , -0.        ,  0.15680198, -0.        , -0.07658863,
         0.        ,  0.03074665,  0.00498104,  0.        ,  0.05465998,
        -0.19333978, -0.13523045,  0.04179063, -0.14354853,  0.02281737,
         0.        ,  0.        , -0.15542312, -0.04228082,  0.14731266,
         0.        , -0.        , -0.11571407, -0.        , -0.13928676,
        -0.08894677, -0.0172914 , -0.        ,  0.09594561,  0.09298982,
        -0.09191232, -0.        , -0.        ,  0. 

In [47]:
# Display the first batch entry of ret4 (the squeezed first time-step slice,
# recorded shape (20, 200)), i.e. a vector of 200 values.
ret4[0]

array([ 0.        ,  0.06976767, -0.        , -0.        , -0.18383498,
        0.04760842,  0.        , -0.19403587, -0.        , -0.06934309,
       -0.        ,  0.        ,  0.00987712, -0.        , -0.        ,
        0.        , -0.        ,  0.        , -0.18004762,  0.04186821,
        0.        ,  0.17032333, -0.10798144,  0.13395394,  0.0044004 ,
        0.        , -0.        , -0.        , -0.        , -0.00241742,
       -0.06923652,  0.        , -0.04678969,  0.        , -0.0671256 ,
       -0.        , -0.        ,  0.15680198, -0.        , -0.07658863,
        0.        ,  0.03074665,  0.00498104,  0.        ,  0.05465998,
       -0.19333978, -0.13523045,  0.04179063, -0.14354853,  0.02281737,
        0.        ,  0.        , -0.15542312, -0.04228082,  0.14731266,
        0.        , -0.        , -0.11571407, -0.        , -0.13928676,
       -0.08894677, -0.0172914 , -0.        ,  0.09594561,  0.09298982,
       -0.09191232, -0.        , -0.        ,  0.        , -0.  