In [1]:
#!wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
#!tar xvf simple-examples.tgz

In [2]:
"""Utilities for parsing PTB text files."""
from __future__ import absolute_import
from __future__ import print_function

import collections
import os
import sys
import time

import tensorflow.python.platform

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.models.rnn import rnn

from tensorflow.python.platform import gfile


def _read_words(filename):
  with gfile.GFile(filename, "r") as f:
    return f.read().replace("\n", "<eos>").split()


def _build_vocab(filename):
  data = _read_words(filename)

  counter = collections.Counter(data)
  count_pairs = sorted(counter.items(), key=lambda x: -x[1])

  words, _ = list(zip(*count_pairs))
  word_to_id = dict(zip(words, range(len(words))))

  return word_to_id


def _file_to_word_ids(filename, word_to_id):
  data = _read_words(filename)
  return [word_to_id[word] for word in data]


def ptb_raw_data():
  """Load PTB raw data from data directory "data_path".
  Reads PTB text files, converts strings to integer ids,
  and performs mini-batching of the inputs.
  The PTB dataset comes from Tomas Mikolov's webpage:
  http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
  Args:
    data_path: string path to the directory where simple-examples.tgz has
      been extracted.
  Returns:
    tuple (train_data, valid_data, test_data, vocabulary)
    where each of the data objects can be passed to PTBIterator.
  """

  train_path = os.path.join('./simple-examples/data/', "ptb.train.txt")
  valid_path = os.path.join('./simple-examples/data/', "ptb.valid.txt")
  test_path = os.path.join('./simple-examples/data', "ptb.test.txt")

  word_to_id = _build_vocab(train_path)
  train_data = _file_to_word_ids(train_path, word_to_id)
  valid_data = _file_to_word_ids(valid_path, word_to_id)
  test_data = _file_to_word_ids(test_path, word_to_id)
  vocabulary = len(word_to_id)
  return train_data, valid_data, test_data, vocabulary


def ptb_iterator(raw_data, batch_size, num_steps):
  """Iterate on the raw PTB data.
  This generates batch_size pointers into the raw PTB data, and allows
  minibatch iteration along these pointers.
  Args:
    raw_data: one of the raw data outputs from ptb_raw_data.
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.
  Yields:
    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
    The second element of the tuple is the same data time-shifted to the
    right by one.
  Raises:
    ValueError: if batch_size or num_steps are too high.
  """
  raw_data = np.array(raw_data, dtype=np.int32)

  data_len = len(raw_data)
  batch_len = data_len // batch_size
  data = np.zeros([batch_size, batch_len], dtype=np.int32)
  for i in range(batch_size):
    data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

  epoch_size = (batch_len - 1) // num_steps

  if epoch_size == 0:
    raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

  for i in range(epoch_size):
    x = data[:, i*num_steps:(i+1)*num_steps]
    y = data[:, i*num_steps+1:(i+1)*num_steps+1]
    yield (x, y)

In [3]:
import tensorflow as tf
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn import seq2seq

In [4]:
init_scale = 0.1
learning_rate = 1.0
max_grad_norm = 5
num_layers = 5
num_steps = 20
hidden_size = 200
max_epoch = 4
max_max_epoch = 13
keep_prob = 0.5
lr_decay = 0.5
batch_size = 20
vocab_size = 10000

In [84]:
with tf.Graph().as_default(), tf.Session() as session:

  '''Placeholders for'''
  input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
  targets = tf.placeholder(tf.int32, [batch_size, num_steps])

  lstm_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)
  lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
  cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

  initial_state = cell.zero_state(batch_size, tf.float32)

  initializer = tf.random_uniform_initializer(-init_scale, init_scale)

  embedding = tf.get_variable("embedding", [vocab_size, hidden_size], initializer=initializer)      
  
  inputs_tmp1 = tf.nn.embedding_lookup(embedding, input_data) # [20][20][200]
  inputs_tmp2 = tf.nn.dropout(inputs_tmp1, keep_prob) # [20][20][200]
  inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs_tmp2)] # (list of 20)[20][200]
    
  outputs, states = rnn.rnn(cell, inputs, initial_state=initial_state)
  
  output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])
  logits = tf.nn.xw_plus_b(output,
                           tf.get_variable("softmax_w", [hidden_size, vocab_size]),
                           tf.get_variable("softmax_b", [vocab_size]))
  loss = seq2seq.sequence_loss_by_example([logits],
                                          [tf.reshape(targets, [-1])],
                                          [tf.ones([batch_size * num_steps])],
                                          vocab_size)
  cost = tf.reduce_sum(loss) / batch_size
  final_state = states[-1]
  
  lr = tf.Variable(0.0, trainable=False) # trainable defaults to True..!
  tvars = tf.trainable_variables()
  grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
  optimizer = tf.train.GradientDescentOptimizer(lr)
  train_op = optimizer.apply_gradients(zip(grads, tvars))
  
  session.run(tf.initialize_all_variables())
  
  raw_data = ptb_raw_data()
  train_data, valid_data, test_data, _ = raw_data
  
  for i in range(max_max_epoch):
    
    lr_decay = lr_decay ** max(i - max_epoch, 0.0)
    session.run(tf.assign(lr, learning_rate * lr_decay))
    
    epoch_size = ((len(train_data) // batch_size) - 1) // num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0
    loop_state = initial_state.eval()

    for step, (x, y) in enumerate(ptb_iterator(train_data, batch_size, num_steps)):
  
      ret1, ret2, ret3, ret4, ret5, ret6, ret7, loop_cost, loop_state, _ = session.run([inputs_tmp1, 
                                embedding,
                                inputs_tmp2,
                                inputs[0],
                                outputs[0],
                                output,
                                logits,
                                cost, final_state, train_op],
                 {input_data: x,
                  targets: y,
                  initial_state: loop_state})
    
      for v in tvars:
        print(v.name)
      
      break

      '''loop_cost, loop_state, _ = session.run([cost, final_state, train_op],
                                   {input_data: x,
                                    targets: y,
                                    initial_state: loop_state})

      costs += loop_cost
      iters += num_steps

      if step % (epoch_size // 10) == 10:
        print("%.3f perplexity: %.3f speed: %.0f wps" %
              (step * 1.0 / epoch_size, np.exp(costs / iters),
               iters * batch_size / (time.time() - start_time)))

    perplexity = np.exp(costs / iters)'''
      
    break

embedding:0
RNN/MultiRNNCell/Cell0/BasicLSTMCell/Linear/Matrix:0
RNN/MultiRNNCell/Cell0/BasicLSTMCell/Linear/Bias:0
RNN/MultiRNNCell/Cell1/BasicLSTMCell/Linear/Matrix:0
RNN/MultiRNNCell/Cell1/BasicLSTMCell/Linear/Bias:0
RNN/MultiRNNCell/Cell2/BasicLSTMCell/Linear/Matrix:0
RNN/MultiRNNCell/Cell2/BasicLSTMCell/Linear/Bias:0
RNN/MultiRNNCell/Cell3/BasicLSTMCell/Linear/Matrix:0
RNN/MultiRNNCell/Cell3/BasicLSTMCell/Linear/Bias:0
RNN/MultiRNNCell/Cell4/BasicLSTMCell/Linear/Matrix:0
RNN/MultiRNNCell/Cell4/BasicLSTMCell/Linear/Bias:0
softmax_w:0
softmax_b:0


In [30]:
x.shape

(20, 20)

In [31]:
y.shape

(20, 20)

In [32]:
ret1.shape

(20, 20, 200)

In [35]:
ret3.shape

(20, 20, 200)

In [37]:
ret4.shape

(20, 200)

In [65]:
ret5.shape

(20, 200)

In [48]:
ret6.shape

(400, 200)

In [64]:
ret6

array([[  1.50319203e-04,   1.06235539e-05,  -6.09649906e-05, ...,
          9.13511685e-05,   0.00000000e+00,   1.14943025e-04],
       [ -0.00000000e+00,  -0.00000000e+00,  -1.59318108e-04, ...,
          0.00000000e+00,  -2.63182068e-04,   2.08657380e-04],
       [  0.00000000e+00,  -9.24876585e-05,  -0.00000000e+00, ...,
          4.72031476e-04,   4.13459202e-05,  -0.00000000e+00],
       ..., 
       [ -8.33519385e-04,  -0.00000000e+00,  -0.00000000e+00, ...,
          1.16604591e-04,   9.02963395e-04,  -1.63923600e-04],
       [ -8.11368867e-04,   3.21977532e-05,  -0.00000000e+00, ...,
         -0.00000000e+00,   0.00000000e+00,   5.17878332e-04],
       [ -0.00000000e+00,   0.00000000e+00,  -3.43326945e-04, ...,
         -1.44983263e-04,   5.53562888e-04,   0.00000000e+00]], dtype=float32)

In [67]:
len(outputs)

20

In [69]:
ret7.shape

(400, 10000)

In [73]:
ret7[0]

array([-0.42348099, -0.26516798,  1.55406642, ...,  0.04979973,
        1.68157017,  0.15827167], dtype=float32)

In [80]:
with tf.Graph().as_default(), tf.Session() as session:
  for v in tvars:
    print(v.eval())

ValueError: Cannot use the default session to evaluate tensor: the tensor's graph is different from the session's graph. Pass an explicit session to eval(session=sess).

If we start to think about applying this to financial time series...
What should be the inputs..?
And what should be the outputs..?

Inputs:
* A tensor of each index from day N.  E.g. [1.23, 2.34, 3.45, 4.56, 5.67, 6.78, 7.89, 8.90]