In [1]:
import collections
import os
import time

import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Load the PTB (Penn Treebank) dataset used for testing.
# The reader functions below are copied from the TensorFlow examples.

def _read_words(filename):
  """Read `filename` and return its whitespace-separated tokens.

  Every newline is replaced by the marker "<eos>" before splitting, so a line
  break surrounded by whitespace becomes a token of its own.

  Args:
    filename: path of the text file, opened through tf.gfile.GFile.
  Returns:
    list of token strings.
  """
  with tf.gfile.GFile(filename, "r") as f:
    text = f.read()
  # The reader may hand back raw bytes (Python 2 `str`) or already-decoded
  # text; only bytes need decoding, and text has no usable `.decode`.
  if isinstance(text, bytes):
    text = text.decode("utf-8")
  return text.replace("\n", "<eos>").split()


def _build_vocab(filename):
  """Build the token -> integer id mapping for the file `filename`.

  Ids follow descending token frequency; tokens with equal counts are ordered
  by string comparison, so id 0 belongs to the most frequent token.

  Args:
    filename: path of the text file to scan (read through _read_words).
  Returns:
    dict mapping each distinct token to its id; empty when the file holds no
    tokens.
  """
  counter = collections.Counter(_read_words(filename))
  # Sort key (-count, token): most frequent first, deterministic on ties.
  ranked = sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))
  # Enumerating the ranked pairs directly also copes with an empty file,
  # where unpacking `zip(*pairs)` would raise ValueError.
  return {word: idx for idx, (word, _) in enumerate(ranked)}


def _file_to_word_ids(filename, word_to_id):
  """Translate the tokens of `filename` into ids using `word_to_id`.

  Args:
    filename: path of the text file (read through _read_words).
    word_to_id: dict mapping token -> id.
  Returns:
    list of ids in file order; tokens missing from `word_to_id` are skipped.
  """
  tokens = _read_words(filename)
  known_tokens = (token for token in tokens if token in word_to_id)
  return [word_to_id[token] for token in known_tokens]


def ptb_raw_data(data_path=None):
  """Load the PTB raw data from the directory `data_path`.

  Reads ptb.train.txt, ptb.valid.txt and ptb.test.txt and converts their
  tokens to integer ids. The vocabulary is built from the training file only;
  tokens of the other files that are not in it are dropped. No batching
  happens here (see ptb_producer).

  The PTB dataset comes from Tomas Mikolov's webpage:
  http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

  Args:
    data_path: string path to the directory where simple-examples.tgz has
      been extracted.
  Returns:
    tuple (train_data, valid_data, test_data, vocabulary) where the first
    three are lists of word ids and `vocabulary` is the vocabulary size.
  Raises:
    ValueError: if data_path is None.
  """
  # Fail early with a readable message instead of an error from os.path.join.
  if data_path is None:
    raise ValueError(
        "data_path must be the directory containing the ptb.*.txt files")

  train_path = os.path.join(data_path, "ptb.train.txt")
  valid_path = os.path.join(data_path, "ptb.valid.txt")
  test_path = os.path.join(data_path, "ptb.test.txt")

  word_to_id = _build_vocab(train_path)
  train_data = _file_to_word_ids(train_path, word_to_id)
  valid_data = _file_to_word_ids(valid_path, word_to_id)
  test_data = _file_to_word_ids(test_path, word_to_id)
  vocabulary = len(word_to_id)
  return train_data, valid_data, test_data, vocabulary


def ptb_producer(raw_data, batch_size, num_steps, name=None):
  """Iterate on the raw PTB data.

  Reshapes raw_data into `batch_size` rows and returns Tensors that yield
  consecutive windows of `num_steps` columns from those rows, one window per
  evaluation.

  Args:
    raw_data: one of the raw data outputs from ptb_raw_data.
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.
    name: the name of this operation (optional).
  Returns:
    A pair of Tensors, each shaped [batch_size, num_steps]. The second element
    of the tuple is the same data time-shifted to the right by one.
  Raises:
    tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.
  """
  with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]):
    raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32)

    data_len = tf.size(raw_data)
    batch_len = data_len // batch_size
    # Drop the tail that does not fill a complete row.
    data = tf.reshape(raw_data[0 : batch_size * batch_len],
                      [batch_size, batch_len])

    # One column is held back so the shifted targets never run off the end.
    epoch_size = (batch_len - 1) // num_steps
    assertion = tf.assert_positive(
        epoch_size,
        message="epoch_size == 0, decrease batch_size or num_steps")
    with tf.control_dependencies([assertion]):
      epoch_size = tf.identity(epoch_size, name="epoch_size")

    # `i` cycles through 0 .. epoch_size - 1, in order.
    i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
    # strided_slice takes (begin, end, strides): the window ends at
    # [batch_size, (i + 1) * num_steps] and is read with unit strides.
    x = tf.strided_slice(data, [0, i * num_steps],
                         [batch_size, (i + 1) * num_steps],
                         [1, 1])
    x.set_shape([batch_size, num_steps])
    y = tf.strided_slice(data, [0, i * num_steps + 1],
                         [batch_size, (i + 1) * num_steps + 1],
                         [1, 1])
    y.set_shape([batch_size, num_steps])
    return x, y

In [3]:
# Directory holding the extracted ptb.*.txt files. Set the PTB_DATA_DIR
# environment variable to use another location without editing this cell.
PTB_DATA_DIR = os.environ.get('PTB_DATA_DIR', '/home/kaushik/Desktop/timeseriesDL/data/ptb')
BATCH_SIZE = 64  # sequences per mini-batch
NUM_STEPS = 20   # time steps per sequence

train, val, test, vocab = ptb_raw_data(PTB_DATA_DIR)

x_batch, y_batch = ptb_producer(train, BATCH_SIZE, NUM_STEPS)

In [4]:
# Show the repr of the input tensor returned by ptb_producer (name, static shape, dtype).
x_batch

<tf.Tensor 'PTBProducer/StridedSlice:0' shape=(64, 20) dtype=int32>

In [5]:
import sys
from os import path

# Put the parent of the current directory on the import path -- presumably
# where layers.py lives (TODO confirm). The membership check keeps re-runs of
# this cell from appending the same entry again.
_PARENT_DIR = path.dirname(path.abspath('.'))
if _PARENT_DIR not in sys.path:
    sys.path.append(_PARENT_DIR)

In [13]:
from layers import lstmLayer, DeepLSTM

# TODO: add batch re-norm and dropout
class LSTM(object):
    '''Overall model built on the cells from layers.py.

    The recurrent layer (a single `lstmLayer`, or a `DeepLSTM` stack when
    `num_layers` != 1) is unrolled for `seq_len` steps and the output of the
    last step is projected onto `op_channels` classes by a softmax layer.
    '''
    def __init__(self, args):
        '''Copy the hyper-parameters out of `args`.

        Args:
            args: dict with the keys 'seq_len', 'num_layers', 'cell',
                'hidden_units', 'op_channels', 'mode', 'lr_rate',
                'grad_clip' and 'batch_size'.
        '''
        self.seq_len = args['seq_len']
        self.num_layers = args['num_layers']
        self.cell = args['cell']
        self.hidden_units = args['hidden_units']
        self.op_classes = args['op_channels']
        self.mode = args['mode']
        self.init_lr = args['lr_rate']
        self.grad_clip = args['grad_clip']
        self.batch_size = args['batch_size']

    def build_graph(self):
        '''Build the forward graph, the training nodes (mode 'train' only) and the merged summaries.'''
        self._build_model()

        if self.mode == 'train':
            self._add_train_nodes()
        self.summaries = tf.summary.merge_all()

    def _build_model(self):
        '''Create the input placeholder, the unrolled recurrent layer and the softmax output.'''
        # define placeholder for data layer
        self.input_layer_x = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_len],
                                            name='input_layer_x')

        # define model based on cell and num_layers
        if self.num_layers == 1:
            self.lstm_layer = lstmLayer(self.hidden_units)
        else:
            # NOTE(review): the list repeats ONE lstmLayer object; whether
            # DeepLSTM gives each position its own weights depends on
            # layers.py -- confirm.
            cells = [lstmLayer(self.hidden_units)]*self.num_layers
            self.lstm_layer = DeepLSTM(cells)

        # TODO: Think about the need for statefullness in this problem scenario
        self.initial_state = tf.zeros([self.batch_size, self.lstm_layer.state_size], tf.float32)

        state = self.initial_state
        # Only survives as the final output when seq_len is 0.
        output = tf.zeros([self.batch_size, self.hidden_units], dtype=tf.float32)
        # run the model for multiple time steps
        with tf.variable_scope("RNN"):
            for step in range(self.seq_len):
                # Every step after the first reuses the variables created by step 0.
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                # Feed column `step` of the input together with the running state.
                output, state = self.lstm_layer(self.input_layer_x[:, step], state)

        self.final_state = state
        self.final_output = output

        softmax_w = tf.get_variable('softmax_w', [self.hidden_units, self.op_classes], dtype=tf.float32,
                                    initializer=tf.random_uniform_initializer())
        softmax_b = tf.get_variable('softmax_b', [self.op_classes], dtype=tf.float32,
                                    initializer=tf.random_uniform_initializer())

        self.logits = tf.matmul(self.final_output, softmax_w) + softmax_b

        # get probabilities for these logits through softmax (will be needed for sampling)
        self.output_prob = tf.nn.softmax(self.logits)
        # NOTE(review): `activation_summary` is not defined in the visible
        # notebook cells; it must be in scope before the graph is built.
        activation_summary(self.output_prob)

    def _add_train_nodes(self):
        '''Add the target placeholder, loss, summaries and the clipped Adam update.'''
        # define placeholder for target layer
        self.input_layer_y = tf.placeholder(tf.float32, [self.batch_size, self.op_classes],
                                            name='input_layer_y')

        # TODO: Implement proper loss function for encoder like structure of LSTM
        # Keyword arguments keep logits/labels unambiguous across TensorFlow versions.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_layer_y))
        tf.summary.scalar("loss", self.cost)

        self.lr = tf.Variable(self.init_lr, trainable=False)
        # The learning-rate update op is built once here, so assign_lr does
        # not add a new op to the graph on every call.
        self._new_lr = tf.placeholder(tf.float32, shape=[], name='new_learning_rate')
        self._lr_update = tf.assign(self.lr, self._new_lr)

        trainable_variables = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(self.lr)
        grads_vars = optimizer.compute_gradients(self.cost, trainable_variables)

        # histogram_summaries for weights and gradients
        for var in trainable_variables:
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads_vars:
            if grad is not None:
                tf.summary.histogram(var.op.name+'/gradient', grad)

        # Clip by global norm and apply the CLIPPED gradients; each clipped
        # gradient is re-paired with the variable it was computed for.
        clipped_grads, _ = tf.clip_by_global_norm([grad for grad, _ in grads_vars], self.grad_clip)
        clipped_grads_vars = list(zip(clipped_grads, [var for _, var in grads_vars]))
        self.train_op = optimizer.apply_gradients(clipped_grads_vars)

    def assign_lr(self, session, lr_value):
        '''Set the learning rate to `lr_value` (requires the training nodes to exist).'''
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    def initialize_state(self, session):
        '''Evaluate the initial-state tensor in `session`; the result is discarded.'''
        session.run(self.initial_state)

In [14]:
# Bi-directional LSTM code based on LSTM code

class BiLSTM(object):
    '''Bi-directional variant of the LSTM model, built from two `lstmLayer` cells.

    One cell reads the input left to right, the other reads the time-reversed
    input. Their outputs are concatenated per time step (`step_outputs`), and
    the concatenation of each direction's last processed step
    (`final_output`) feeds the softmax layer.
    '''

    def __init__(self, args):
        '''Copy the hyper-parameters out of `args`.

        Args:
            args: dict with the keys 'seq_len', 'num_layers', 'cell',
                'hidden_units', 'op_channels', 'mode', 'lr_rate',
                'grad_clip' and 'batch_size'.
        '''
        self.seq_len = args['seq_len']
        self.num_layers = args['num_layers']
        self.cell = args['cell']
        self.hidden_units = args['hidden_units']
        self.op_classes = args['op_channels']
        self.mode = args['mode']
        self.init_lr = args['lr_rate']
        self.grad_clip = args['grad_clip']
        self.batch_size = args['batch_size']

    def build_graph(self):
        '''Build the forward graph, the training nodes (mode 'train' only) and the merged summaries.'''
        self._build_model()

        if self.mode == 'train':
            self._add_train_nodes()
        self.summaries = tf.summary.merge_all()

    def _build_model(self):
        '''Create the input placeholder, both unrolled chains and the softmax output.'''
        # define placeholder for data layer
        # NOTE(review): int32 here versus float32 in LSTM -- confirm which
        # dtype lstmLayer expects.
        self.input_layer_x = tf.placeholder(dtype=tf.int32, shape=[self.batch_size, self.seq_len],
                                            name='input_layer_x')

        # one cell per direction
        self.fw_lstm_layer = lstmLayer(self.hidden_units)
        self.bk_lstm_layer = lstmLayer(self.hidden_units)

        self.initial_state = tf.zeros([self.batch_size, self.fw_lstm_layer.state_size], tf.float32)

        def _run_loop(input_data, layer, name):
            '''Unroll `layer` over `input_data`; return the outputs in processing order.'''
            state = self.initial_state
            outputs = []
            # A scope per direction keeps the two chains' variables apart.
            with tf.variable_scope(name+"LSTM"):
                for step in range(self.seq_len):
                    # Every step after the first reuses the variables created by step 0.
                    if step > 0:
                        tf.get_variable_scope().reuse_variables()
                    output, state = layer(input_data[:, step], state)
                    outputs.append(output)

            return outputs

        # run the forward chain
        fw_outputs = _run_loop(self.input_layer_x, self.fw_lstm_layer, 'fw_')
        # run the backward chain on the time-reversed input, then flip its
        # outputs back so index t lines up with forward step t
        bk_input_x = tf.reverse(self.input_layer_x, [False, True])
        bk_outputs = _run_loop(bk_input_x, self.bk_lstm_layer, 'bk_')[::-1]

        # concat the forward and backward runs, one tensor per time step
        self.step_outputs = [tf.concat(1, [fw, bk]) for fw, bk in zip(fw_outputs, bk_outputs)]

        # NOTE(review): the softmax below and the target placeholder expect one
        # row per example, so the summary fed to it joins the last step each
        # direction processed (forward: t = seq_len - 1, backward: t = 0).
        # Confirm this is the intended reduction.
        self.final_output = tf.concat(1, [fw_outputs[-1], bk_outputs[0]])

        # now combine with softmax to produce output
        softmax_w = tf.get_variable('softmax_w', [2*self.hidden_units, self.op_classes], dtype=tf.float32,
                                    initializer=tf.random_uniform_initializer())
        softmax_b = tf.get_variable('softmax_b', [self.op_classes], dtype=tf.float32,
                                    initializer=tf.random_uniform_initializer())

        self.logits = tf.matmul(self.final_output, softmax_w) + softmax_b

        # get probabilities for these logits through softmax (will be needed for sampling)
        self.output_prob = tf.nn.softmax(self.logits)
        # NOTE(review): `activation_summary` is not defined in the visible
        # notebook cells; it must be in scope before the graph is built.
        activation_summary(self.output_prob)

    def _add_train_nodes(self):
        '''Add the target placeholder, loss, summaries and the clipped Adam update.'''
        # define placeholder for target layer
        self.input_layer_y = tf.placeholder(tf.float32, [self.batch_size, self.op_classes],
                                            name='input_layer_y')

        # TODO: Implement proper loss function for encoder like structure of LSTM
        # Keyword arguments keep logits/labels unambiguous across TensorFlow versions.
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.input_layer_y))
        tf.summary.scalar("loss", self.cost)

        self.lr = tf.Variable(self.init_lr, trainable=False)
        # The learning-rate update op is built once here, so assign_lr does
        # not add a new op to the graph on every call.
        self._new_lr = tf.placeholder(tf.float32, shape=[], name='new_learning_rate')
        self._lr_update = tf.assign(self.lr, self._new_lr)

        trainable_variables = tf.trainable_variables()
        optimizer = tf.train.AdamOptimizer(self.lr)
        grads_vars = optimizer.compute_gradients(self.cost, trainable_variables)

        # histogram_summaries for weights and gradients
        for var in trainable_variables:
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads_vars:
            if grad is not None:
                tf.summary.histogram(var.op.name+'/gradient', grad)

        # Clip by global norm and apply the CLIPPED gradients; each clipped
        # gradient is re-paired with the variable it was computed for.
        clipped_grads, _ = tf.clip_by_global_norm([grad for grad, _ in grads_vars], self.grad_clip)
        clipped_grads_vars = list(zip(clipped_grads, [var for _, var in grads_vars]))
        self.train_op = optimizer.apply_gradients(clipped_grads_vars)

    def assign_lr(self, session, lr_value):
        '''Set the learning rate to `lr_value` (requires the training nodes to exist).'''
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    def initialize_state(self, session):
        '''Evaluate the initial-state tensor in `session`; the result is discarded.'''
        session.run(self.initial_state)

In [15]:
# Testing

args = {'num_epochs': 5, 'lr_rate': 0.01, 'lr_decay': 0.97, 'seq_len': 20, 'num_layers': 2, 'cell': 'lstm',
        'hidden_units': 64, 'op_channels': 10000,'grad_clip': 5.0, 'batch_size': 64}

# NOTE(review): `plt`, `logdir` and `ix` (used for the plot at the end) are not
# defined in the visible notebook cells; they must be in scope before this
# cell runs.

# Initialize session and graph
with tf.Graph().as_default(), tf.Session() as session:

    args['mode'] = 'train'

    with tf.variable_scope("model", reuse=None):
        train_model = LSTM(args)
        train_model.build_graph()

    # Build the input pipeline ONCE, inside this graph. Each evaluation of the
    # pair yields the next [batch_size, seq_len] window of word ids.
    x_ids_op, y_ids_op = ptb_producer(train, args['batch_size'], args['seq_len'])
    # Windows per pass over the data; mirrors epoch_size in ptb_producer.
    max_batches = ((len(train) // args['batch_size']) - 1) // args['seq_len']

    tf.initialize_all_variables().run()

    # ptb_producer reads from a queue, which only fills once the queue
    # runners have been started.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coord)

    cost_over_batches = []

    try:
        for epoch in range(args['num_epochs']):
            # Keep the initial rate for the first epochs, then decay geometrically.
            lr_decay = args['lr_decay'] ** max(epoch - 2.0, 0.0)
            train_model.assign_lr(session, args['lr_rate'] * lr_decay)

            start_time = time.time()
            cost_trajectory = []
            epoch_cost = 0.0

            for batch_idx in range(max_batches):
                # Fetch both tensors in one run so x and y come from the same window.
                x_ids, y_ids = session.run([x_ids_op, y_ids_op])

                # The model emits one distribution per sequence, so the target
                # is the word following the last input word, one-hot encoded.
                # NOTE(review): confirm this is the intended target.
                targets = np.zeros((args['batch_size'], args['op_channels']), dtype=np.float32)
                targets[np.arange(args['batch_size']), y_ids[:, -1]] = 1.0

                cur_cost, _train_result = session.run(
                    [train_model.cost, train_model.train_op],
                    feed_dict={train_model.input_layer_x: x_ids,
                               train_model.input_layer_y: targets})
                cost_trajectory.append(cur_cost)
                epoch_cost += cur_cost

            end_time = time.time()

            print("Runtime of one epoch: ")
            print(end_time-start_time)
            print("Average cost per epoch: ")
            print(epoch_cost/max_batches)

            cost_over_batches += cost_trajectory
    finally:
        # Stop the queue-runner threads even when training fails.
        coord.request_stop()
        coord.join(threads)

    plt.plot(np.linspace(1,len(cost_over_batches),len(cost_over_batches)),cost_over_batches)
    plt.title('Cost per batch over the training run')
    plt.xlabel('# batch')
    plt.ylabel('avg. cost per batch')
    plt.savefig(logdir+'model_'+str(ix)+'_traincost.png')

IndexError: list index out of range