In [1]:
import math

import tensorflow as tf

In [2]:
# Wrapper for the TF RNN cell
# For an LSTM, the 'cell' is a tuple containing state and cell
# We use TF's dropout to implement zoneout
class ZoneoutWrapper(tf.nn.rnn_cell.RNNCell):
  """Operator adding zoneout to all states (states+cells) of the given cell.

  With zoneout each state unit either takes its freshly computed value or
  keeps its value from the previous time step.

  Args:
    cell: the RNN cell to wrap.
    drop_prob: probability of keeping the *previous* value of a state unit.
      A scalar for cells with a single state tensor, or a flat tuple with one
      probability per state part for cells whose ``state_size`` is a tuple
      (e.g. an LSTM). Nested state tuples are not supported.
    is_training: if True, units are zoned out at random; otherwise the
      expected value of the random update is used.
    seed: seed forwarded to ``tf.nn.dropout``.
  """

  def __init__(self, cell, drop_prob, is_training=True, seed=0):
    self._cell = cell
    # Kept so __call__ can validate that its structure matches the state.
    self._zoneout_prob = drop_prob
    if isinstance(drop_prob, tuple):
      self._keep_prob = tuple(1 - prob for prob in drop_prob)
    else:
      self._keep_prob = 1 - drop_prob
    self._seed = seed
    self.is_training = is_training

  @property
  def state_size(self):
    return self._cell.state_size

  @property
  def output_size(self):
    return self._cell.output_size

  def _zoneout(self, new_part, old_part, keep_prob):
    """Blend one state tensor with its previous value."""
    if self.is_training:
      # tf.nn.dropout rescales surviving entries by 1 / keep_prob; multiplying
      # by keep_prob undoes that, so each unit is either updated to its new
      # value or left at its old one.
      return keep_prob * tf.nn.dropout(new_part - old_part, keep_prob, seed=self._seed) + old_part
    # Inference: deterministic expectation of the training-time update.
    return keep_prob * new_part + (1 - keep_prob) * old_part

  def __call__(self, inputs, state, scope=None):
    """Run the wrapped cell for one step and apply zoneout to its new state.

    Raises:
      TypeError: if exactly one of the state and the zoneout probability is
        a tuple.
      ValueError: if both are tuples of different lengths.
    """
    if isinstance(self.state_size, tuple) != isinstance(self._zoneout_prob, tuple):
      raise TypeError('Subdivided states need subdivided zoneouts.')
    if isinstance(self.state_size, tuple) and len(tuple(self.state_size)) != len(tuple(self._zoneout_prob)):
      raise ValueError('State and zoneout need equally many parts.')
    output, new_state = self._cell(inputs, state, scope)
    if isinstance(self.state_size, tuple):
      parts = [self._zoneout(new_part, old_part, keep_prob)
               for new_part, old_part, keep_prob in zip(new_state, state, self._keep_prob)]
      # Preserve namedtuple state containers (they expose `_fields`) so the
      # returned state has the same structure as the wrapped cell's state.
      if hasattr(new_state, '_fields'):
        new_state = type(new_state)(*parts)
      else:
        new_state = tuple(parts)
    else:
      new_state = self._zoneout(new_state, state, self._keep_prob)
    return output, new_state


# Wrap your cells like this (the constructor argument is `drop_prob`, not `keep_prob`)
#cell = ZoneoutWrapper(cell, drop_prob=z_prob)

In [None]:
class SequencePredFeature:
    """Recurrent model that predicts one class label per time step.

    The whole TensorFlow graph (placeholders, RNN, loss, optimizer and
    summaries) is built in the constructor inside a dedicated ``tf.Graph``.

    The `config` object must provide ``max_length``, ``feature_size``,
    ``num_classes``, ``hidden_size``, ``num_layers``, ``model_type``
    ("gru" or "rnn") and ``lr``.
    """

    def __init__(self, config):
        self.config = config
        self.graph = tf.Graph()

        with self.graph.as_default():
            # Placeholders for the input tensors; the batch dimension is left
            # dynamic so batches of any size can be fed.
            self.inputs_placeholder = tf.placeholder(
                tf.int32,
                shape=(None, self.config.max_length, self.config.feature_size),
                name="x")
            self.labels_placeholder = tf.placeholder(
                tf.int32, shape=(None, self.config.max_length), name="y")
            self.dropout_placeholder = tf.placeholder(tf.float32)

            self.pred = self.add_prediction_op()
            self.loss = self.add_loss_op(self.pred)
            self.global_step, self.train_op = self.add_training_op(self.loss)
            self.merged_summaries = tf.summary.merge_all()

    def create_feed_dict(self, inputs_batch, labels_batch=None, initial_state=None, keep_prob=1.0):
        """Create the feed_dict for one ``sess.run`` call.

        Args:
            inputs_batch: value fed to the input placeholder.
            labels_batch: optional value fed to the label placeholder.
            initial_state: optional RNN state fed in place of the default
                zero state.
            keep_prob: value fed to the dropout placeholder, which is used as
                the output keep probability of every RNN layer.

        Returns:
            A dict mapping placeholders to the given values.
        """
        feed_dict = {
            self.inputs_placeholder: inputs_batch,
            self.dropout_placeholder: keep_prob,
            }
        if labels_batch is not None:
            feed_dict[self.labels_placeholder] = labels_batch

        if initial_state is not None:
            feed_dict[self.in_state] = initial_state

        return feed_dict

    def add_embedding(self):
        """Create the one-hot encoding of the input. No embedding is used as of now.

        Returns:
            A float tensor of shape
            ``(batch, max_length, feature_size * num_classes)``.
        """
        # Use the runtime batch size: the placeholder's batch dimension is
        # None, so a fixed size here would reject differently sized batches.
        batch_size = tf.shape(self.inputs_placeholder)[0]
        embedding = tf.one_hot(self.inputs_placeholder, self.config.num_classes)
        embedding = tf.reshape(embedding, [batch_size, self.config.max_length,
                                           self.config.feature_size * self.config.num_classes])
        return embedding

    def _make_cell(self):
        """Create one recurrent layer wrapped with output dropout."""
        if self.config.model_type == "gru":
            cell = tf.nn.rnn_cell.GRUCell(self.config.hidden_size)
        elif self.config.model_type == "rnn":
            cell = tf.nn.rnn_cell.BasicRNNCell(self.config.hidden_size)
        else:
            raise ValueError("Unsuppoprted model type...")
        return tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=self.dropout_placeholder)

    def add_prediction_op(self):
        """Build the RNN and the output projection.

        Also sets ``self.in_state`` (initial-state placeholder defaulting to
        the zero state) and ``self.out_state`` (final RNN state).

        Returns:
            Unnormalized class scores (logits) of shape
            ``(batch, max_length, num_classes)``.

        Raises:
            ValueError: if ``config.model_type`` is neither "gru" nor "rnn".
        """
        # Get the input from the embedding layer.
        x = self.add_embedding()

        # Build a separate cell object per layer: reusing one object for all
        # layers makes them share (or clash over) the same variables.
        cells = [self._make_cell() for _ in range(self.config.num_layers)]
        cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=False)

        # Placeholder for the initial state; falls back to zeros when not fed.
        batch_size = tf.shape(x)[0]
        dynamic_max_length = tf.shape(x)[1]
        zero_state = cell.zero_state(batch_size, tf.float32)
        self.in_state = tf.placeholder_with_default(zero_state, [None, cell.state_size])

        # No sequence lengths are passed, so every time step is processed.
        output, self.out_state = tf.nn.dynamic_rnn(cell, x, initial_state=self.in_state)
        output = tf.reshape(output, shape=[-1, self.config.hidden_size])

        # Linear layer producing the logits; the softmax itself is applied
        # inside the loss.
        xavier_init = tf.contrib.layers.xavier_initializer()
        W = tf.get_variable("W", shape=[self.config.hidden_size, self.config.num_classes], initializer=xavier_init)
        b1 = tf.get_variable("b1", shape=[self.config.num_classes], initializer=xavier_init)
        preds = tf.add(tf.matmul(output, W), b1)
        preds = tf.reshape(preds, shape=[batch_size, dynamic_max_length, self.config.num_classes])
        return preds

    def add_loss_op(self, preds):
        """Build the mean cross-entropy loss and register a 'loss' summary.

        Args:
            preds: logits as returned by ``add_prediction_op``.

        Returns:
            Scalar loss tensor, divided by ln(2) so it is expressed in bits.
        """
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels_placeholder, logits=preds))
        # math.log replaces the original np.log: numpy is never imported here.
        scaled_loss = loss / math.log(2)
        tf.summary.scalar('loss', scaled_loss)
        return scaled_loss

    def add_training_op(self, loss):
        """Set up the training ops.

        Args:
            loss: scalar tensor to minimize.

        Returns:
            A ``(global_step, train_op)`` pair; running ``train_op`` performs
            one Adam update and increments ``global_step``.
        """
        global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

        optimizer = tf.train.AdamOptimizer(self.config.lr)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return global_step, train_op

    def loss_on_batch(self, sess, inputs_batch, labels_batch, initial_state=None):
        """Evaluate the loss on one batch without updating the model.

        Dropout is disabled (keep probability 1.0).

        Returns:
            A ``(loss, out_state)`` pair.
        """
        feed = self.create_feed_dict(inputs_batch=inputs_batch, labels_batch=labels_batch,
                                     initial_state=initial_state, keep_prob=1.0)
        loss, out_state = sess.run([self.loss, self.out_state], feed_dict=feed)
        return loss, out_state

    def train_on_batch(self, sess, inputs_batch, labels_batch, initial_state=None, dropout=1.0):
        """Run one training step on a batch.

        Args:
            dropout: despite its name, this is fed as the *keep* probability
                of the dropout placeholder.

        Returns:
            A ``(loss, out_state, step, summary)`` tuple.
        """
        feed = self.create_feed_dict(inputs_batch=inputs_batch, labels_batch=labels_batch,
                                     initial_state=initial_state, keep_prob=dropout)
        _, loss, out_state, _step, summary = sess.run(
            [self.train_op, self.loss, self.out_state, self.global_step, self.merged_summaries],
            feed_dict=feed)
        return loss, out_state, _step, summary

    def build(self):
        """Unimplemented stub; the graph is already built in ``__init__``."""
        pass

In [9]:
# Print a 1 for every line of the input file. The context manager closes the
# file handle, and print(...) works on both Python 2 and Python 3.
with open('../../Data/Original Data/input_info.txt') as input_file:
    for text in input_file:
        print(1)

1
1
1


In [8]:
# Scratch check: str.index gives the zero-based position of a character.
u = 'abc'
u.index('c')

2

In [34]:
def encode(text, vocab):
    """Translate every symbol of `text` into its position within `vocab`.

    A symbol missing from `vocab` makes ``vocab.index`` raise ValueError.
    """
    return list(map(vocab.index, text))

In [92]:
def reader(window=4, vocab='abcdef', text='abcdefabcdefabc'):
    """Yield every contiguous chunk of ``2 * window`` encoded symbols.

    Args:
        window: half the chunk length; each chunk holds ``2 * window`` items.
        vocab: ordered symbols; a symbol is encoded as its index in `vocab`.
        text: the sequence to encode and slide over.

    Yields:
        Lists of ``2 * window`` integer codes, one per start position. Nothing
        is yielded when `text` is shorter than ``2 * window``.
    """
    encoded = encode(text, vocab)
    span = 2 * window
    for start in range(len(encoded) - span + 1):
        yield encoded[start:start + span]

In [95]:
def get_batch():
    """Print each chunk from ``reader()`` with the inputs and labels cut from it.

    For every chunk, `window` overlapping slices of `feature_size` symbols are
    the inputs, and the `window` symbols starting at index
    ``feature_size + 1`` are the labels. Nothing is returned; the function
    only prints.
    """
    feature_size = 3
    window = 4
    for element in reader():
        print(element)
        # One sliding slice of `feature_size` symbols per time step.
        _input = [element[start:start + feature_size] for start in range(window)]
        input_batch = [_input]
        label_batch = [element[feature_size + 1:feature_size + window + 1]]
        for inputs, labels in zip(input_batch, label_batch):
            print(inputs)
            print(labels)
            # print('') emits a blank line on both Python 2 and Python 3.
            print('')

In [96]:
# Run the batching demo; it prints each chunk followed by its inputs and labels.
get_batch()

[0, 1, 2, 3, 4, 5, 0, 1]
[[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]]
[4, 5, 0, 1]

[1, 2, 3, 4, 5, 0, 1, 2]
[[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 0]]
[5, 0, 1, 2]

[2, 3, 4, 5, 0, 1, 2, 3]
[[2, 3, 4], [3, 4, 5], [4, 5, 0], [5, 0, 1]]
[0, 1, 2, 3]

[3, 4, 5, 0, 1, 2, 3, 4]
[[3, 4, 5], [4, 5, 0], [5, 0, 1], [0, 1, 2]]
[1, 2, 3, 4]

[4, 5, 0, 1, 2, 3, 4, 5]
[[4, 5, 0], [5, 0, 1], [0, 1, 2], [1, 2, 3]]
[2, 3, 4, 5]

[5, 0, 1, 2, 3, 4, 5, 0]
[[5, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]]
[3, 4, 5, 0]

[0, 1, 2, 3, 4, 5, 0, 1]
[[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]]
[4, 5, 0, 1]

[1, 2, 3, 4, 5, 0, 1, 2]
[[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 0]]
[5, 0, 1, 2]



In [None]:
# Show the documentation of dynamic_rnn. Calling it with no arguments raises
# TypeError because `cell` and `inputs` are required.
help(tf.nn.dynamic_rnn)