In [0]:
%tensorflow_version 1.x
import tensorflow as tf
import random

In [0]:
class ToySequenceData(object):
  """
  generate sequence of data with dynamic length
  this class generate samples for training:
  -class 0: linear sequences (i.e. [0, 1, 2, 3, ...])
  -class 1: random sequence(i.e. [1,  3, 10, 7, ...])

  NOTICE:
  we have to pad each sequence to reach 'max_seq_len' for Tensorflow
  consistency (we cannot feed a numpy array with inconsistent
  dimensions). The dunamic caculation will then be perform thanks to 'seqlen
  attribute that records every actual sequencelength.
  """
  def __init__(self, n_samples=1000, max_seq_len=20, min_seq_len=3, max_value=1000):
    self.data = []
    self.labels = []
    self.seqlen = []
    for i in range(n_samples):
      #Random sequence length
      len = random.randint(min_seq_len, max_seq_len)
      #Monitor sequence length for Tensorflow dynamic calculation
      self.seqlen.append(len)
      #Add a random or linear int sequence (50^ prob)
      if random.random() < 0.5:
        #Generate a linear sequence
        rand_start = random.randint(0, max_value - len)
        s = [[float(i)/max_value] for i in range(rand_start, rand_start + len)]
        #pad sequence for dimension consistency
        s += [[0.] for i in range(max_seq_len - len)]
        self.data.append(s)
        self.labels.append([1., 0.])
      else:
        #Generate a random sequence
        s = [[float(random.randint(0, max_value)) / max_value]
             for i in range(len)]
        #pad sequence for dimension consistency
        s += [[0.] for i in range(max_seq_len - len)]
        self.data.append(s)
        self.labels.append([0., 1.])
    self.batch_id = 0
  def next(self, batch_size):
    """
    return a batch of data, when dataset end is reached, start over.
    """
    if self.batch_id == len(self.data):
      self.batch_id = 0
    batch_data = (self.data[self.batch_id:min(self.batch_id + 
                      batch_size, len(self.data))])
    batch_labels = (self.labels[self.batch_id:min(self.batch_id + 
                      batch_size, len(self.data))])
    batch_seqlen = (self.seqlen[self.batch_id:min(self.batch_id + 
                      batch_size, len(self.data))])
    self.batch_id = min(self.batch_id + batch_size, len(self.data))

    return batch_data, batch_labels, batch_seqlen

In [0]:
#MODEL
#Parameters

learning_rate = 0.01
training_steps = 10000
batch_size = 128
display_step = 200

#Network parameters
seq_max_len = 20  #Sequence max length
n_hidden = 64   #hidden layer num of features
n_classes = 2   #linear sequence or not

trainset = ToySequenceData(n_samples=10000, max_seq_len=seq_max_len)
testset = ToySequenceData(n_samples=500, max_seq_len=seq_max_len)

#tf Graph input
x = tf.placeholder("float", [None, seq_max_len, 1])
y = tf.placeholder("float", [None, n_classes])
#A placeholder for indicating each sequence length
seqlen = tf.placeholder(tf.int32, [None])

#Define weights
weights = {
    'out':tf.Variable(tf.random_normal([n_hidden, n_classes]))
}

biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}


In [0]:
def dynamicRNN(x, seqlen, weights, biases):
  #prepare data shape to match 'rnn' function requirements
  #current data input shape: (batch_size, n_steps, n_input)
  #required shape: 'n_steps' tensors list of shape (batch_size, n_input)
  x = tf.unstack(x, seq_max_len, 1)

  #define a lstm cell with tensorflow
  lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)

  #get lstm cell output, proviing 'sequence_length' will perform dynamic
  #calculation
  outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x, dtype=tf.float32,
        sequence_length=seqlen)
  
  #When performing dynamic calculation, we must retrieve the last
  #dynamically computed output, i.e., if a  sequence length is 10,
  #we need to retrive the 10th output.
  #However Tensorflow doesn't support advanced indexing yet, so we build
  #a custom op that for each sample in batch size, get its length
  #and get the corresponding relevant output.

  #'output' is a list of output at every timestep, we pack them in
  #a Tensorflow and change back dimension to [batch_size, n_step, n_input]
  outputs = tf.stack(outputs)
  outputs = tf.transpose(outputs, [1, 0, 2])

  #Hack to build the indexing and retrive the right output
  batch_size = tf.shape(outputs)[0]
  #Start indices for each sample
  index = tf.range(0, batch_size) * seq_max_len + (seqlen - 1)
  #Indexing
  outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)

  #Linear activation, using outputs computed above
  return tf.matmul(outputs, weights['out']) + biases['out']

In [5]:
pred = dynamicRNN(x, seqlen=seqlen, weights=weights, biases=biases)

#define loss and optimizer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

#Evauate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Initialize the variables 
init = tf.global_variables_initializer()

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels inpu

In [8]:
with tf.Session() as sess:
  sess.run(init)

  for step in range(1, training_steps+1):
    batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
    #Run optimization op (backprob)
    sess.run(optimizer, feed_dict={
        x: batch_x, y: batch_y,
        seqlen: batch_seqlen})
    if step % display_step == 0 or step == 1:
      #calculate batch accuracy & loss
      acc, cost = sess.run([accuracy, loss], feed_dict={
          x: batch_x, y: batch_y,
          seqlen: batch_seqlen})
      print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.6f}".format(cost) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))
  print("Optimization finished!")

  #Calculate accuracy
  test_data = testset.data
  test_label = testset.labels
  test_seqlen = testset.seqlen
  print("Testing Accuracy: ", \
        sess.run(accuracy, feed_dict={
        x: batch_x, y: batch_y,
          seqlen: batch_seqlen}))

Step 1, Minibatch Loss= 0.785570, Training Accuracy= 0.53125
Step 200, Minibatch Loss= 0.681822, Training Accuracy= 0.57812
Step 400, Minibatch Loss= 0.680873, Training Accuracy= 0.60156
Step 600, Minibatch Loss= 0.682110, Training Accuracy= 0.59375
Step 800, Minibatch Loss= 0.667329, Training Accuracy= 0.60156
Step 1000, Minibatch Loss= 0.653641, Training Accuracy= 0.60156
Step 1200, Minibatch Loss= 0.610399, Training Accuracy= 0.68750
Step 1400, Minibatch Loss= 0.564339, Training Accuracy= 0.72656
Step 1600, Minibatch Loss= 0.478908, Training Accuracy= 0.75781
Step 1800, Minibatch Loss= 0.486327, Training Accuracy= 0.76562
Step 2000, Minibatch Loss= 0.445929, Training Accuracy= 0.78906
Step 2200, Minibatch Loss= 0.465927, Training Accuracy= 0.79688
Step 2400, Minibatch Loss= 0.473097, Training Accuracy= 0.79688
Step 2600, Minibatch Loss= 0.444582, Training Accuracy= 0.79688
Step 2800, Minibatch Loss= 0.503272, Training Accuracy= 0.71875
Step 3000, Minibatch Loss= 0.455155, Training A