#### Packages we need

In [22]:
import tensorflow as tf
import numpy as np
import pandas as pd
import random

# Data

In [2]:
# Modulus of the Fibonacci-style sequences: every generated element lies in [0, basis_len).
# It is also used further down as the embedding vocabulary size and the number of output classes.
basis_len = 10

In [3]:
def fibonacci(i, max_steps=20, basis=None):
    '''
    Generate one Fibonacci-style sequence reduced modulo ``basis``.

    The first element is ``i``, the second is drawn uniformly at random from
    ``[0, basis)``, and every later element is the sum of the two preceding
    elements modulo ``basis``.

    Args:
        i: first element of the sequence.
        max_steps: target length of the sequence. The two seed elements are
            always present, so values below 2 still produce a length-2 list.
        basis: modulus to use. When left as None, the notebook-level
            ``basis_len`` is used, which keeps existing callers unchanged.

    Returns:
        list: the generated sequence.
    '''
    if basis is None:
        basis = basis_len
    seq = [i, np.random.randint(basis)]
    # Extend with the recurrence until the requested length is reached.
    while len(seq) < max_steps:
        seq.append((seq[-2] + seq[-1]) % basis)
    return seq

def make_fibonacci_data(size=5000):
    '''
    Build a DataFrame of randomly seeded sequences.

    Column ``0`` holds ``size`` first elements drawn uniformly from
    ``[0, basis_len)``; column ``'fibonacci'`` holds the result of calling
    ``fibonacci`` on each of those first elements.
    '''
    first_elements = np.random.randint(basis_len, size=size)
    return pd.DataFrame(first_elements).assign(
        fibonacci=lambda frame: frame[0].apply(fibonacci)
    )

In [4]:
# Generate the default 5000 sequences; this frame feeds the preview and the iterator smoke test below.
df = make_fibonacci_data()

In [5]:
df.head()

Unnamed: 0,0,fibonacci
0,9,"[9, 3, 2, 5, 7, 2, 9, 1, 0, 1, 1, 2, 3, 5, 8, ..."
1,5,"[5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, ..."
2,1,"[1, 5, 6, 1, 7, 8, 5, 3, 8, 1, 9, 0, 9, 9, 8, ..."
3,9,"[9, 7, 6, 3, 9, 2, 1, 3, 4, 7, 1, 8, 9, 7, 6, ..."
4,8,"[8, 0, 8, 8, 6, 4, 0, 4, 4, 8, 2, 0, 2, 2, 4, ..."


## Data Iterator

In [None]:
Data iterator that can handle variable length sequences efficiently.


In [6]:
# Simple bucket sequence iterator
class BucketSequenceIterator(object):
    """Batch generator that groups sequences of similar length into buckets.

    The sequences are sorted by length and cut into ``num_buckets`` equally
    sized buckets (shortest sequences first). Every batch is drawn from a
    single bucket, so sequences that are padded together have similar lengths.

    When the number of sequences is not a multiple of ``num_buckets``, the
    longest ``len(sequences) % num_buckets`` sequences are not placed in any
    bucket.

    Attributes:
        sequences: all input sequences, sorted by length.
        size: number of sequences per bucket (``len(sequences) // num_buckets``).
        bucket_data: list of ``num_buckets`` lists of sequences.
        num_buckets: number of buckets.
        cursor: per-bucket index of the next unread sequence.
        epochs: how many times ``next_batch`` has rolled over and reshuffled.
    """

    def __init__(self, sequences, num_buckets = 5):
        """Sort ``sequences`` by length and distribute them over the buckets."""
        self.sequences = sorted(sequences, key=len)
        self.size = len(self.sequences) // num_buckets
        self.num_buckets = num_buckets
        # Bucket b holds the full slice [b*size, (b+1)*size); the previous
        # version stopped one element early and lost a sequence per bucket.
        self.bucket_data = [
            self.sequences[b * self.size:(b + 1) * self.size]
            for b in range(num_buckets)
        ]
        self.cursor = np.array([0] * num_buckets)
        self.shuffle()
        self.epochs = 0

    def shuffle(self):
        """Shuffle every bucket in place and rewind all cursors to 0."""
        for bucket in self.bucket_data:
            random.shuffle(bucket)
        self.cursor[:] = 0

    def next_batch(self, n):
        """Return the next batch of ``n`` sequences from a randomly chosen bucket.

        If any bucket has fewer than ``n`` unread sequences left, the epoch
        counter is incremented and all buckets are reshuffled before the
        batch is drawn.

        Args:
            n: batch size; must satisfy ``1 <= n <= self.size``.

        Returns:
            tuple ``(x, y, batch_len)`` where
            * ``x`` is an int32 array of shape ``(n, maxlen + 1)`` holding the
              sequences padded with zeros (``maxlen`` is the longest length
              in the batch, so there is always one trailing pad column);
            * ``y`` is an int32 array of the same shape with
              ``y[r, t] = x[r, t + 1]`` for ``1 <= t < batch_len[r]`` and zeros
              everywhere else (position 0 is deliberately left at 0);
            * ``batch_len`` is the list of unpadded sequence lengths.

        Raises:
            ValueError: if ``n`` is smaller than 1.
            AssertionError: if ``n`` exceeds the bucket size.
        """
        if n < 1:
            raise ValueError("batch size must be at least 1, got %r" % (n,))
        assert n <= self.size, "batch size exceeds the number of sequences per bucket"

        # Roll over to the next epoch as soon as one bucket cannot serve n more.
        if np.any(self.cursor + n > self.size):
            self.epochs += 1
            self.shuffle()

        bucket = np.random.randint(0, self.num_buckets)
        start = self.cursor[bucket]
        batch_seq = self.bucket_data[bucket][start:start + n]
        batch_len = [len(s) for s in batch_seq]
        self.cursor[bucket] += n
        maxlen = max(batch_len)

        # Input: the sequences, zero-padded to a common width of maxlen + 1.
        x = np.zeros([n, maxlen + 1], dtype=np.int32)
        for row, (seq, length) in enumerate(zip(batch_seq, batch_len)):
            x[row, :length] = seq

        # Target: the input shifted left by one, starting from position 1.
        y = np.zeros([n, maxlen + 1], dtype=np.int32)
        for row, length in enumerate(batch_len):
            y[row, 1:length] = x[row, 2:length + 1]

        return x, y, batch_len


In [10]:
# Smoke-test iterator over the preview data, using the default number of buckets.
test_iterator = BucketSequenceIterator(df.fibonacci.tolist())

In [9]:
test_iterator.next_batch(2)

(array([[9, 3, 2, 5, 7, 2, 9, 1, 0, 1, 1, 2, 3, 5, 8, 3, 1, 4, 5, 9, 0],
        [0, 4, 4, 8, 2, 0, 2, 2, 4, 6, 0, 6, 6, 2, 8, 0, 8, 8, 6, 4, 0]], dtype=int32),
 array([[0, 2, 5, 7, 2, 9, 1, 0, 1, 1, 2, 3, 5, 8, 3, 1, 4, 5, 9, 0, 0],
        [0, 4, 8, 2, 0, 2, 2, 4, 6, 0, 6, 6, 2, 8, 0, 8, 8, 6, 4, 0, 0]], dtype=int32),
 [20, 20])

In [11]:
## Hyperparameters
batch_size = 8  # Number of seq in each batch
sequence_length = 21 # Max sequence length
state_size = 32  # Number of cells in LSTM layer
n_layers = 1 # Number of LSTM layers
n_epochs = 100


Get the data

In [12]:
def fetch_fibonacci():
    '''Generate a fresh data set and return its sequences as a list of lists.'''
    return make_fibonacci_data()['fibonacci'].tolist()


In [13]:
# A freshly generated sample (independent of df above); in the visible cells it is only displayed.
data = fetch_fibonacci()

In [14]:
data

[[6, 5, 1, 6, 7, 3, 0, 3, 3, 6, 9, 5, 4, 9, 3, 2, 5, 7, 2, 9],
 [0, 7, 7, 4, 1, 5, 6, 1, 7, 8, 5, 3, 8, 1, 9, 0, 9, 9, 8, 7],
 [4, 5, 9, 4, 3, 7, 0, 7, 7, 4, 1, 5, 6, 1, 7, 8, 5, 3, 8, 1],
 [5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5],
 [6, 6, 2, 8, 0, 8, 8, 6, 4, 0, 4, 4, 8, 2, 0, 2, 2, 4, 6, 0],
 [9, 3, 2, 5, 7, 2, 9, 1, 0, 1, 1, 2, 3, 5, 8, 3, 1, 4, 5, 9],
 [9, 5, 4, 9, 3, 2, 5, 7, 2, 9, 1, 0, 1, 1, 2, 3, 5, 8, 3, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [9, 2, 1, 3, 4, 7, 1, 8, 9, 7, 6, 3, 9, 2, 1, 3, 4, 7, 1, 8],
 [0, 9, 9, 8, 7, 5, 2, 7, 9, 6, 5, 1, 6, 7, 3, 0, 3, 3, 6, 9],
 [5, 8, 3, 1, 4, 5, 9, 4, 3, 7, 0, 7, 7, 4, 1, 5, 6, 1, 7, 8],
 [0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5],
 [2, 9, 1, 0, 1, 1, 2, 3, 5, 8, 3, 1, 4, 5, 9, 4, 3, 7, 0, 7],
 [5, 6, 1, 7, 8, 5, 3, 8, 1, 9, 0, 9, 9, 8, 7, 5, 2, 7, 9, 6],
 [6, 6, 2, 8, 0, 8, 8, 6, 4, 0, 4, 4, 8, 2, 0, 2, 2, 4, 6, 0],
 [9, 4, 3, 7, 0, 7, 7, 4, 1, 5, 6, 1, 7, 8, 5, 3, 8, 1,

Utility function that closes any open session and resets the default graph

In [15]:
def reset_graph():
    '''
    Utility to make sure the graph is clean before we build it.

    Closes the notebook-level ``sess`` when one exists and is truthy, then
    resets TensorFlow's default graph.
    '''
    active_session = globals().get('sess')
    if active_session:
        active_session.close()
    tf.reset_default_graph()


In [16]:
# Start from an empty default graph so that re-running the graph-building cells
# below does not fail on variables that already exist. reset_graph() also closes
# a session left over from a previous run before the new one is opened.
reset_graph()
sess = tf.InteractiveSession()

In [17]:
# Input symbol ids, one row per sequence; fed with the padded x array from the iterator.
X = tf.placeholder(tf.int32, [batch_size, sequence_length])

In [18]:
# Target symbol ids, same layout as X; fed with the shifted y array from the iterator.
Y = tf.placeholder(tf.int32, [batch_size, sequence_length])

In [19]:
# Lookup table with one state_size-wide row for each symbol in [0, basis_len).
embeddings = tf.get_variable('embedding_matrix', shape=[basis_len, state_size])

In [20]:
# Replace every id in X by its embedding row -> (batch_size, sequence_length, state_size).
rnn_inputs = tf.nn.embedding_lookup(embeddings, X)

In [21]:
rnn_inputs

<tf.Tensor 'embedding_lookup:0' shape=(8, 21, 32) dtype=float32>

In [19]:
# A single LSTM cell with state_size units (n_layers is not used here).
cells = tf.nn.rnn_cell.BasicLSTMCell(state_size)

In [20]:
# All-zero starting state for each of the batch_size sequences.
initial_state = cells.zero_state(batch_size,tf.float32)

In [21]:
# Unroll the LSTM over the time axis; outputs has one state_size vector per
# (sequence, time step), i.e. shape (batch_size, sequence_length, state_size).
outputs, states = tf.nn.dynamic_rnn(cells, rnn_inputs, initial_state=initial_state)

In [48]:
outputs

<tf.Tensor 'RNN/transpose:0' shape=(8, 21, 32) dtype=float32>

In [23]:
# Output projection shared by all time steps: maps a state_size vector to one
# logit per symbol in [0, basis_len). Both parameters start from N(0, 0.1^2).
with tf.variable_scope('prediction'):
    W = tf.get_variable(name='W', shape=[state_size, basis_len],initializer=tf.random_normal_initializer(stddev=0.1))
    b = tf.get_variable(name='b', shape=[basis_len], initializer=tf.random_normal_initializer(stddev=0.1))

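We flatten the RNN outputs to shape (batch_size * sequence_length, state_size) so that a single matrix multiplication applies the output layer to every time step at once; the bias `b` is broadcast across all rows.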

In [24]:
# Flatten (batch, time) into one axis, then project: one row of basis_len logits per position.
logits_broadcast = tf.matmul(tf.reshape(outputs,[-1,state_size]),W) + b

In [25]:
logits_broadcast

<tf.Tensor 'add:0' shape=(168, 10) dtype=float32>

In [26]:
# Flatten the targets to one label per position, in the same row order as logits_broadcast.
Y_flat = tf.reshape(Y,[-1])

In [27]:
Y_flat

<tf.Tensor 'Reshape_1:0' shape=(168,) dtype=int32>

In [28]:
# Per-position cross-entropy between the logits and the integer targets.
# The arguments are passed by keyword: TensorFlow 1.0 made this function reject
# positional arguments, while the keyword form is accepted by older releases too.
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_broadcast, labels=Y_flat)

In [29]:
# Scalar training objective: plain average over every position of the batch.
# No mask is applied, so position 0 and the padded column count as well.
mean_loss = tf.reduce_mean(loss)

The following ops are only used to inspect the model's predictions; they are not part of the loss.

In [30]:
tf.nn.softmax(logits_broadcast)

<tf.Tensor 'Softmax:0' shape=(168, 10) dtype=float32>

In [31]:
# Highest-scoring symbol at every flattened position.
Y_pred = tf.argmax(logits_broadcast,1)

In [32]:
# Back to (batch_size, sequence_length) so predictions line up with the Y batches.
Y_output = tf.reshape(Y_pred,[batch_size,sequence_length])

In [33]:
# Adam with element-wise gradient clipping to [-5, 5].
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    clip = tf.constant(5.0, name="clip")
    # compute_gradients() yields (None, var) for variables the loss does not
    # depend on; tf.clip_by_value cannot take None, so those pairs are passed
    # through unchanged and apply_gradients() skips them.
    gradients = [
        (grad if grad is None else tf.clip_by_value(grad, -clip, clip), var)
        for grad, var in optimizer.compute_gradients(mean_loss)
    ]
    updates = optimizer.apply_gradients(gradients)

In [34]:
# Initialise every variable of the graph built so far; must run after the optimizer cell.
sess.run(tf.global_variables_initializer())

In [35]:
# Another freshly generated set of sequences; this one is split into training and validation data.
sequences = fetch_fibonacci()

In [36]:
# First 80% of the sequences are used for training.
train_seq = sequences[:int(len(sequences)*0.8)]

In [37]:
# Remaining 20% are held out for validation.
valid_seq = sequences[int(len(sequences)*0.8):len(sequences)]

In [38]:
# Batch source for training (default number of buckets).
train_iterator = BucketSequenceIterator(train_seq)

In [39]:
# Batch source for validation (default number of buckets).
valid_iterator = BucketSequenceIterator(valid_seq)

In [40]:
# Running batch count and summed batch loss; both are reset after each report in the training loop.
step, total_mean_loss = 0,0

In [41]:
# Per-epoch average losses, appended to by the training loop.
train_losses, valid_losses = [], []

In [42]:
# Number of training epochs completed so far, compared against train_iterator.epochs.
current_epoch = 0

In [43]:
# Peek at one batch. Note that this call advances train_iterator's internal
# cursor, so it is not a side-effect-free inspection.
train_iterator.next_batch(12)

(array([[0, 7, 7, 4, 1, 5, 6, 1, 7, 8, 5, 3, 8, 1, 9, 0, 9, 9, 8, 7, 0],
        [9, 9, 8, 7, 5, 2, 7, 9, 6, 5, 1, 6, 7, 3, 0, 3, 3, 6, 9, 5, 0],
        [3, 5, 8, 3, 1, 4, 5, 9, 4, 3, 7, 0, 7, 7, 4, 1, 5, 6, 1, 7, 0],
        [0, 1, 1, 2, 3, 5, 8, 3, 1, 4, 5, 9, 4, 3, 7, 0, 7, 7, 4, 1, 0],
        [6, 6, 2, 8, 0, 8, 8, 6, 4, 0, 4, 4, 8, 2, 0, 2, 2, 4, 6, 0, 0],
        [3, 6, 9, 5, 4, 9, 3, 2, 5, 7, 2, 9, 1, 0, 1, 1, 2, 3, 5, 8, 0],
        [4, 4, 8, 2, 0, 2, 2, 4, 6, 0, 6, 6, 2, 8, 0, 8, 8, 6, 4, 0, 0],
        [4, 1, 5, 6, 1, 7, 8, 5, 3, 8, 1, 9, 0, 9, 9, 8, 7, 5, 2, 7, 0],
        [7, 9, 6, 5, 1, 6, 7, 3, 0, 3, 3, 6, 9, 5, 4, 9, 3, 2, 5, 7, 0],
        [0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 5, 0, 5, 0],
        [8, 0, 8, 8, 6, 4, 0, 4, 4, 8, 2, 0, 2, 2, 4, 6, 0, 6, 6, 2, 0],
        [9, 7, 6, 3, 9, 2, 1, 3, 4, 7, 1, 8, 9, 7, 6, 3, 9, 2, 1, 3, 0]], dtype=int32),
 array([[0, 7, 4, 1, 5, 6, 1, 7, 8, 5, 3, 8, 1, 9, 0, 9, 9, 8, 7, 0, 0],
        [0, 8, 7, 5, 2, 7, 9, 6, 5, 

In [44]:
# Train until the training iterator has rolled over n_epochs times.
# next_batch() increments train_iterator.epochs itself when a bucket runs out,
# which is what the epoch check inside the loop detects.
while current_epoch < n_epochs:
    step += 1
    batch = train_iterator.next_batch(batch_size)
    feed = {X: batch[0], Y: batch[1]}

    # One optimisation step; only the scalar batch loss is kept.
    mean_loss_batch, _ = sess.run([mean_loss,updates], feed_dict=feed)
    total_mean_loss += mean_loss_batch

    # The iterator rolled over while drawing this batch, so the batch just
    # trained on already belongs to the new epoch but is counted in this average.
    if train_iterator.epochs > current_epoch:
        current_epoch += 1
        train_losses.append(total_mean_loss / step)
        step, total_mean_loss = 0, 0

        # Evaluate on the validation set: loop until the validation iterator
        # rolls over. Only mean_loss is fetched (no `updates`), so no training
        # step is run here. The batch that triggers the rollover is included.
        valid_epoch = valid_iterator.epochs
        while valid_iterator.epochs == valid_epoch:
            step += 1
            batch = valid_iterator.next_batch(batch_size)
            feed = {X: batch[0], Y: batch[1]}
            # Fetching a list returns a list, hence the [0] below.
            mean_loss_batch = sess.run([mean_loss], feed_dict=feed)
            total_mean_loss += mean_loss_batch[0]

        valid_losses.append(total_mean_loss / step)
        step, total_mean_loss = 0,0
        # NOTE(review): the label says "Accuracy" but the values printed are average losses.
        print('Accuracy after epoch', current_epoch, ' - train loss:', train_losses[-1], '- validation loss:', valid_losses[-1])

        # Every 10th epoch, show predictions next to the targets for one
        # validation batch. This draws an extra batch from valid_iterator.
        if current_epoch % 10 == 0:
            batch = valid_iterator.next_batch(batch_size)
            feed = {X:batch[0], Y:batch[1]}
            p = sess.run([Y_output], feed_dict = feed)
            print('Prediction:',p)
            print('Real Output:', batch[1])

('Accuracy after epoch', 1, ' - train loss:', 1.7942281208966113, '- validation loss:', 1.4034852541011313)
('Accuracy after epoch', 2, ' - train loss:', 1.0278149946530659, '- validation loss:', 0.68546730618585239)
('Accuracy after epoch', 3, ' - train loss:', 0.51419282927049847, '- validation loss:', 0.38631160196505093)
('Accuracy after epoch', 4, ' - train loss:', 0.30675459769340829, '- validation loss:', 0.23344050269377858)
('Accuracy after epoch', 5, ' - train loss:', 0.18858740262749815, '- validation loss:', 0.14769440867866462)
('Accuracy after epoch', 6, ' - train loss:', 0.12228451174272149, '- validation loss:', 0.098326015494652655)
('Accuracy after epoch', 7, ' - train loss:', 0.087246637624591145, '- validation loss:', 0.069142217851347387)
('Accuracy after epoch', 8, ' - train loss:', 0.06580740906378274, '- validation loss:', 0.050300812659164272)
('Accuracy after epoch', 9, ' - train loss:', 0.040865893020981071, '- validation loss:', 0.045025503870687988)
('Accur