In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# the input data is 128+64 random binary vectors of dimension 13
# the output data has same dimensions; final 128 of vectors must be same as first 128 of input
def generate():
    X = np.random.randint(0, 2, (128, 128+64, 13))
    X[:, 128:, :] = 0
    Y = np.zeros((128, 128+64, 13))
    Y[:, 64:, :] = X[:, :128, :]
    return X, Y

In [177]:
# the input data is 128+64 random binary vectors of dimension 13
# the output data has same dimensions; final 128 of vectors must be same as first 128 of input
tf.reset_default_graph()

# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200

# Network Parameters
num_input = 13       # dimension of one input at moment
timesteps = 128+64   # timesteps in one sequence
num_hidden = 70      # num of lstms in first hidden layer
num_read = 50        # num of lstms in second hidden layer
history_size = 80    # how much previous moments of input is stored

# tf graph input
X = tf.placeholder("float", [batch_size, timesteps, num_input])
Y = tf.placeholder("float", [batch_size, timesteps, num_input])

# loss will be calculated in the loop as sum of losses on each iteration
loss_op = tf.constant(0.0)

# defining first hidden layer
with tf.variable_scope('first_lstm'):
    first_lstm = tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    first_lstm_state = tf.zeros([batch_size, num_hidden]), tf.zeros([batch_size, num_hidden])

# defining second hidden layer
with tf.variable_scope("read_lstm"):
    read_lstm = tf.contrib.rnn.BasicLSTMCell(num_read, forget_bias=1.0)
    read_lstm_state = tf.zeros([batch_size, num_read]), tf.zeros([batch_size, num_read])

# linear transformation of second lstm cell outputs to history size dimension vector
read_w = tf.Variable(tf.random_normal([num_read, history_size]))
read_b = tf.Variable(tf.random_normal([history_size]))

# history is an array of previous (in time) history_size inputs
history = tf.zeros([batch_size, num_input, history_size])

# reshaping data to (timesteps, batch_size, num_input)
_X = tf.unstack(X, timesteps, 1)
_Y = tf.unstack(Y, timesteps, 1)

# time loop
for t, inp, truth in zip(np.arange(timesteps), _X, _Y):
    # moving data through lstm layers
    with tf.variable_scope('first_lstm'):
        first_lstm_output, first_lstm_state = first_lstm(inp, first_lstm_state)
    with tf.variable_scope("read_lstm"):
        read_lstm_output, read_lstm_state = read_lstm(first_lstm_output, read_lstm_state)
    
    # non-linear transformation to vector of history_size length
    read = tf.sigmoid(tf.matmul(read_lstm_output, read_w) + read_b)
    # transformation to "probabilities"-kind weights
    
    read_proba = tf.nn.softmax(read, 1)
    # taking selected data from memory
    memory_retrieve = tf.matmul(history, tf.expand_dims(read_proba, 2))
    
    # calculate loss as cross_entropy
    if t >= 64:
        loss_op += tf.reduce_mean(tf.squared_difference(memory_retrieve, tf.expand_dims(truth, 2)))
        #tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=memory_retrieve, labels=tf.expand_dims(truth, 2)))
    
    # rolling history
    history = tf.concat([history[:, :, 1:], tf.expand_dims(inp, 2)], axis=2)

# define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-08,
                                   use_locking=False,
                                   name='Adam')
# optimizing
train_op = tf.contrib.layers.optimize_loss(
                loss_op, tf.contrib.framework.get_global_step(), optimizer=optimizer, learning_rate=learning_rate)

init = tf.global_variables_initializer()

In [None]:
# Start training
sess = tf.Session()
# Run the initializer
sess.run(init)

In [None]:
for step in range(0, training_steps):
    batch_x, batch_y = generate()
    
    # Run optimization op (backprop)
    sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
    if step % display_step == 0:
        # Calculate batch loss and accuracy
        L = sess.run(loss_op, feed_dict={X: batch_x, Y: batch_y})
        print("Step " + str(step) + ", Minibatch Loss= " + "{:.4f}".format(L))

print("Optimization Finished!")

Step 0, Minibatch Loss= 34.7727
Step 200, Minibatch Loss= 32.5039
Step 400, Minibatch Loss= 32.3189
Step 600, Minibatch Loss= 32.2119
Step 800, Minibatch Loss= 32.2542
Step 1000, Minibatch Loss= 32.2154
Step 1200, Minibatch Loss= 32.2279
Step 1400, Minibatch Loss= 32.2093
Step 1600, Minibatch Loss= 32.2487
Step 1800, Minibatch Loss= 32.2051
Step 2000, Minibatch Loss= 32.1753
Step 2200, Minibatch Loss= 32.1825
Step 2400, Minibatch Loss= 32.1669
Step 2600, Minibatch Loss= 32.1302
Step 2800, Minibatch Loss= 32.1196
Step 3000, Minibatch Loss= 32.1675
Step 3200, Minibatch Loss= 32.1077
Step 3400, Minibatch Loss= 32.1377
Step 3600, Minibatch Loss= 32.0530
Step 3800, Minibatch Loss= 32.0322
Step 4000, Minibatch Loss= 32.0259


In [168]:
batch_x, batch_y = generate()

In [169]:
h = sess.run(history, feed_dict={X: batch_x, Y: batch_y})

In [170]:
h.shape

(128, 13, 80)

In [171]:
(np.rollaxis(batch_x[:, -80:, :], 2, 1) != h).sum()

0

In [172]:
mr = sess.run(memory_retrieve, feed_dict={X: batch_x, Y: batch_y})

In [173]:
mr.shape

(128, 13, 1)

In [174]:
h[:, :, 15] == mr[:, :, 0]

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ..., 
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]], dtype=bool)

In [175]:
batch_y[:, -1, :] == h[:, :, 15]

array([[ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       ..., 
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True],
       [ True,  True,  True, ...,  True,  True,  True]], dtype=bool)

In [176]:
sess.run(loss_op, feed_dict={X: batch_x, Y: batch_y})

0.0