In [1]:
import tensorflow as tf
import numpy as np

In [2]:
def generate(batch_size=128, copy_len=128, delay=64, num_input=13):
    """Create one random batch for the delayed-copy task.

    Each input sequence has ``copy_len + delay`` timesteps: ``copy_len``
    random binary vectors of dimension ``num_input`` followed by ``delay``
    all-zero vectors. The target sequence has the same shape: its first
    ``delay`` timesteps are zero and its remaining ``copy_len`` timesteps
    repeat the first ``copy_len`` input vectors.

    The defaults reproduce the original hard-coded task
    (128 sequences, 128 + 64 timesteps, 13 features).

    Args:
        batch_size: number of sequences in the batch.
        copy_len: number of leading random vectors that must be reproduced.
        delay: number of timesteps by which the target lags the input.
        num_input: dimension of each binary vector.

    Returns:
        Tuple ``(X, Y)`` of arrays shaped
        ``(batch_size, copy_len + delay, num_input)``. ``X`` holds integer
        0/1 values, ``Y`` is a float array.
    """
    timesteps = copy_len + delay

    X = np.random.randint(0, 2, (batch_size, timesteps, num_input))
    # Nothing is presented once the payload is over.
    X[:, copy_len:, :] = 0

    Y = np.zeros((batch_size, timesteps, num_input))
    # The target is the payload shifted `delay` steps into the future.
    Y[:, delay:, :] = X[:, :copy_len, :]
    return X, Y

In [3]:
# Model definition: hyperparameters and graph inputs.
# Start from an empty default graph before any op is created.
tf.reset_default_graph()

# --- Training parameters ---
learning_rate = 0.001
training_steps = 2000
batch_size = 128
display_step = 200       # the training loop prints the loss every `display_step` steps

# --- Network parameters ---
num_input = 13           # size of the vector fed in at each timestep
timesteps = 128 + 64     # number of timesteps in one sequence
num_hidden = 70          # units in the first LSTM layer
num_hidden2 = 50         # units in the second LSTM layer
history_size = 80        # number of past-input offsets the read head scores per timestep

# --- Graph inputs ---
# Both placeholders have a fully static shape, so every fed batch must contain
# exactly `batch_size` sequences of `timesteps` steps.
X = tf.placeholder(tf.float32, [batch_size, timesteps, num_input])
Y = tf.placeholder(tf.float32, [batch_size, timesteps, num_input])

# Controller: two stacked LSTM layers (num_hidden units, then num_hidden2).
lstm = tf.contrib.rnn.MultiRNNCell([
    tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0),
    tf.contrib.rnn.BasicLSTMCell(num_hidden2, forget_bias=1.0)
])

# lstm_outputs: (batch_size, timesteps, num_hidden2); the final state is unused.
lstm_outputs, _ = tf.nn.dynamic_rnn(lstm, X, dtype=tf.float32)

# reads: (batch_size, timesteps, history_size) - one unnormalised score per
# history offset at every timestep.
reads = tf.layers.dense(lstm_outputs, history_size, activation=None)
# Normalise over the history axis (the last one) so that each timestep gets
# its own probability distribution over the `history_size` offsets.
# The previous `dim=1` normalised over time instead: the weights of a single
# timestep did not sum to 1 and depended on the scores of every other
# timestep, future ones included.
reads_proba = tf.nn.softmax(reads, dim=-1)

# Selection mask that turns "history offset" weights into "input position"
# weights:  mask[t, i, j] == 1.0  exactly when  i + j == t - 1,  i.e. offset i
# read at timestep t points at input position j = t - 1 - i.
# Built with broadcasting instead of a triple Python loop.
step_idx = np.arange(timesteps).reshape(-1, 1, 1)       # t
offset_idx = np.arange(history_size).reshape(1, -1, 1)  # i
pos_idx = np.arange(timesteps).reshape(1, 1, -1)        # j
history_mask = ((offset_idx + pos_idx) == (step_idx - 1)).astype(np.float32)
# history_mask: (timesteps, history_size, timesteps)

# The mask is identical for every batch element, so `H` is exposed as a
# zero-copy, read-only broadcast view with the original 4-D shape
# (batch_size, timesteps, history_size, timesteps).
H = np.broadcast_to(history_mask, (batch_size,) + history_mask.shape)

# Only the 3-D mask is stored in the graph (batch_size times smaller than the
# full 4-D array); it is replicated along the batch axis at run time so the
# resulting tensor keeps the shape (batch_size, timesteps, history_size, timesteps).
history_transformation = tf.tile(
    tf.expand_dims(tf.constant(history_mask), 0),
    [batch_size, 1, 1, 1])

# Convert per-offset read weights into weights over absolute input positions.
# expand_dims(reads_proba, 2): (batch_size, timesteps, 1, history_size)
# history_transformation:      (batch_size, timesteps, history_size, timesteps)
memory_retrieve_indexes = tf.matmul(tf.expand_dims(reads_proba, 2), history_transformation)
# memory_retrieve_indexes: (batch_size, timesteps, 1, timesteps)

# Weighted sum of the input vectors for every output timestep:
#   memory_retrieve[b, t, :] = sum_j weights[b, t, j] * X[b, j, :]
# A single batched matmul of the (batch_size, timesteps, timesteps) weights
# with X gives the same result as tiling X `timesteps` times, without
# materialising the (batch_size, timesteps, timesteps, num_input) copy.
memory_retrieve = tf.matmul(tf.squeeze(memory_retrieve_indexes, axis=2), X)
# memory_retrieve: (batch_size, timesteps, num_input) - same shape as Y

# Mean squared error over timesteps 64 onward only; earlier steps are left
# out of the loss.
loss_op = tf.reduce_mean(tf.squared_difference(memory_retrieve[:, 64:], Y[:, 64:]))

# Optimizer: Adam with its moment-decay and epsilon settings spelled out.
optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-08,
    use_locking=False,
    name='Adam',
)

# Training op that minimises `loss_op` with the optimizer above.
# NOTE(review): no global step is created in this cell, so
# tf.train.get_global_step() presumably yields None here - confirm that
# running without a step counter is intended.
train_op = tf.contrib.layers.optimize_loss(
    loss_op,
    tf.train.get_global_step(),
    learning_rate=learning_rate,
    optimizer=optimizer,
)

# Op that initialises every variable created so far (model and optimizer).
init = tf.global_variables_initializer()

In [4]:
# Open the session used for training.
# log_device_placement=True logs which device (GPU or CPU) each op is placed on.
# NOTE(review): device_count={'GPU': 0} looks like it hides every GPU from
# this session, which would make the GPU memory fraction below irrelevant -
# confirm which of the two settings is actually wanted.
config = tf.ConfigProto(
    device_count={'GPU': 0},
    log_device_placement=True,
)
# Workaround for a "BlasGEMM launch failed" error (apparently caused by not
# enough free GPU memory): cap the share of GPU memory this process may claim.
config.gpu_options.per_process_gpu_memory_fraction = 0.4

sess = tf.Session(config=config)

# Initialise all variables before the first training step.
sess.run(init)

In [None]:
for step in range(training_steps):
    # A fresh random batch is drawn for every step.
    batch_x, batch_y = generate()
    feed = {X: batch_x, Y: batch_y}

    # One optimisation step (forward pass, backprop, parameter update).
    sess.run(train_op, feed_dict=feed)

    if step % display_step != 0:
        continue

    # Report the loss on the batch just trained on, measured after the update.
    L = sess.run(loss_op, feed_dict=feed)
    print("Step {}, Minibatch Loss= {:.4f}".format(step, L))

print("Optimization Finished!")