In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# the input is 128 random binary vectors of dimension 13 followed by 64 all-zero vectors
# the output has the same dimensions; its final 128 vectors must equal the first 128 input vectors
def generate(batch_size=128, seq_len=128, delay=64, dim=13):
    """Sample one batch for the delayed-copy task.

    The defaults reproduce the original hard-coded shapes: 128 sequences of
    128 + 64 timesteps, each timestep being a 13-dimensional binary vector.

    Args:
        batch_size: number of sequences in the batch.
        seq_len: number of leading timesteps that carry random bits.
        delay: number of timesteps by which the target lags the input; the
            input is padded with this many all-zero timesteps at the end.
        dim: dimension of the vector at each timestep.

    Returns:
        A tuple ``(X, Y)`` of arrays with shape
        ``(batch_size, seq_len + delay, dim)``. ``X`` is an integer array whose
        first ``seq_len`` steps are random 0/1 values and whose last ``delay``
        steps are zero. ``Y`` is a float array whose first ``delay`` steps are
        zero and whose remaining ``seq_len`` steps equal ``X[:, :seq_len, :]``.

    Raises:
        ValueError: if ``seq_len`` or ``delay`` is negative.
    """
    if seq_len < 0 or delay < 0:
        raise ValueError("seq_len and delay must be non-negative")

    total_steps = seq_len + delay
    X = np.random.randint(0, 2, (batch_size, total_steps, dim))
    # blank out the tail so the network gets no input while it is replaying
    X[:, seq_len:, :] = 0
    Y = np.zeros((batch_size, total_steps, dim))
    # the target is the input shifted `delay` steps into the future
    Y[:, delay:, :] = X[:, :seq_len, :]
    return X, Y

In [3]:
# data (see generate()): 128 random binary vectors of dimension 13 followed by 64 all-zero vectors;
# the target has the same dimensions and its final 128 vectors repeat the first 128 input vectors
# Graph setup: hyper-parameters, input placeholders, the two LSTM cells, the
# read projection and the initial (all-zero) history buffer.
# Resetting the default graph first lets this cell be re-run without piling
# ops on top of the graph built by a previous run.
tf.reset_default_graph()

# Training Parameters
# NOTE(review): learning_rate, training_steps and display_step are not
# referenced in any of the visible cells -- presumably leftovers from a
# gradient-based version of this notebook; confirm before removing.
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200

# Network Parameters
num_input = 13       # dimension of the input vector at one timestep
timesteps = 128+64   # timesteps in one sequence
num_hidden = 70      # number of units in the first LSTM layer
num_read = 50        # number of units in the second LSTM layer
history_size = 80    # how many previous input timesteps are kept in the history buffer

# tf graph input; the batch dimension is fixed to batch_size, so every fed
# batch must contain exactly batch_size sequences
X = tf.placeholder("float", [batch_size, timesteps, num_input])
Y = tf.placeholder("float", [batch_size, timesteps, num_input])

# the loss is accumulated inside the time loop as a sum of per-timestep losses
loss_op = tf.constant(0.0)

# defining the first hidden layer; its initial state is a pair of zero tensors
# of shape [batch_size, num_hidden] (presumably the LSTM's (c, h) pair)
with tf.variable_scope('first_lstm'):
    first_lstm = tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    first_lstm_state = tf.zeros([batch_size, num_hidden]), tf.zeros([batch_size, num_hidden])

# defining the second hidden layer, which is fed with the first layer's output
with tf.variable_scope("read_lstm"):
    read_lstm = tf.contrib.rnn.BasicLSTMCell(num_read, forget_bias=1.0)
    read_lstm_state = tf.zeros([batch_size, num_read]), tf.zeros([batch_size, num_read])

# linear map from the second LSTM's output to a vector of length history_size
read_w = tf.Variable(tf.random_normal([num_read, history_size]))
read_b = tf.Variable(tf.random_normal([history_size]))

# history buffer holding the previous history_size inputs, laid out as
# [batch_size, num_input, history_size]; it starts out all zero
history = tf.zeros([batch_size, num_input, history_size])

# split along the time axis (axis 1) into `timesteps` separate tensors so the
# time loop can walk over them one step at a time
_X = tf.unstack(X, timesteps, 1)
_Y = tf.unstack(Y, timesteps, 1)

# First timestep that contributes to the loss. generate() leaves the first 64
# target vectors at zero and only starts replaying the input from step 64, so
# earlier steps are excluded from the loss.
loss_start = 64

# time loop: the network is unrolled statically, so every iteration adds one
# more copy of the per-timestep ops to the graph
for t, (inp, truth) in enumerate(zip(_X, _Y)):
    # moving data through the lstm layers (re-entering the scopes the cells
    # were created in)
    with tf.variable_scope('first_lstm'):
        first_lstm_output, first_lstm_state = first_lstm(inp, first_lstm_state)
    with tf.variable_scope("read_lstm"):
        read_lstm_output, read_lstm_state = read_lstm(first_lstm_output, read_lstm_state)

    # affine projection to one score per history slot
    read = tf.matmul(read_lstm_output, read_w) + read_b
    # softmax over the history slots turns the scores into read weights that sum to 1
    read_proba = tf.nn.softmax(read, 1)
    # weighted sum of the stored inputs: [batch, num_input, history] x [batch, history, 1]
    memory_retrieve = tf.matmul(history, tf.expand_dims(read_proba, 2))

    # accumulate the mean squared error between the retrieved vector and the target
    if t >= loss_start:
        loss_op += tf.reduce_mean(tf.squared_difference(memory_retrieve, tf.expand_dims(truth, 2)))

    # rolling history: drop the oldest slot and append the current input as the
    # newest one. This happens after the read, so the read at step t only sees
    # inputs from earlier steps.
    history = tf.concat([history[:, :, 1:], tf.expand_dims(inp, 2)], axis=2)

init = tf.global_variables_initializer()

In [4]:
# Open the session and initialise all variables.
# log_device_placement=True makes TensorFlow log which device (CPU or GPU)
# each op is placed on.
# Capping the per-process GPU memory fraction at 0.4 works around a
# "BlasGEMM launch failed" error (apparently caused by not enough free GPU memory).
config = tf.ConfigProto(
    log_device_placement=True,
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4),
)
sess = tf.Session(config=config)
# Run the initializer
sess.run(init)

In [5]:
from scipy.stats import norm
# Number of mutated candidates generated and evaluated per generation.
POPULATION_SIZE = 200
# NOTE(review): not referenced in any of the visible cells -- presumably a
# leftover; confirm before removing.
TEST_OBJECTS_FOR_FITNESS = 200
# Scale of the standard-normal perturbation added to every weight in dna.mutate().
NOISE = 0.1

In [6]:
class dna:
    """One candidate solution: a complete set of network weights.

    ``w`` is a list of arrays in the order that ``fitness`` feeds them:
    the two variables of ``first_lstm``, the two variables of ``read_lstm``,
    then ``read_w`` and ``read_b``.

    Scaling (``candidate * scalar``) and addition (``candidate + candidate``)
    are defined element-wise so that candidates can be averaged.
    """

    def __init__(self, weights_values):
        self.w = weights_values

    def fitness(self, batch_x, batch_y):
        """Evaluate ``loss_op`` on the batch with this candidate's weights.

        The weights are injected through ``feed_dict``, overriding the values
        of the graph variables for this run only. Despite the method's name
        the returned value is a loss, i.e. lower is better.
        """
        feeds = {X: batch_x, Y: batch_y}
        feeds[first_lstm.variables[0]] = self.w[0]
        feeds[first_lstm.variables[1]] = self.w[1]
        feeds[read_lstm.variables[0]] = self.w[2]
        feeds[read_lstm.variables[1]] = self.w[3]
        feeds[read_w] = self.w[4]
        feeds[read_b] = self.w[5]
        return sess.run(loss_op, feed_dict=feeds)

    def mutate(self):
        """Return a new candidate with Gaussian noise of scale NOISE added to every weight."""
        return dna([layer + NOISE * norm.rvs(size=layer.shape) for layer in self.w])

    def __mul__(self, a):
        """Return a new candidate whose weights are all multiplied by ``a``."""
        return dna([layer * a for layer in self.w])

    def __add__(self, other_dna):
        """Return a new candidate holding the element-wise sum of both weight sets."""
        return dna([mine + theirs for mine, theirs in zip(self.w, other_dna.w)])

In [7]:
# Start the search from the freshly initialised network weights, fetched in the
# order dna.fitness() feeds them back: first_lstm variables, read_lstm
# variables, read_w, read_b.
center = dna(
    sess.run(
        list(first_lstm.variables) + list(read_lstm.variables) + [read_w, read_b]
    )
)

In [8]:
# Number of generations of the evolution strategy.
NUM_GENERATIONS = 70

for i in range(NUM_GENERATIONS):
    # draw a fresh batch for this generation (generate() builds 128 sequences
    # by default, matching the fixed batch dimension of the placeholders)
    batch_x, batch_y = generate()
    # check how good we are
    print("Gen ", i, " started, res = ", center.fitness(batch_x, batch_y))

    # evaluate POPULATION_SIZE mutated copies of the current center on the same batch
    population = [center.mutate() for _ in range(POPULATION_SIZE)]
    losses = np.array([candidate.fitness(batch_x, batch_y) for candidate in population])

    # lower loss -> larger weight; the worst candidate gets weight 0
    fitness = losses.max() - losses
    total_fitness = fitness.sum()
    if not np.isfinite(total_fitness) or total_fitness <= 0:
        # Every candidate scored the same, or a loss was NaN/inf. Normalising
        # would divide by zero or spread NaNs into every weight, so keep the
        # current center and try again with a new batch.
        continue

    # new center = fitness-weighted average of the population, built with
    # dna's own * and + (dna must be the left operand of *)
    shares = fitness / total_fitness
    new_center = population[0] * shares[0]
    for candidate, share in zip(population[1:], shares[1:]):
        new_center = new_center + candidate * share
    center = new_center

batch_x, batch_y = generate()
print("Final res = ", center.fitness(batch_x, batch_y))

Gen  0  started, res =  36.7912
Gen  1  started, res =  36.5913
Gen  2  started, res =  36.6216
Gen  3  started, res =  36.3381
Gen  4  started, res =  36.2395
Gen  5  started, res =  36.0752
Gen  6  started, res =  35.9586
Gen  7  started, res =  36.0479
Gen  8  started, res =  35.8683
Gen  9  started, res =  35.5271
Gen  10  started, res =  35.4795
Gen  11  started, res =  35.3467
Gen  12  started, res =  34.8382
Gen  13  started, res =  34.799
Gen  14  started, res =  34.4498
Gen  15  started, res =  34.227
Gen  16  started, res =  33.823
Gen  17  started, res =  33.7685
Gen  18  started, res =  33.4723
Gen  19  started, res =  33.1612
Gen  20  started, res =  32.2172
Gen  21  started, res =  32.1463
Gen  22  started, res =  30.6981
Gen  23  started, res =  29.6556
Gen  24  started, res =  29.6219
Gen  25  started, res =  26.946
Gen  26  started, res =  25.1035
Gen  27  started, res =  22.0492
Gen  28  started, res =  22.7234
Gen  29  started, res =  21.5068
Gen  30  started, res = 

In [11]:
# Loss of the current `center` on whatever batch batch_x / batch_y last held.
# NOTE(review): the execution count jumps to 11 here, so this value may come
# from a kernel state that differs from a clean top-to-bottom run.
center.fitness(batch_x, batch_y)

0.0015134425