In [1]:
import tensorflow as tf
import numpy as np

# Define parameters

In [2]:
parameters = {}
parameters['seq_length'] = 5
parameters['n_input'] = 3
parameters['n_output'] = 3
parameters['n_hidden'] = 4
parameters['init_stdev'] = 0.1
parameters['learning_rate'] = 0.01

# Define model

In [3]:
# Some auxilar functions
def _seq_length(sequence):
    used = tf.sign(tf.reduce_max(tf.abs(sequence), reduction_indices=2))
    length = tf.reduce_sum(used, reduction_indices=1)
    length = tf.cast(length, tf.int32)
    return length

def _last_relevant(output, length):
    batch_size = tf.shape(output)[0]
    max_length = tf.shape(output)[1]
    out_size = int(output.get_shape()[2])
    index = tf.range(0, batch_size) * max_length + (length - 1)
    flat = tf.reshape(output, [-1, out_size])
    relevant = tf.gather(flat, index)

    return relevant

In [4]:
# Define placeholders
x = tf.placeholder("float", [None, parameters['seq_length'], parameters['n_input']], name='x')
y = tf.placeholder("float", [None, parameters['n_output']], name='y')

# Define weights and bias - For now we will try with attention to hidden state 
# 
weights = {
    'alphas': tf.Variable(tf.random_normal([parameters['n_hidden'], 1], stddev=parameters['init_stdev'])),
    'out': tf.Variable(tf.random_normal([parameters['n_hidden'], parameters['n_output']], 
                                        stddev=parameters['init_stdev']), name='w_out')
        }
biases = {
    'out': tf.Variable(tf.random_normal([parameters['n_output']]), name='b_out'),
    'alphas': tf.Variable(tf.random_normal([1]), name='b_alphas')
}

# Define RNN
rnn_cell = tf.contrib.rnn.LSTMCell(parameters['n_hidden'])
outputs, states = tf.nn.dynamic_rnn(
    rnn_cell,
    x,
    dtype=tf.float32,
    sequence_length=_seq_length(x)
)

# Define attention weihts
outputs_reshaped = tf.reshape(outputs, [-1, int(outputs.get_shape()[2])])
ejs = tf.matmul(outputs_reshaped, weights['alphas']) + biases['alphas'] 
ejs_reshaped = tf.reshape(ejs, [-1, int(outputs.get_shape()[1])])
alphas = tf.nn.softmax(ejs_reshaped, name='attention_weights') 
reshaped_alphas = tf.reshape(alphas, [-1, 1])
# Define context
context = reshaped_alphas * outputs_reshaped
context_reshaped = tf.reshape(context, [-1, parameters['seq_length'], int(context.get_shape()[1])])
context_reduced = tf.reduce_sum(context_reshaped, axis= 1)
# Define logits and loss
logits = tf.matmul(context_reduced, weights['out']) + biases['out']
pred_prob = tf.nn.softmax(logits, name="predictions")
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))

#Define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=parameters['learning_rate']).minimize(loss)

# Initialization
init = tf.global_variables_initializer()


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



# Define data

In [5]:
# Sample 1a
x1a = np.array([
        [1,0,0],
        [0,1,0],
        [0,0,1],
        [1,0,0],
        [1,0,0]
    ])
y1a = np.array([1,0,0])
# Sample 1b
x1b = np.array([
        [1,0,0],
        [0,1,0],
        [0,0,1],
        [0,1,0],
        [0,1,0]
    ])
y1b = np.array([1,0,0])
# Sample 1c
x1c = np.array([
        [1,0,0],
        [0,1,0],
        [0,0,1],
        [0,0,1],
        [0,0,1]
    ])
y1c = np.array([1,0,0])
# Sample 2a
x2a = np.array([
        [0,0,1],
        [0,0,1],
        [0,0,1],
        [0,0,1],
        [0,0,1]
    ])
y2a = np.array([0,1,0])
# Sample 2b
x2b = np.array([
        [0,0,1],
        [0,0,1],
        [0,1,0],
        [0,1,0],
        [0,1,0]
    ])
y2b = np.array([0,1,0])
# Sample 2a
x2c = np.array([
        [0,0,1],
        [0,0,1],
        [1,0,0],
        [1,0,0],
        [1,0,0]
    ])
y2c = np.array([0,1,0])
# Sample 3a
x3a = np.array([
        [0,0,1],
        [0,1,0],
        [0,0,1],
        [0,1,0],
        [1,0,0]
    ])
y3a = np.array([0,0,1])
# Sample 3b
x3b = np.array([
        [0,0,1],
        [0,1,0],
        [0,0,1],
        [0,1,0],
        [0,1,0]
    ])
y3b = np.array([0,0,1])
# Sample 3c
x3c = np.array([
        [0,0,1],
        [0,1,0],
        [0,0,1],
        [0,1,0],
        [0,0,1]
    ])
y3c = np.array([0,0,1])

# Add 2 samples of each to list
X = [x1a, np.copy(x1a), x1b, np.copy(x1b), x1c, np.copy(x1c), x2a, np.copy(x2a), x2b, np.copy(x2b), x3a, np.copy(x3a), x3b, np.copy(x3b), x3c, np.copy(x3c)]
Y = [y1a, np.copy(y1a), y1b, np.copy(y1b), y1c, np.copy(y1c), y2a, np.copy(y2a), y2b, np.copy(y2b), y3a, np.copy(y3a), y3b, np.copy(y3a), y3c, np.copy(y3c)]


# Train and save model

In [6]:
# Start training
saver = tf.train.Saver()
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, 300):
        batch_x = np.array(X)
        batch_y = np.array(Y)
        # Run optimization op (backprop)
        a = sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % 20 == 0 or step == 1:
            # Calculate batch loss and accuracy
            train_loss = sess.run(loss, feed_dict={x: batch_x, y: batch_y})
            print("Step " + str(step) + ", Loss= {:.4f}".format(train_loss))

    print("Optimization Finished!")
   
    # Once trained - Get attention weights for the training samples
    attention_weights = sess.run(alphas, feed_dict={x: batch_x})

    # Saved Model Builder 
    export_path = "models/attentionRNN/SavedModelBuilder/"
    builder = tf.saved_model.builder.SavedModelBuilder(export_path)
    builder.add_meta_graph_and_variables(
          sess, [tf.saved_model.tag_constants.SERVING])
    builder.save()
    
    
    


Step 1, Loss= 1.0990
Step 20, Loss= 0.9693
Step 40, Loss= 0.5747
Step 60, Loss= 0.3588
Step 80, Loss= 0.2425
Step 100, Loss= 0.1645
Step 120, Loss= 0.1107
Step 140, Loss= 0.0746
Step 160, Loss= 0.0522
Step 180, Loss= 0.0386
Step 200, Loss= 0.0301
Step 220, Loss= 0.0244
Step 240, Loss= 0.0203
Step 260, Loss= 0.0174
Step 280, Loss= 0.0151
Optimization Finished!
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b'models/attentionRNN/SavedModelBuilder/saved_model.pb'


# Check some attention weights

In [7]:
attention_weights

array([[ 0.17468816,  0.1399889 ,  0.23449016,  0.23826963,  0.21256314],
       [ 0.17468816,  0.1399889 ,  0.23449016,  0.23826963,  0.21256314],
       [ 0.18295713,  0.14661536,  0.2455899 ,  0.21695247,  0.20788521],
       [ 0.18295713,  0.14661536,  0.2455899 ,  0.21695247,  0.20788521],
       [ 0.17491445,  0.14017023,  0.23479392,  0.23057485,  0.21954654],
       [ 0.17491445,  0.14017023,  0.23479392,  0.23057485,  0.21954654],
       [ 0.00067953,  0.05624923,  0.26769829,  0.34160104,  0.33377188],
       [ 0.00067953,  0.05624923,  0.26769829,  0.34160104,  0.33377188],
       [ 0.00306775,  0.25393713,  0.59615821,  0.11737238,  0.02946457],
       [ 0.00306775,  0.25393713,  0.59615821,  0.11737238,  0.02946457],
       [ 0.05021417,  0.13395223,  0.25083759,  0.29002962,  0.27496645],
       [ 0.05021417,  0.13395223,  0.25083759,  0.29002962,  0.27496645],
       [ 0.04867735,  0.12985256,  0.24316061,  0.28115314,  0.29715633],
       [ 0.04867735,  0.12985256,  0.2

In [13]:
X[0]

array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0]])