In [1]:
import tensorflow as tf
import numpy as np

In [2]:
def encoder(batch_size, 
            sequence_len,
            symbol_size,
            embedding_size,
            hidden_size, 
            layer_size):
    enc_batch_inputs = tf.placeholder(tf.int32, shape=(batch_size, sequence_len))
    
    with tf.variable_scope('encoder') as scope:
        enc_embeddings = tf.Variable(tf.random_uniform([symbol_size,
                                                        embedding_size],
                                                        -1.0, 1.0))
        batch_embeddings = tf.nn.embedding_lookup(enc_embeddings, enc_batch_inputs)
        fw_cell = tf.contrib.rnn.GRUCell(num_units=hidden_size)
        bw_cell = tf.contrib.rnn.GRUCell(num_units=hidden_size)
        
        fw_initial_state = fw_cell.zero_state(batch_size, tf.float32)
        bw_initial_state = bw_cell.zero_state(batch_size, tf.float32)
        '''
        outputs, states = tf.nn.dynamic_rnn(cell, 
                                            batch_embeddings, 
                                            initial_state=initial_state, 
                                            dtype=tf.float32)
        '''
        outputs, states = tf.nn.bidirectional_dynamic_rnn(fw_cell,
                                                          bw_cell,
                                                          batch_embeddings,
                                                          initial_state_fw=fw_initial_state,
                                                          initial_state_bw=bw_initial_state,
                                                          dtype=tf.float32)
        
    return enc_batch_inputs, outputs, states

In [3]:
# testing encoder
batch_size = 3
sequence_len = 3
symbol_size = 10
embedding_size = 64
hidden_size = 3
layer_size = 1

x = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])

enc_batch_inputs, outputs, states = encoder(batch_size, 
                                        sequence_len, 
                                        symbol_size,
                                        embedding_size,
                                        hidden_size,
                                        layer_size)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([tf.concat([outputs[0], outputs[1]], axis=2), states[0]], feed_dict={enc_batch_inputs:x}))

[array([[[ 0.03344049, -0.21208596,  0.17080951,  0.10750819, -0.2180682 ,
         -0.25028053],
        [ 0.53089482, -0.51354712, -0.13939421,  0.14689639, -0.26300001,
         -0.27216142],
        [ 0.44177771, -0.70648837, -0.15198936,  0.16196719, -0.02284231,
         -0.24801633]],

       [[-0.05370371,  0.26331165,  0.10058122, -0.32875204, -0.25263146,
          0.39933598],
        [-0.12632224,  0.45016894,  0.17794411, -0.22694975, -0.2860693 ,
          0.53271067],
        [-0.05181505,  0.23848814,  0.0676185 ,  0.03374463, -0.00885347,
          0.4509646 ]],

       [[-0.13132375,  0.03676818,  0.0580357 , -0.49194753, -0.06238506,
          0.13777252],
        [-0.15022957, -0.0172472 , -0.03639007, -0.03794547,  0.04221664,
          0.17881306],
        [-0.11998031,  0.17730625, -0.19943129, -0.09116621, -0.30373982,
          0.08141112]]], dtype=float32), array([[ 0.44177771, -0.70648837, -0.15198936],
       [-0.05181505,  0.23848814,  0.0676185 ],
       [

In [3]:
def attention(encoder_hidden_states, 
              last_hidden_state):
    encoder_seq_len = encoder_hidden_states.get_shape().as_list()[1]
    encoder_hidden_state_dim = encoder_hidden_states.get_shape().as_list()[2]
    last_hidden_state_dim = last_hidden_state.get_shape().as_list()[1]
    with tf.variable_scope('attention') as scope:
        _encoder_hidden_states = tf.reshape(encoder_hidden_states, shape=[-1, encoder_hidden_state_dim])
        W_a = tf.Variable(tf.random_uniform([last_hidden_state_dim,
                                             last_hidden_state_dim],
                                             -1.0, 1.0), name='W_a')
        U_a = tf.Variable(tf.random_uniform([encoder_hidden_state_dim,
                                             last_hidden_state_dim],
                                             -1.0, 1.0), name='U_a')
        v_a = tf.Variable(tf.random_uniform([last_hidden_state_dim, 1],
                                             -1.0, 1.0), name='v_a')
        b = tf.Variable(tf.zeros(shape=[last_hidden_state_dim]), name='b')
        
        
        producted_hidden_states = tf.reshape(tf.matmul(_encoder_hidden_states, U_a), 
                                             shape=[-1, encoder_seq_len, last_hidden_state_dim])
        producted_last_state = tf.expand_dims(tf.matmul(last_hidden_state, W_a), dim=1)
        signals = tf.reshape(tf.nn.tanh(producted_hidden_states + producted_last_state + b), 
                             shape=[-1, last_hidden_state_dim])
        alignment = tf.reshape(tf.matmul(signals, v_a), [-1, encoder_seq_len], name='alignment')
        alpha = tf.nn.softmax(alignment, name='alpha')
        context = tf.matmul(tf.transpose(tf.expand_dims(alpha, dim=2), perm=[0,2,1]), encoder_hidden_states,
                                name='context')
        context = tf.reshape(context, [-1, encoder_hidden_state_dim])
        
    return context

In [7]:
# Testing Attention
# Assumed q as a matrix which has 3 hidden states at each 3 sequences in 3 batches 
q = np.array([[[1,2,3,4,5,6],[4,5,6,2,3,4],[7,8,9,5,6,7]],
              [[1,2,3,3,2,1],[4,5,6,2,3,4],[7,8,9,5,6,7]],
              [[1,2,3,4,5,6],[4,5,6,7,8,9],[7,8,9,0,0,0]]])

# Assumed v as a matrix which has 3 last hidden state in each 3 batches
v = np.array([[1,2,3],[4,5,6],[7,8,9]])

q_t = tf.constant(q, dtype=tf.float32)
v_t = tf.constant(v, dtype=tf.float32)

red_q = tf.reduce_sum(q_t, reduction_indices=1)

context = attention(q_t,v_t)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(tf.matmul(tf.transpose(tf.expand_dims(v_t, dim=2), perm=[0,2,1]),q_t).eval())
    print(red_q.eval())
    print(context.eval())

3 6
(9, 3)
(3, 3)
(3, 3)
[[[  30.   36.   42.   23.   29.   35.]]

 [[  66.   81.   96.   52.   59.   66.]]

 [[ 102.  126.  150.   84.   99.  114.]]]
[[ 12.  15.  18.  11.  14.  17.]
 [ 12.  15.  18.  10.  11.  12.]
 [ 12.  15.  18.  11.  13.  15.]]
[[ 3.92429399  4.92429399  5.92429399  3.66749001  4.66749001  5.66749001]
 [ 4.21626616  5.21626663  6.21626663  3.35436916  3.7853179   4.21626616]
 [ 3.31743479  4.31743479  5.31743479  4.4238143   5.23445225  6.0450902 ]]


In [5]:
def decoder(batch_size, 
            sequence_len,
            symbol_size,
            embedding_size,
            hidden_size,
            layer_size,
            encoder_hidden_state,
            context_vectors,
            train=True):
    if train:
        dec_batch_inputs = tf.placeholder(tf.int32, shape=(batch_size, sequence_len))
        batch_labels = tf.placeholder(tf.int32, shape=(batch_size, sequence_len))
    
    with tf.variable_scope('decoder') as scope:
        embeddings = tf.Variable(tf.random_uniform([symbol_size,
                                                    embedding_size],
                                                    -1.0, 1.0))
        if train:
            dec_batch_embeddings = tf.nn.embedding_lookup(embeddings, dec_batch_inputs)
        
        cells = [tf.contrib.rnn.GRUCell(num_units=hidden_size) for _ in range(sequence_len)]
        initial_state = encoder_hidden_state
        outputs_list = []
        
        for t in range(sequence_len):
            with tf.variable_scope('decoder_' + str(t) + '_step') as t_scope:
                context = attention(context_vectors, initial_state)
                concated_batch_inputs = tf.expand_dims(tf.concat([dec_batch_embeddings[:, t], context], 1),
                                                      dim=1)
                cell = cells[t]
                output, state = tf.nn.dynamic_rnn(cell,
                                                  concated_batch_inputs,
                                                  initial_state=initial_state,
                                                  dtype=tf.float32)
                outputs_list.append(tf.reshape(output, [-1, hidden_size]))
                initial_state = state
        
        outputs = tf.stack(outputs_list, axis=1)
        inputs_for_fc = tf.reshape(outputs, [-1, hidden_size])
        fc_outputs = tf.contrib.layers.fully_connected(inputs=inputs_for_fc,
                                                       num_outputs=symbol_size)
        logits = tf.reshape(fc_outputs, [batch_size, sequence_len, symbol_size])
        weights = tf.ones([batch_size, sequence_len])
        sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                         targets=batch_labels,
                                                         weights=weights)
        loss = tf.reduce_mean(sequence_loss)
        prediction = tf.argmax(logits, axis=2)
        
    return dec_batch_inputs, batch_labels, prediction, loss

In [6]:
# integrated testing
batch_size = 3
sequence_len = 3
symbol_size = 10
embedding_size = 64
hidden_size = 3
layer_size = 1

x = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])
Y = np.array([[1,2,3,4],
              [5,6,7,8],
              [7,8,9,0]])

enc_batch_inputs, outputs, state = encoder(batch_size, 
                                        sequence_len, 
                                        symbol_size,
                                        embedding_size,
                                        hidden_size,
                                        layer_size)

context_vectors = tf.concat([outputs[0], outputs[1]], axis=2)
encoder_hidden_state = state[1]

batch_size = 3 
sequence_len = 4
symbol_size = 10
embedding_size = 64
hidden_size = 3
layer_size = 1
            
prediction, loss = decoder(batch_size, 
                          sequence_len,
                          symbol_size,
                          embedding_size,
                          hidden_size,
                          layer_size,
                          encoder_hidden_state,
                          context_vectors,
                          train=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10):
        print(sess.run([prediction], feed_dict={enc_batch_inputs:x}))

UnboundLocalError: local variable 'dec_batch_inputs' referenced before assignment