In [1]:
import numpy as np
import tensorflow as tf

  return f(*args, **kwds)


In [2]:
batch_size = 128
embedding_dimension = 64
num_classes = 2
hidden_layer_size = 32
times_steps = 6
element_size = 1

In [3]:
digit_to_word_map = {1:"One",2:"Two", 3:"Three", 4:"Four", 5:"Five",6:"Six",7:"Seven",8:"Eight",9:"Nine"}
digit_to_word_map[0]="PAD"
even_sentences = []
odd_sentences = []
seqlens = []

for i in range(10000):
    rand_seq_len = np.random.choice(range(3,7))
    seqlens.append(rand_seq_len)
    
    rand_odd_ints = np.random.choice(range(1,10,2), rand_seq_len)
    rand_even_ints = np.random.choice(range(2,10,2), rand_seq_len)
    
    # Padding
    if rand_seq_len<6:
        rand_odd_ints = np.append(rand_odd_ints, [0]*(6-rand_seq_len))
        rand_even_ints = np.append(rand_even_ints, [0]*(6-rand_seq_len))
        
    even_sentences.append(" ".join([digit_to_word_map[r] for r in rand_odd_ints]))
    odd_sentences.append(" ".join([digit_to_word_map[r] for r in rand_even_ints]))
    data = even_sentences+odd_sentences
    
# Same seq lengths for even, odd sentences
seqlens*=2

In [4]:
even_sentences[0:6]

['Five One One PAD PAD PAD',
 'Three Seven Nine Five One PAD',
 'One Three Five One PAD PAD',
 'Five Nine Seven PAD PAD PAD',
 'Five One Nine Five PAD PAD',
 'Five Three Five Seven PAD PAD']

In [5]:
odd_sentences[0:6]

['Two Two Four PAD PAD PAD',
 'Eight Six Eight Six Four PAD',
 'Eight Eight Eight Four PAD PAD',
 'Eight Two Four PAD PAD PAD',
 'Eight Four Six Four PAD PAD',
 'Eight Six Two Two PAD PAD']

In [6]:
seqlens[0:6]

[3, 5, 4, 3, 4, 4]

In [7]:
# Map from words to indices
word2index_map ={}
index=0
for sent in data:
    for word in sent.lower().split():
        if word not in word2index_map:
            word2index_map[word] = index
            index+=1
# Inverse map
index2word_map = {index: word for word, index in word2index_map.items()}
vocabulary_size = len(index2word_map)

In [8]:
labels = [1]*10000 + [0]*10000
for i in range(len(labels)):
    label = labels[i]
    one_hot_encoding = [0]*2
    one_hot_encoding[label] = 1
    labels[i] = one_hot_encoding
    
data_indices = list(range(len(data)))
np.random.shuffle(data_indices)
data = np.array(data)[data_indices]
labels = np.array(labels)[data_indices]
seqlens = np.array(seqlens)[data_indices]
train_x = data[:10000]
train_y = labels[:10000]
train_seqlens = seqlens[:10000]
test_x = data[10000:]
test_y = labels[10000:]
test_seqlens = seqlens[10000:]

In [9]:
def get_sentence_batch(batch_size,data_x, data_y, data_seqlens):
    instance_indices = list(range(len(data_x)))
    np.random.shuffle(instance_indices)
    batch = instance_indices[:batch_size]
    x = [[word2index_map[word] for word in data_x[i].lower().split()] for i in batch]
    y = [data_y[i] for i in batch]
    seqlens = [data_seqlens[i] for i in batch]
    return x,y,seqlens

In [10]:
_inputs = tf.placeholder(tf.int32, shape=[batch_size,times_steps])
_labels = tf.placeholder(tf.float32, shape=[batch_size, num_classes])
# seqlens for dynamic calculation
_seqlens = tf.placeholder(tf.int32, shape=[batch_size])

In [11]:
with tf.name_scope("embeddings"):
    embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_dimension], -1.0, 1.0),
                             name='embedding')
    embed = tf.nn.embedding_lookup(embeddings, _inputs)

In [12]:
with tf.variable_scope("lstm"):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, forget_bias=1.0)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, embed, sequence_length = _seqlens, dtype=tf.float32)
    
weights = {
'linear_layer': tf.Variable(tf.truncated_normal([hidden_layer_size, num_classes], mean=0,stddev=.01))
}

biases = {
'linear_layer':tf.Variable(tf.truncated_normal([num_classes], mean=0,stddev=.01))
}

# Extract the last relevant output and use in a linear layer
final_output = tf.matmul(states[1], weights["linear_layer"]) + biases["linear_layer"]
softmax = tf.nn.softmax_cross_entropy_with_logits(logits = final_output, labels = _labels)
cross_entropy = tf.reduce_mean(softmax)

In [14]:
train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(_labels,1), tf.argmax(final_output,1))
accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32))) * 100

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for step in range(1000):
        x_batch, y_batch,seqlen_batch = get_sentence_batch(batch_size, train_x,train_y, train_seqlens)
        sess.run(train_step,feed_dict={_inputs:x_batch, _labels:y_batch, _seqlens:seqlen_batch})
        if step % 100 == 0:
            acc = sess.run(accuracy,feed_dict={_inputs:x_batch,
                                               _labels:y_batch,
                                               _seqlens:seqlen_batch})
            print("Accuracy at %d: %.5f" % (step, acc))
        
    for test_batch in range(5):
        x_test, y_test,seqlen_test = get_sentence_batch(batch_size, test_x,test_y, test_seqlens)
        batch_pred,batch_acc = sess.run([tf.argmax(final_output,1), accuracy], 
                                        feed_dict={_inputs:x_test,
                                                   _labels:y_test,
                                                   _seqlens:seqlen_test})
        print("Test batch accuracy %d: %.5f" % (test_batch, batch_acc))
        
    output_example = sess.run([outputs],feed_dict={_inputs:x_test, _labels:y_test, _seqlens:seqlen_test})
    states_example = sess.run([states[1]],feed_dict={_inputs:x_test, _labels:y_test, _seqlens:seqlen_test})

Accuracy at 0: 21.87500
Accuracy at 100: 100.00000
Accuracy at 200: 100.00000
Accuracy at 300: 100.00000
Accuracy at 400: 100.00000
Accuracy at 500: 100.00000
Accuracy at 600: 100.00000
Accuracy at 700: 100.00000
Accuracy at 800: 100.00000
Accuracy at 900: 100.00000
Test batch accuracy 0: 100.00000
Test batch accuracy 1: 100.00000
Test batch accuracy 2: 100.00000
Test batch accuracy 3: 100.00000
Test batch accuracy 4: 100.00000


In [21]:
output_example[0][4]

array([[-0.45338485,  0.22028518,  0.4866209 , -0.39610946, -0.43948761,
         0.40012556,  0.4073998 ,  0.50630784,  0.53446972, -0.50361568,
        -0.56097639, -0.14107759, -0.37376055, -0.51496172, -0.0420157 ,
        -0.40838203, -0.13329203, -0.52536339,  0.50354028,  0.48788941,
        -0.30271274, -0.48202905,  0.19885591, -0.35509589,  0.40323624,
        -0.40325791, -0.32722124, -0.22728226,  0.47828978, -0.50703698,
        -0.48974565, -0.34652722],
       [-0.77031815,  0.33554065,  0.75114548, -0.71546733, -0.66875219,
         0.73573929,  0.68502128,  0.7549054 ,  0.80797565, -0.74662584,
        -0.79945141, -0.19492942, -0.3570503 , -0.78755808, -0.34668693,
        -0.76519352, -0.53776652, -0.77256197,  0.83429724,  0.71421516,
        -0.72635263, -0.73106301,  0.65684772, -0.75637931,  0.68608886,
        -0.42445445, -0.68894297, -0.5665853 ,  0.72541368, -0.73829049,
        -0.75690573, -0.6751951 ],
       [-0.89282179,  0.69143522,  0.87298918, -0.7998

In [None]:
# Example of LSTM with multiple layers

num_LSTM_layers = 2

with tf.variable_scope("lstm"):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, forget_bias=1.0)
    cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_cell]*num_LSTM_layers, state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell, embed, sequence_length = _seqlens, dtype=tf.float32)
    
# Extract the final state and use in a linear layer
final_output = tf.matmul(states[num_LSTM_layers-1][1], weights["linear_layer"]) + biases["linear_layer"]