In [1]:
"""
  Autocompletion of the last character of words
  Given the first three letters of a four-letter word, learn to predict the last letter
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 
import tensorflow as tf
import numpy as np


# The 26 lowercase letters are both the input alphabet and the output classes
vocab = list('abcdefghijklmnopqrstuvwxyz')

# Lookup table: character -> its position in vocab
v_map = dict(zip(vocab, range(len(vocab))))
v_len = len(v_map)

# Character sequences used for training and evaluation.
# The first three letters of each word are the input X, the last letter is the label Y:
#   'word': 'wor' -> X, 'd' -> Y
#   'wood': 'woo' -> X, 'd' -> Y
training_data = [
    'word', 'wood', 'deep', 'dive', 'cold',
    'cool', 'load', 'love', 'kiss', 'kind',
]
test_data = [
    'wood', 'deep', 'cold', 'load', 'love',
    'dear', 'dove', 'cell', 'life', 'keep',
]

def make_batch(seq_data, char_to_idx=None):
    """Encode words as (one-hot input sequences, integer target labels).

    For every word, all characters except the last become the input sequence
    and the last character becomes the target.

    Args:
        seq_data: iterable of words (strings).
        char_to_idx: optional mapping from character to class index. Defaults
            to the module-level `v_map`. The one-hot width is `len(char_to_idx)`,
            so the mapped indices must lie in `range(len(char_to_idx))`.

    Returns:
        A tuple `(input_batch, target_batch)`:
          * input_batch: list with one float array per word, of shape
            `(len(word) - 1, len(char_to_idx))`; each row is the one-hot
            encoding of one input character.
          * target_batch: list with the integer index of each word's last
            character. Targets stay as plain indices (not one-hot) because the
            loss used downstream is sparse_softmax_cross_entropy_with_logits.

    Raises:
        KeyError: if a word contains a character missing from the mapping.
        IndexError: if a word is empty.
    """
    if char_to_idx is None:
        char_to_idx = v_map

    # Identity matrix built once: row i is the one-hot vector of class i.
    # e.g. with 4 classes, indices [0, 1, 2] select
    #   [[1. 0. 0. 0.]
    #    [0. 1. 0. 0.]
    #    [0. 0. 1. 0.]]
    one_hot = np.eye(len(char_to_idx))

    input_batch = []
    target_batch = []

    for seq in seq_data:
        # Indices of the leading letters, e.g. 'word' -> [22, 14, 17]
        indices = [char_to_idx[ch] for ch in seq[:-1]]
        # Index of the final letter, e.g. 'word' -> 3
        target = char_to_idx[seq[-1]]

        # Indexing with a list yields a fresh array, so batch entries never
        # alias each other or the shared identity matrix.
        input_batch.append(one_hot[indices])
        target_batch.append(target)

    return input_batch, target_batch


learning_rate = 0.01
n_hidden = 10
total_epoch = 100
n_step = 3 # the length of the input sequence
n_input = n_class = v_len # the size of each input
random_seed = 42 # graph-level seed so that runs are repeatable

"""
  Phase 1: Create the computation graph
"""
# Start from an empty default graph so this cell can be re-run in the same
# kernel; without it, a second run of dynamic_rnn fails because the LSTM
# variables already exist in the graph.
tf.reset_default_graph()
# The seed belongs to the graph, so it must be set after the reset. It makes
# the random initial weights (and therefore the printed costs) repeatable.
tf.set_random_seed(random_seed)

# X: one-hot character sequences, Y: index of the letter to predict
X = tf.placeholder(tf.float32, [None, n_step, n_input])
Y = tf.placeholder(tf.int32, [None])

# Output projection from the hidden state to the class logits
W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

#                output (pred. of fourth letter)
#                 | (W,b)
#                outputs (hidden)
#       |    |    |
# RNN: [t1]-[t2]-[t3]
#       x1   x2   x3

# Create an LSTM cell
cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# Apply dropout for regularization
#cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.75)

# Create the RNN
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# outputs : [batch_size, max_time, cell.output_size]
print("1:", outputs)
# Make time the leading axis so the last step can be picked by index
outputs = tf.transpose(outputs, [1, 0, 2])
print("2:", outputs)
# Keep only the final time step; -1 stays correct if n_step is changed
outputs = outputs[-1]
print("3:", outputs)
# [batch_size, cell.output_size]
model = tf.matmul(outputs, W) + b
print("4:", model)
# [batch_size, n_classes]

# Labels are integer class indices, hence the sparse variant of the loss
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=Y))

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

"""
  Phase 2: Train the model
"""
# Evaluation ops: the most likely class for each word, and the fraction of
# words whose predicted letter matches the label.
prediction = tf.cast(tf.argmax(model, 1), tf.int32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, Y), tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Full-batch training: the whole training set is fed on every epoch
    input_batch, target_batch = make_batch(training_data)
    train_feed = {X: input_batch, Y: target_batch}

    for epoch in range(total_epoch):
        _, loss = sess.run([optimizer, cost], feed_dict=train_feed)
        print('Epoch:', '%04d' % (epoch + 1),
              'cost =', '{:.6f}'.format(loss))

    print('Optimization finished')

    """
      Make predictions
    """
    # Evaluated on the training words; swap in test_data to use that list instead
    seq_data = training_data # test_data
    input_batch, target_batch = make_batch(seq_data)

    predict, accuracy_val = sess.run(
        [prediction, accuracy],
        feed_dict={X: input_batch, Y: target_batch})

    # Rebuild each word from its three input letters plus the predicted last letter
    predicted = [word[:3] + vocab[char_idx]
                 for word, char_idx in zip(seq_data, predict)]

    print('\n=== Predictions ===')
    print('Input:', [w[:3] + ' ' for w in seq_data])
    print('Predicted:', predicted)
    print('Accuracy:', accuracy_val)
    

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
1: Tensor("rnn/transpose_1:0", shape=(?, 3, 10), dtype=float32)
2: Tensor("transpose:0", shape=(3, ?, 10), dtype=float32)
3: Tensor("strided_slice:0", shape=(?, 10), dtype=float32)
4: Tensor("add:0", shape=(?, 26), dtype=float32)
Epoch: 0001 cost = 3.690794
Epoch: 0002 cost = 3.602915
Epoch: 0003 cost = 3.514064
Epoch: 0004 cost = 3.423098
Epoch: 0005 cost = 3.329103
Epoch: 0006 cost = 3.231482
Epoch: 0007 cost = 3.129786
Epoch: 0008 cost = 3.023679
Epoch: 0009 cost = 2.912956
Epoch: 0010 cost = 2.797620
Epoch: 0011 cost = 2.677945
Epoch: 0012 cost = 2.554542
Epoch: 0013 cost = 2.428409
Epoch: 0014 cost = 2.300924
Epoch: 0015 cost = 2.173789
Epoch: 0016 cost = 2.048890
Epoch: 0017 cost = 1.9280

In [1]:
"""
  Autocompletion of the last character of words
  Given the first, second, and fourth letters of a four-letter word, learn to predict the third letter
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 
import tensorflow as tf
import numpy as np


# The 26 lowercase letters are both the input alphabet and the output classes
vocab = list('abcdefghijklmnopqrstuvwxyz')

# Lookup table: character -> its position in vocab
v_map = dict(zip(vocab, range(len(vocab))))
v_len = len(v_map)
print(v_map)

# Character sequences used for training and evaluation.
# Letters 1, 2 and 4 of each word are the input X, letter 3 is the label Y:
#   'word': 'wod' -> X, 'r' -> Y
#   'wood': 'wod' -> X, 'o' -> Y
training_data = [
    'word', 'wood', 'deep', 'dive', 'cold',
    'cool', 'load', 'love', 'kiss', 'kind',
]
test_data = [
    'wood', 'deep', 'cold', 'load', 'love',
    'dear', 'dove', 'cell', 'life', 'keep',
]

def make_batch(seq_data, char_to_idx=None):
    """Encode words as (one-hot input sequences, integer target labels).

    For every word, the first, second and fourth characters become the input
    sequence and the third character becomes the target.

    Args:
        seq_data: iterable of words (strings) with at least four characters.
        char_to_idx: optional mapping from character to class index. Defaults
            to the module-level `v_map`. The one-hot width is `len(char_to_idx)`,
            so the mapped indices must lie in `range(len(char_to_idx))`.

    Returns:
        A tuple `(input_batch, target_batch)`:
          * input_batch: list with one float array per word, of shape
            `(3, len(char_to_idx))`; each row is the one-hot encoding of one
            input character.
          * target_batch: list with the integer index of each word's third
            character. Targets stay as plain indices (not one-hot) because the
            loss used downstream is sparse_softmax_cross_entropy_with_logits.

    Raises:
        KeyError: if a used character is missing from the mapping.
        IndexError: if a word has fewer than four characters.
    """
    if char_to_idx is None:
        char_to_idx = v_map

    # Identity matrix built once: row i is the one-hot vector of class i.
    # e.g. with 4 classes, indices [0, 1, 3] select
    #   [[1. 0. 0. 0.]
    #    [0. 1. 0. 0.]
    #    [0. 0. 0. 1.]]
    one_hot = np.eye(len(char_to_idx))

    input_batch = []
    target_batch = []

    for seq in seq_data:
        # Letters 1, 2 and 4 form the context, e.g. 'word' -> 'wod' -> [22, 14, 3]
        context = seq[:2] + seq[3]
        indices = [char_to_idx[ch] for ch in context]
        # Letter 3 is what the model must predict, e.g. 'word' -> 'r' -> 17
        target = char_to_idx[seq[2]]

        # Indexing with a list yields a fresh array, so batch entries never
        # alias each other or the shared identity matrix.
        input_batch.append(one_hot[indices])
        target_batch.append(target)

    return input_batch, target_batch


learning_rate = 0.01
n_hidden = 10
total_epoch = 100
n_step = 3 # the length of the input sequence
n_input = n_class = v_len # the size of each input
random_seed = 42 # graph-level seed so that runs are repeatable

"""
  Phase 1: Create the computation graph
"""
# Start from an empty default graph so this cell can be re-run in the same
# kernel; without it, a second run of dynamic_rnn fails because the LSTM
# variables already exist in the graph.
tf.reset_default_graph()
# The seed belongs to the graph, so it must be set after the reset. It makes
# the random initial weights (and therefore the printed costs) repeatable.
tf.set_random_seed(random_seed)

# X: one-hot character sequences, Y: index of the letter to predict
X = tf.placeholder(tf.float32, [None, n_step, n_input])
Y = tf.placeholder(tf.int32, [None])

# Output projection from the hidden state to the class logits
W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

#                output (pred. of third letter)
#                 | (W,b)
#                outputs (hidden)
#       |    |    |
# RNN: [t1]-[t2]-[t3]
#       x1   x2   x3      (x1, x2, x3 = letters 1, 2 and 4 of the word)

# Create an LSTM cell
cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# Apply dropout for regularization
#cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.75)

# Create the RNN
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# outputs : [batch_size, max_time, cell.output_size]
print("1:", outputs)
# Make time the leading axis so the last step can be picked by index
outputs = tf.transpose(outputs, [1, 0, 2])
print("2:", outputs)
# Keep only the final time step; -1 stays correct if n_step is changed
outputs = outputs[-1]
print("3:", outputs)
# [batch_size, cell.output_size]
model = tf.matmul(outputs, W) + b
print("4:", model)
# [batch_size, n_classes]

# Labels are integer class indices, hence the sparse variant of the loss
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=Y))

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

"""
  Phase 2: Train the model
"""
# Evaluation ops: the most likely class for each word, and the fraction of
# words whose predicted letter matches the label.
prediction = tf.cast(tf.argmax(model, 1), tf.int32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, Y), tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Full-batch training: the whole training set is fed on every epoch
    input_batch, target_batch = make_batch(training_data)
    train_feed = {X: input_batch, Y: target_batch}

    for epoch in range(total_epoch):
        _, loss = sess.run([optimizer, cost], feed_dict=train_feed)
        print('Epoch:', '%04d' % (epoch + 1),
              'cost =', '{:.6f}'.format(loss))

    print('Optimization finished')

    """
      Make predictions
    """
    # Evaluated on the training words; swap in test_data to use that list instead
    seq_data = training_data # test_data
    input_batch, target_batch = make_batch(seq_data)

    predict, accuracy_val = sess.run(
        [prediction, accuracy],
        feed_dict={X: input_batch, Y: target_batch})

    # Rebuild each word by placing the predicted third letter between the
    # first two letters and the last one
    predicted = [word[:2] + vocab[char_idx] + word[-1]
                 for word, char_idx in zip(seq_data, predict)]

    print('\n=== Predictions ===')
    print('Input:', [w[:2] + ' ' + w[3] for w in seq_data])
    print('Predicted:', predicted)
    print('Accuracy:', accuracy_val)


{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25}
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
1: Tensor("rnn/transpose_1:0", shape=(?, 3, 10), dtype=float32)
2: Tensor("transpose:0", shape=(3, ?, 10), dtype=float32)
3: Tensor("strided_slice:0", shape=(?, 10), dtype=float32)
4: Tensor("add:0", shape=(?, 26), dtype=float32)
Epoch: 0001 cost = 4.193039
Epoch: 0002 cost = 4.120680
Epoch: 0003 cost = 4.052382
Epoch: 0004 cost = 3.987197
Epoch: 0005 cost = 3.924026
Epoch: 0006 cost = 3.861754
Epoch: 0007 cost = 3.799402
Epoch: 0008 cost = 3.736212
Epoch: 0009 cost = 3.671