# Deep Learning
## Project_3

### - Task #1

In [1]:
"""
  Autocompletion of the last character of words
  Given the first three letters of a four-letter word, learn to predict the last letter
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import tensorflow as tf
import numpy as np


# Vocabulary: the 26 lowercase ASCII letters in alphabetical order
vocab = [chr(code) for code in range(ord('a'), ord('z') + 1)]

# Lookup table from each character to its position in vocab
v_map = dict(zip(vocab, range(len(vocab))))
v_len = len(v_map)

# Data sets (four-character words).
# Each word is split into an input part X and a target character Y, e.g.
#   'word': 'wor' -> X, 'd' -> Y
#   'wood': 'woo' -> X, 'd' -> Y
training_data = [
    'word', 'wood', 'deep', 'dive', 'cold',
    'cool', 'load', 'love', 'kiss', 'kind',
]
test_data = [
    'wood', 'deep', 'cold', 'load', 'love',
    'dear', 'dove', 'cell', 'life', 'keep',
]

def make_batch(seq_data):
    """Convert words into one-hot input sequences and integer targets.

    Every word is split into its leading characters (all but the last one),
    which form the input sequence, and its final character, which is the
    target to predict.

    Args:
        seq_data: iterable of strings whose characters are all keys of the
            module-level ``v_map``.

    Returns:
        A tuple ``(input_batch, target_batch)``:
        ``input_batch`` is a list with one array per word, of shape
        ``(len(word) - 1, v_len)``, holding one one-hot row per input
        character; ``target_batch`` is a list with the vocabulary index of
        each word's last character.

    Raises:
        KeyError: if a word contains a character that is not in ``v_map``.
    """
    # Row i of the identity matrix is the one-hot vector of index i.
    # Build it once instead of once per word.
    one_hot = np.eye(v_len)

    input_batch = []
    target_batch = []

    for seq in seq_data:
        # Indices of the leading characters, e.g. 'word' -> [22, 14, 17]
        char_ids = [v_map[ch] for ch in seq[:-1]]
        # Fancy indexing selects (and copies) one one-hot row per character
        input_batch.append(one_hot[char_ids])

        # The target stays a plain class index (not one-hot) because the
        # loss used with it is sparse_softmax_cross_entropy_with_logits.
        target_batch.append(v_map[seq[-1]])

    return input_batch, target_batch


# Hyperparameters
learning_rate = 0.01
n_hidden = 10      # number of units in the LSTM cell
total_epoch = 100  # number of training iterations
n_step = 3         # the length of the input sequence
n_class = v_len    # one output class per vocabulary character
n_input = n_class  # the size of each (one-hot) input vector

"""
  Phase 1: Create the computation graph
"""
# Inputs: a batch of one-hot sequences; labels: one class index per sequence
X = tf.placeholder(tf.float32, shape=[None, n_step, n_input])
Y = tf.placeholder(tf.int32, shape=[None])

# Output projection from the LSTM hidden state to the class logits
W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

# Network layout:
#
#                 output (prediction of the fourth letter)
#                  | (W, b)
#                 outputs (hidden)
#        |    |    |
#  RNN: [t1]-[t2]-[t3]
#        x1   x2   x3

# Recurrent cell (LSTM)
cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# Optional regularization, currently disabled:
#cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.75)

# Unroll the cell over the input sequence
# outputs : [batch_size, max_time, cell.output_size]
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Move the time axis to the front and keep only the last time step
# -> [batch_size, cell.output_size]
outputs = tf.transpose(outputs, perm=[1, 0, 2])[-1]

# Class logits : [batch_size, n_classes]
model = tf.matmul(outputs, W) + b

# Mean cross-entropy between the logits and the integer labels
per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=model, labels=Y)
cost = tf.reduce_mean(per_example_loss)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

"""
  Phase 2: Train the model
"""
with tf.Session() as sess:
    # Give every graph variable its initial value
    sess.run(tf.global_variables_initializer())

    # The whole training set is fed as a single batch on every epoch
    train_inputs, train_targets = make_batch(training_data)
    train_feed = {X: train_inputs, Y: train_targets}

    for epoch in range(total_epoch):
        # One optimizer step; also fetch the loss for logging
        _, loss = sess.run([optimizer, cost], feed_dict=train_feed)

        print('Epoch:', '%04d' % (epoch + 1),
              'cost =', '{:.6f}'.format(loss))

    print('Optimization finished')

    """
      Make predictions
    """
    seq_data = test_data

    # Predicted class = index of the largest logit; accuracy = share of
    # predictions that equal the label
    prediction = tf.cast(tf.argmax(model, axis=1), tf.int32)
    is_correct = tf.equal(prediction, Y)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    test_inputs, test_targets = make_batch(seq_data)
    test_feed = {X: test_inputs, Y: test_targets}

    predict, accuracy_val = sess.run([prediction, accuracy],
                                     feed_dict=test_feed)

    # Rebuild each word from its first three letters plus the predicted one
    predicted = [word[:3] + vocab[char_id]
                 for word, char_id in zip(seq_data, predict)]

    print('\n=== Predictions ===')
    print('Input:', [w[:3] + ' ' for w in seq_data])
    print('Predicted:', predicted)
    print('Accuracy:', accuracy_val)

Epoch: 0001 cost = 4.008907
Epoch: 0002 cost = 3.926097
Epoch: 0003 cost = 3.844792
Epoch: 0004 cost = 3.763762
Epoch: 0005 cost = 3.681812
Epoch: 0006 cost = 3.597792
Epoch: 0007 cost = 3.510710
Epoch: 0008 cost = 3.419755
Epoch: 0009 cost = 3.324250
Epoch: 0010 cost = 3.223634
Epoch: 0011 cost = 3.117485
Epoch: 0012 cost = 3.005560
Epoch: 0013 cost = 2.887846
Epoch: 0014 cost = 2.764598
Epoch: 0015 cost = 2.636387
Epoch: 0016 cost = 2.504138
Epoch: 0017 cost = 2.369148
Epoch: 0018 cost = 2.233070
Epoch: 0019 cost = 2.097843
Epoch: 0020 cost = 1.965582
Epoch: 0021 cost = 1.838394
Epoch: 0022 cost = 1.718150
Epoch: 0023 cost = 1.606258
Epoch: 0024 cost = 1.503475
Epoch: 0025 cost = 1.409801
Epoch: 0026 cost = 1.324508
Epoch: 0027 cost = 1.246323
Epoch: 0028 cost = 1.173705
Epoch: 0029 cost = 1.105172
Epoch: 0030 cost = 1.039602
Epoch: 0031 cost = 0.976456
Epoch: 0032 cost = 0.915834
Epoch: 0033 cost = 0.858366
Epoch: 0034 cost = 0.804951
Epoch: 0035 cost = 0.756411
Epoch: 0036 cost = 0

### - Task #2

In [2]:
# Reset the global default graph so that the ops created by the following
# cell are added to a fresh, empty graph instead of the one built earlier.
tf.reset_default_graph()

In [3]:
"""
  Autocompletion of the third character of words
  Given the first two letters and the last letter of a four-letter word, learn to predict the third letter
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import tensorflow as tf
import numpy as np


# Vocabulary: the 26 lowercase ASCII letters in alphabetical order
vocab = [chr(code) for code in range(ord('a'), ord('z') + 1)]

# Lookup table from each character to its position in vocab
v_map = dict(zip(vocab, range(len(vocab))))
v_len = len(v_map)

# Data sets (four-character words).
# In this task the third character is the one to predict, e.g.
#   'word': 'wo' + 'd' -> X, 'r' -> Y
#   'wood': 'wo' + 'd' -> X, 'o' -> Y
training_data = [
    'word', 'wood', 'deep', 'dive', 'cold',
    'cool', 'load', 'love', 'kiss', 'kind',
]
test_data = [
    'wood', 'deep', 'cold', 'load', 'love',
    'dear', 'dove', 'cell', 'life', 'keep',
]

def make_batch(seq_data):
    """Convert words into one-hot input sequences and integer targets.

    For every word the input sequence is made of its first character, its
    second character and its last character (in that order); the target is
    the second-to-last character, i.e. the third letter of a four-letter
    word.

    Args:
        seq_data: iterable of strings (at least two characters long) whose
            characters are all keys of the module-level ``v_map``.

    Returns:
        A tuple ``(input_batch, target_batch)``:
        ``input_batch`` is a list with one ``(3, v_len)`` array per word,
        holding one one-hot row per input character; ``target_batch`` is a
        list with the vocabulary index of each word's second-to-last
        character.

    Raises:
        KeyError: if a used character is not in ``v_map``.
        IndexError: if a word is too short to be indexed at 1 or -2.
    """
    # Row i of the identity matrix is the one-hot vector of index i.
    # Build it once instead of once per word.
    one_hot = np.eye(v_len)

    input_batch = []
    target_batch = []

    for seq in seq_data:
        # Indices of the two leading characters and of the last character,
        # e.g. 'word' -> [22, 14, 3]
        char_ids = [v_map[seq[0]], v_map[seq[1]], v_map[seq[-1]]]
        # Fancy indexing selects (and copies) one one-hot row per character
        input_batch.append(one_hot[char_ids])

        # Index of the character to predict, e.g. 'word' -> 17 ('r').
        # It stays a plain class index (not one-hot) because the loss used
        # with it is sparse_softmax_cross_entropy_with_logits.
        target_batch.append(v_map[seq[-2]])

    return input_batch, target_batch


# Hyperparameters
learning_rate = 0.01
n_hidden = 10      # number of units in the LSTM cell
total_epoch = 100  # number of training iterations
n_step = 3         # the length of the input sequence
n_class = v_len    # one output class per vocabulary character
n_input = n_class  # the size of each (one-hot) input vector

"""
  Phase 1: Create the computation graph
"""
# Inputs: a batch of one-hot sequences; labels: one class index per sequence
X = tf.placeholder(tf.float32, shape=[None, n_step, n_input])
Y = tf.placeholder(tf.int32, shape=[None])

# Output projection from the LSTM hidden state to the class logits
W = tf.Variable(tf.random_normal([n_hidden, n_class]))
b = tf.Variable(tf.random_normal([n_class]))

# Network layout:
#
#                 output (prediction of the third letter)
#                  | (W, b)
#                 outputs (hidden)
#        |    |    |
#  RNN: [t1]-[t2]-[t3]
#        x1   x2   x3

# Recurrent cell (LSTM)
cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# Optional regularization, currently disabled:
#cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.75)

# Unroll the cell over the input sequence
# outputs : [batch_size, max_time, cell.output_size]
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Move the time axis to the front and keep only the last time step
# -> [batch_size, cell.output_size]
outputs = tf.transpose(outputs, perm=[1, 0, 2])[-1]

# Class logits : [batch_size, n_classes]
model = tf.matmul(outputs, W) + b

# Mean cross-entropy between the logits and the integer labels
per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=model, labels=Y)
cost = tf.reduce_mean(per_example_loss)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

"""
  Phase 2: Train the model
"""
with tf.Session() as sess:
    # Give every graph variable its initial value
    sess.run(tf.global_variables_initializer())

    # The whole training set is fed as a single batch on every epoch
    train_inputs, train_targets = make_batch(training_data)
    train_feed = {X: train_inputs, Y: train_targets}

    for epoch in range(total_epoch):
        # One optimizer step; also fetch the loss for logging
        _, loss = sess.run([optimizer, cost], feed_dict=train_feed)

        print('Epoch:', '%04d' % (epoch + 1),
              'cost =', '{:.6f}'.format(loss))

    print('Optimization finished')

    """
      Make predictions
    """
    seq_data = test_data

    # Predicted class = index of the largest logit; accuracy = share of
    # predictions that equal the label
    prediction = tf.cast(tf.argmax(model, axis=1), tf.int32)
    is_correct = tf.equal(prediction, Y)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    test_inputs, test_targets = make_batch(seq_data)
    test_feed = {X: test_inputs, Y: test_targets}

    predict, accuracy_val = sess.run([prediction, accuracy],
                                     feed_dict=test_feed)

    # Rebuild each word: first two letters + predicted letter + last letter
    predicted = [word[:2] + vocab[char_id] + word[-1]
                 for word, char_id in zip(seq_data, predict)]

    print('\n=== Predictions ===')
    print('Input:', ["{}_{}".format(w[:2], w[-1]) for w in seq_data])
    print('Predicted:', predicted)
    print('Accuracy:', accuracy_val)

Epoch: 0001 cost = 3.538748
Epoch: 0002 cost = 3.478555
Epoch: 0003 cost = 3.421258
Epoch: 0004 cost = 3.365892
Epoch: 0005 cost = 3.311715
Epoch: 0006 cost = 3.258047
Epoch: 0007 cost = 3.204259
Epoch: 0008 cost = 3.149777
Epoch: 0009 cost = 3.094093
Epoch: 0010 cost = 3.036782
Epoch: 0011 cost = 2.977501
Epoch: 0012 cost = 2.915974
Epoch: 0013 cost = 2.851979
Epoch: 0014 cost = 2.785343
Epoch: 0015 cost = 2.715939
Epoch: 0016 cost = 2.643705
Epoch: 0017 cost = 2.568656
Epoch: 0018 cost = 2.490892
Epoch: 0019 cost = 2.410587
Epoch: 0020 cost = 2.327984
Epoch: 0021 cost = 2.243371
Epoch: 0022 cost = 2.157051
Epoch: 0023 cost = 2.069323
Epoch: 0024 cost = 1.980497
Epoch: 0025 cost = 1.890940
Epoch: 0026 cost = 1.801136
Epoch: 0027 cost = 1.711720
Epoch: 0028 cost = 1.623516
Epoch: 0029 cost = 1.537597
Epoch: 0030 cost = 1.455236
Epoch: 0031 cost = 1.377656
Epoch: 0032 cost = 1.305584
Epoch: 0033 cost = 1.238847
Epoch: 0034 cost = 1.176355
Epoch: 0035 cost = 1.116584
Epoch: 0036 cost = 1