## What is Seq2seq?
- Encoder–decoder architecture
- frequently used to build translators and chatbots
- appropriate when the input sequence and the output sequence have different lengths
<img width="688" alt="image" src="https://user-images.githubusercontent.com/35142536/61588681-ca7e3600-abda-11e9-8eee-5cf840689925.png">

If you would like to know more, go to this [link](https://towardsdatascience.com/nlp-sequence-to-sequence-networks-part-2-seq2seq-model-encoderdecoder-model-6c22e29fd7e1)!

## Code Practice

In [1]:
import tensorflow as tf
import numpy as np

# Clear the default graph so that re-running the graph-building cells below
# starts from an empty graph instead of adding to one left over in the kernel.
tf.reset_default_graph()

In [2]:
# Vocabulary: the three control symbols S, E, P come first, followed by a-z.
char_arr = list('SEPabcdefghijklmnopqrstuvwxyz')
# Character -> class id, i.e. the character's position in char_arr.
num_dic = dict(zip(char_arr, range(len(char_arr))))

# Alternative English -> Korean pairs, kept disabled: char_arr above only holds
# S, E, P and the lowercase Latin letters.
#seq_data = [['man', '남자'], ['black', '검정'], ['king', '왕'], ['girl', '소녀'], ['up', '오르기'], ['high', '높다']]
# Training pairs: [source word, target word].
seq_data = [
    ['man', 'women'],
    ['black', 'white'],
    ['king', 'queen'],
    ['girl', 'boy'],
    ['up', 'down'],
    ['high', 'low'],
]

In [3]:
# S: symbol prepended to the decoder input to mark the start of decoding
# E: symbol appended to the decoder target to mark the end of the output
# P: padding symbol appended to a word that is shorter than n_step characters


# Seq2seq parameters
n_step = 5                # words are padded with 'P' up to this many characters
n_hidden = 128            # number of units in each RNN cell
n_class = len(num_dic)    # vocabulary size = width of the one-hot vectors and of the output layer

def make_batch(seq_data):
    """Encode (source, target) word pairs into seq2seq training arrays.

    Each word is right-padded with 'P' up to ``n_step`` characters; words that
    already have ``n_step`` or more characters are left unchanged. The decoder
    input is the padded target prefixed with 'S', and the training target is
    the padded target followed by 'E'.

    The pairs in ``seq_data`` are only read: padding is done on local copies,
    so the caller's data is not modified and the function can be called
    repeatedly on the same input.

    Args:
        seq_data: sequence of two-item sequences ``[source_word, target_word]``.
            Every character must be a key of ``num_dic``.

    Returns:
        A tuple ``(input_batch, output_batch, target_batch)`` of lists holding
        one entry per pair:
          * input_batch:  one-hot array of shape (len(padded source), n_class)
          * output_batch: one-hot array of shape (len(padded target) + 1, n_class)
          * target_batch: list of integer class ids, len(padded target) + 1 long

    Raises:
        KeyError: if a word contains a character that is not in ``num_dic``.
    """
    input_batch, output_batch, target_batch = [], [], []
    # Row i of the identity matrix is the one-hot vector for class id i;
    # indexing it with a list of ids returns a fresh array each time.
    one_hot = np.eye(n_class)

    for seq in seq_data:
        # Pad into local strings (e.g. 'man' -> 'manPP', 'king' -> 'kingP')
        # instead of writing back into seq, which would alter the caller's data.
        source = seq[0] + 'P' * (n_step - len(seq[0]))
        target_word = seq[1] + 'P' * (n_step - len(seq[1]))

        enc_ids = [num_dic[ch] for ch in source]
        dec_ids = [num_dic[ch] for ch in ('S' + target_word)]
        target_ids = [num_dic[ch] for ch in (target_word + 'E')]

        input_batch.append(one_hot[enc_ids])
        output_batch.append(one_hot[dec_ids])
        target_batch.append(target_ids)

    return input_batch, output_batch, target_batch

In [16]:
# make_batch result
# make_batch returns the tuple (input_batch, output_batch, target_batch); as the
# cell's last expression, the whole tuple is echoed as the cell output.
make_batch(seq_data)

manPP
women
[15, 3, 16, 2, 2]
[0, 25, 17, 15, 7, 16]
[25, 17, 15, 7, 16, 1]
black
white
[4, 14, 3, 5, 13]
[0, 25, 10, 11, 22, 7]
[25, 10, 11, 22, 7, 1]
kingP
queen
[13, 11, 16, 9, 2]
[0, 19, 23, 7, 7, 16]
[19, 23, 7, 7, 16, 1]
girlP
boyPP
[9, 11, 20, 14, 2]
[0, 4, 17, 27, 2, 2]
[4, 17, 27, 2, 2, 1]
upPPP
downP
[23, 18, 2, 2, 2]
[0, 6, 17, 25, 16, 2]
[6, 17, 25, 16, 2, 1]
highP
lowPP
[10, 11, 9, 10, 2]
[0, 14, 17, 25, 2, 2]
[14, 17, 25, 2, 2, 1]


([array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
  array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0

In [4]:
# model
# Placeholders: batch and time dimensions are left open (None).
enc_input = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, max_len(=encoder_step), n_class]
dec_input = tf.placeholder(tf.float32, [None, None, n_class]) # [batch_size, max_len+1(=decoder_step) (because of the extra 'S' or 'E'), n_class]
targets = tf.placeholder(tf.int64, [None, None])  # [batch_size, max_len+1], integer class ids, not one-hot

# NOTE(review): output_keep_prob=0.5 is a fixed constant in both wrappers, so
# dropout also appears to be applied when translate() evaluates `model` at
# test time. Feeding the keep probability through a placeholder (1.0 at
# inference) would avoid that -- confirm and change together with the
# training / translate cells.
with tf.variable_scope('encode'):
    enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob=0.5)
    # Only the final encoder state is kept; the per-step outputs are discarded.
    _, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input, dtype=tf.float32)
    
with tf.variable_scope('decode'):
    dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)
    # The decoder starts from the encoder's final state.
    outputs,_ = tf.nn.dynamic_rnn(dec_cell, dec_input, initial_state=enc_states, dtype=tf.float32)    
    # outputs : [batch_size, max_len+1, n_hidden(=128)]
    
# Project every decoder step to n_class unnormalised scores (logits; no activation).
model = tf.layers.dense(outputs, n_class, activation=None)
# Cross-entropy against the integer targets, averaged over every batch entry
# and time step (positions whose target is the padding symbol 'P' are included).
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model, labels=targets))
# `optimizer` is the training op: one Adam update (learning rate 0.001) per run.
optimizer = tf.train.AdamOptimizer(0.001).minimize(cost)

Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.dense instead.


In [16]:
# training
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

input_batch, output_batch, target_batch = make_batch(seq_data)
# Full-batch training: the same feed is reused for every step.
train_feed = {
    enc_input: input_batch,
    dec_input: output_batch,
    targets: target_batch,
}

for epoch in range(5000):
    _, loss = sess.run([optimizer, cost], feed_dict=train_feed)
    step = epoch + 1
    if step % 500 != 0:
        continue
    # Report the cost once every 500 steps.
    print('Epoch:', '%04d' % step, 'cost =', '{:.6f}'.format(loss))
        
# test
def translate(word):
    """Run the trained seq2seq model on one word and return the decoded text.

    Args:
        word: string whose characters are all keys of ``num_dic``.

    Returns:
        The predicted characters up to (not including) the first 'E', with
        every 'P' replaced by a space. If the prediction contains no 'E', the
        whole predicted sequence is used instead of raising.

    Raises:
        KeyError: if ``word`` contains a character that is not in ``num_dic``
            (raised while the word is encoded by ``make_batch``).
    """
    # The real output is unknown at inference time, so the decoder input is
    # just 'S' followed by padding symbols.
    pair = [word, 'P' * len(word)]
    input_batch, output_batch, _ = make_batch([pair])

    # Fetch the logits and take the argmax in NumPy. Building tf.argmax(model, 2)
    # here would add a new op to the graph on every call.
    logits = sess.run(model, feed_dict={enc_input: input_batch, dec_input: output_batch})
    results = np.argmax(logits, axis=2)

    decoded = [char_arr[i] for i in results[0]]
    # Cut at the end marker when the model produced one; otherwise keep everything.
    end = decoded.index('E') if 'E' in decoded else len(decoded)
    translated = ''.join(decoded[:end])
    # Padding symbols carry no information in the final result, so show them as spaces.
    return translated.replace('P', ' ')


print('---test----')
# Words seen in training plus a few slightly altered spellings.
for word in ['man', 'mans', 'king', 'black', 'upp', 'ups']:
    print(word + ' ->', translate(word))

Epoch: 0500 cost = 0.003902
Epoch: 1000 cost = 0.001087
Epoch: 1500 cost = 0.000288
Epoch: 2000 cost = 0.000293
Epoch: 2500 cost = 0.000271
Epoch: 3000 cost = 0.000114
Epoch: 3500 cost = 0.000049
Epoch: 4000 cost = 0.000063
Epoch: 4500 cost = 0.000043
Epoch: 5000 cost = 0.000052
---test----
man -> women
mans -> women
king -> queen
black -> white
upp -> down 
ups -> down 
