In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Vocabulary: a character's position in this list is its id (id -> char).
char_rdic = ['h', 'e', 'l', 'o']
# Inverse lookup (char -> id), built by pairing each character with its index.
char_dic = dict(zip(char_rdic, range(len(char_rdic))))
print(char_dic)

{'l': 2, 'o': 3, 'e': 1, 'h': 0}


In [3]:
# Encode the training string 'hello' as a sequence of character ids.
ground_truth = [char_dic[ch] for ch in 'hello']
print(ground_truth)

[0, 1, 2, 2, 3]


In [4]:
# Hand-written one-hot input rows for the characters h, e, l, l:
# row i of the 4x4 identity matrix is the one-hot vector for character id i,
# so selecting rows [0, 1, 2, 2] yields the float32 encoding of "hell".
x_data = np.eye(4, dtype='f')[[0, 1, 2, 2]]

In [5]:
# One-hot encode every character except the last one; these are the RNN inputs.
x_data = tf.one_hot(indices=ground_truth[:-1],
                    depth=len(char_dic),
                    on_value=1.0,
                    off_value=0.0,
                    axis=-1)
print(x_data)

Tensor("one_hot:0", shape=(4, 4), dtype=float32)


In [6]:
# Configuration
rnn_size = len(char_dic) # 1 hot coding (one of 4)
batch_size = 1 # one sample
# One time step per input character. The inputs are ground_truth[:-1], so the
# count is derived from the data instead of being hard-coded (still 4 for
# 'hello'); this keeps tf.split consistent if the training string changes.
time_step_size = len(ground_truth) - 1

In [7]:
# RNN Model
# The cell's hidden size equals the vocabulary size (rnn_size), so each
# step's output is used directly as the per-character scores.
rnn_cell = tf.nn.rnn_cell.BasicRNNCell(
    num_units=rnn_size,
    input_size=None,  # deprecated at tensorflow 0.9
    # activation=tanh,
)
# All-zero starting hidden state for the batch.
initial_state = tf.zeros([batch_size, rnn_cell.state_size])
# Split x_data along axis 0 into time_step_size pieces, one per time step.
x_split = tf.split(0, time_step_size, x_data)
outputs, state = tf.nn.rnn(cell=rnn_cell,
                           inputs=x_split,
                           initial_state=initial_state)

In [8]:
# Inspect the graph tensors: the list of per-step outputs and the final state.
print(outputs)
print(state)

[<tf.Tensor 'RNN/BasicRNNCell/Tanh:0' shape=(1, 4) dtype=float32>, <tf.Tensor 'RNN/BasicRNNCell_1/Tanh:0' shape=(1, 4) dtype=float32>, <tf.Tensor 'RNN/BasicRNNCell_2/Tanh:0' shape=(1, 4) dtype=float32>, <tf.Tensor 'RNN/BasicRNNCell_3/Tanh:0' shape=(1, 4) dtype=float32>]
Tensor("RNN/BasicRNNCell_3/Tanh:0", shape=(1, 4), dtype=float32)


## Cost

In [9]:
# Arguments expected by sequence_loss_by_example:
# logits: list of 2D Tensors of shape [batch_size x num_decoder_symbols].
# targets: list of 1D batch-sized int32 Tensors of the same length as logits.
# weights: list of 1D batch-sized float-Tensors of the same length as logits.
logits = tf.reshape(tf.concat(1, outputs), # shape = 1 x 16
                    [-1, rnn_size])        # shape = 4 x 4
targets = tf.reshape(ground_truth[1:], [-1]) # a shape of [-1] flattens into 1-D
# One weight per target, i.e. per (time step, sample) pair. This was previously
# sized by the vocabulary (len(char_dic)), which only matched because both the
# vocabulary size and the number of time steps happen to be 4.
weights = tf.ones([time_step_size * batch_size])

In [10]:
# Per-target cross-entropy between the RNN scores and the next-character ids.
loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits], [targets], [weights])
# Total loss, averaged over the batch.
cost = tf.reduce_sum(loss) / batch_size
# RMSProp with learning rate 0.01 and decay 0.9.
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.01, decay=0.9)
train_op = optimizer.minimize(cost)

In [12]:
# Launch the graph in a session
# Build the prediction op once, before the loop. Calling tf.argmax inside the
# loop would add a new node to the graph on every iteration, growing the graph
# for no benefit.
prediction = tf.argmax(logits, 1)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for i in range(100):
        # Apply one optimizer step, then evaluate the predictions with the
        # updated weights (two separate runs keep that ordering explicit).
        sess.run(train_op)
        result = sess.run(prediction)
        # Show the predicted ids alongside the characters they decode to.
        print(result, [char_rdic[t] for t in result])

[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 1 3] ['e', 'o', 'e', 'o']
[1 3 2 3] ['e', 'o', 'l', 'o']
[1 3 2 3] ['e', 'o', 'l', 'o']
[1 3 2 3] ['e', 'o', 'l', 'o']
[1 3 2 3] ['e', 'o', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3] ['e', 'l', 'l', 'o']
[1 2 2 3

참고 : https://gist.github.com/j-min/481749dcb853b4477c4f441bf7452195