In [53]:
"""(Vanilla) RNN (Recurrent Neural Network): the simplest model in the RNN family.

Current state:  h_t = f(h_{t-1}, x_t) = tanh(W_hh * h_{t-1} + W_hx * x_t)
Current output: y_t = W_hy * h_t

RNN applications: https://github.com/TensorFlowKR/awesome_tensorflow_implementations

"""

""" RNN in TensorFlow

1. Create the cell that produces the state: cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size). num_units is the size of the output (state) vector; the cell can be swapped for BasicLSTMCell etc., depending on the model being used.
2. Run the cell to obtain the outputs: output, _state = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32). Here output corresponds to h_t.

"""

# Inputs and outputs of an RNN
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
pp = pprint.PrettyPrinter(indent=4)  # shared pretty-printer used to display the arrays in the demo cells
sess = tf.InteractiveSession()  # the demo cells below call `sess.run(...)` and `outputs.eval()` against this session

# One-hot encoding of the four distinct characters of 'hello'.
# Each character becomes a plain list with a single 1 at its own position.
h, e, l, o = (
    list(bits)
    for bits in ((1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0), (0, 0, 0, 1))
)

# The input x is shaped (1, 1, 4); the output is shaped (1, 1, n), where n is set through the hidden size.

In [55]:
# Single-step demo: one RNN cell mapping input_dim (4) -> output_dim (2).
hidden_size = 2
x_data = np.array([[h]], dtype=np.float32)  # x_data = [[[1, 0, 0, 0]]]
pp.pprint(x_data)

# Once a variable_scope name has been used, the same cell cannot be built under it
# again, so every demo cell uses its own scope name.
with tf.variable_scope('one_cell5') as scope:
    cells = rnn.BasicRNNCell(num_units=hidden_size)
    outputs, _states = tf.nn.dynamic_rnn(cell=cells, inputs=x_data, dtype=tf.float32)

sess.run(tf.global_variables_initializer())
pp.pprint(sess.run(outputs))

array([[[1., 0., 0., 0.]]], dtype=float32)
array([[[0.00273436, 0.52379775]]], dtype=float32)


In [56]:
# For a shape (a, b, c): c is the input dimension (for the input) or the hidden size
# (for the output), and b is the length of the input data (sequence length).
hidden_size = 2  # one RNN cell: input_dim (4) -> output_dim (2)
x_data = np.array([[h, e, l, l, o]], dtype=np.float32)  # one sequence made of five one-hot steps
pp.pprint(x_data)

with tf.variable_scope('one_cell6') as scope:
    cells = rnn.BasicRNNCell(num_units=hidden_size)
    outputs, _states = tf.nn.dynamic_rnn(cell=cells, inputs=x_data, dtype=tf.float32)

sess.run(tf.global_variables_initializer())
pp.pprint(sess.run(outputs))

array([[[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]]], dtype=float32)
array([[[ 0.5437193 ,  0.46096808],
        [-0.40864253,  0.5796649 ],
        [ 0.28415743, -0.6981258 ],
        [-0.6302551 , -0.58918905],
        [ 0.5594544 ,  0.1453845 ]]], dtype=float32)


In [60]:
# For a shape (a, b, c): a is the batch size, i.e. how many sequences are processed at once.
hidden_size = 2  # one RNN cell: input_dim (4) -> output_dim (2)
batch_of_sequences = [
    [h, e, l, l, o],
    [e, l, l, h, o],
    [e, h, o, l, l],
]
x_data = np.array(batch_of_sequences, dtype=np.float32)  # three sequences of five one-hot steps each
pp.pprint(x_data)

with tf.variable_scope('one_cell9') as scope:
    cells = rnn.BasicRNNCell(num_units=hidden_size)
    outputs, _states = tf.nn.dynamic_rnn(cell=cells, inputs=x_data, dtype=tf.float32)

sess.run(tf.global_variables_initializer())
pp.pprint(sess.run(outputs))

array([[[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]],

       [[0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [1., 0., 0., 0.],
        [0., 0., 0., 1.]],

       [[0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.]]], dtype=float32)
array([[[ 0.26099256, -0.23209658],
        [-0.19653676,  0.319594  ],
        [ 0.2630962 , -0.19141088],
        [ 0.19510952, -0.73888487],
        [-0.51928085, -0.00628737]],

       [[-0.13814099,  0.61244816],
        [ 0.14025319, -0.00230351],
        [ 0.1967387 , -0.60706186],
        [ 0.3502664 , -0.70193034],
        [-0.58453816, -0.09241634]],

       [[-0.13814099,  0.61244816],
        [ 0.13513002,  0.3441358 ],
        [-0.71843004,  0.7203274 ],
        [ 0.38907456,  0.47649246],
        [-0.09080583, -0.46673477]]], dtype=float32)


In [89]:
# Teach the RNN 'hihello'
# Train it to predict the next character: h -> i, i -> h, h -> e, ..., l -> o
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # reproducibility

# Vocabulary: a character's position in this list is its class index.
idx2char = list('hielo')

# Teach hello: the input "hihell" has to be mapped onto the target "ihello".
x_data = [[0, 1, 0, 2, 3, 3]]  # input indices:  h i h e l l
y_data = [[1, 0, 2, 3, 3, 4]]  # target indices: i h e l l o

# One-hot encode every input index: one row of width len(idx2char) per time step.
x_one_hot = [
    [[1 if position == char_idx else 0 for position in range(len(idx2char))]
     for char_idx in sequence]
    for sequence in x_data
]


# Hyper-parameters are plain Python values, so they are set before the scope is opened.
num_classes = 5       # number of distinct characters
input_dim = 5         # width of each one-hot input vector
hidden_size = 5       # LSTM output size: one value per character class
batch_size = 1        # one sentence
sequence_length = 6   # |ihello| == 6
learning_rate = 0.1

# The scope name has to be changed before this cell is run again, even after starting over.
with tf.variable_scope('one_cell12') as scope:
    X = tf.placeholder(
        dtype=tf.float32, shape=[None, sequence_length, input_dim])  # X one-hot
    Y = tf.placeholder(dtype=tf.int32, shape=[None, sequence_length])  # Y label: ihello

    # Build the cell, give it an all-zero initial state, then unroll it over X.
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(
        cell=cell, inputs=X, initial_state=initial_state, dtype=tf.float32)




In [96]:
# Cost function used for sequence RNNs: a small sequence-loss example.
# Demo-only names are used so that this cell does not overwrite `y_data`,
# `prediction`, `sequence_loss` or `sess`, which the 'hihello' cells rely on.
demo_targets = tf.constant([[1, 1, 1]])  # true class index at each of the 3 time steps
demo_logits = tf.constant(
    [[[0.2, 0.7], [0.3, 0.5], [0.7, 0.2]]], dtype=tf.float32)  # (batch=1, steps=3, classes=2)
demo_weights = tf.constant([[1, 1, 1]], dtype=tf.float32)  # every time step counts equally

demo_loss = tf.contrib.seq2seq.sequence_loss(
    logits=demo_logits, targets=demo_targets, weights=demo_weights)

# Only constants are evaluated here, so no variable initialisation is required.
# The `with` block closes the session as soon as the loss has been printed.
with tf.Session() as demo_sess:
    print('Loss: ', demo_sess.run(demo_loss))

Loss:  0.6820976


In [90]:
# Loss and optimiser for the 'hihello' model; every time step gets the same weight.
# NOTE(review): the LSTM outputs are passed straight in as logits (no projection layer in between).
weights = tf.ones(shape=[batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs,
    targets=Y,
    weights=weights,
)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)


In [91]:
# Training
# Predicted class at each time step = index of the largest output value.
prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(50):
        # Stored as `loss_val` rather than `l`, so the one-hot vector `l` defined
        # for the 'hello' demos near the top of the notebook is not overwritten.
        loss_val, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_one_hot})
        print(i, "loss:", loss_val, "prediction: ", result, "true Y: ", y_data)

        # Map the predicted indices back to characters.
        result_str = [idx2char[c] for c in np.squeeze(result)]
        print("\tPrediction str: ", ''.join(result_str))

0 loss: 1.6035075 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
1 loss: 1.5366507 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
2 loss: 1.4777532 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
3 loss: 1.4193172 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
4 loss: 1.3533754 prediction:  [[3 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  llllll
5 loss: 1.2810712 prediction:  [[2 3 3 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  elllll
6 loss: 1.210656 prediction:  [[2 3 2 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  elelll
7 loss: 1.153573 prediction:  [[2 0 2 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ehelll
8 loss: 1.1059629 prediction:  [[2 0 2 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Prediction str:  ehelll
9 loss: 1.0641327 prediction:  [[2 0 2 3 3 3]] true Y:  [[1, 0, 2, 3, 3, 4]]
	Predic

In [92]:
# RNN with a long sequence
import tensorflow as tf
import numpy as np
tf.set_random_seed(777)  # reproducibility

sample = " if you want you"

# Sorted so the char <-> index mapping is the same on every run: iterating a bare
# set of strings can give a different order in each Python process, which would
# make results differ between runs even though the TensorFlow seed is fixed.
idx2char = sorted(set(sample))  # index -> char: unique characters as a list
char2idx = {c: i for i, c in enumerate(idx2char)}  # char -> index dictionary

sample_idx = [char2idx[c] for c in sample]  # the sample as a list of indices
x_data = [sample_idx[:-1]]  # X data: every character except the last  (hello -> hell)
y_data = [sample_idx[1:]]   # Y label: every character except the first (hello -> ello)

dic_size = len(char2idx)  # RNN input size (one hot size)
hidden_size = len(char2idx)  # RNN output size
num_classes = len(char2idx)  # final output size (RNN or softmax, etc.)
batch_size = 1  # one sample data, one batch
sequence_length = len(sample) - 1  # number of lstm rollings (unit #)
learning_rate = 0.1

X = tf.placeholder(tf.int32, [None, sequence_length])  # X data (character indices)
Y = tf.placeholder(tf.int32, [None, sequence_length])  # Y label (next-character indices)

x_one_hot = tf.one_hot(X, num_classes)  # one hot: 1 -> 0 1 0 0 0 0 0 0 0 0

# The scope object itself is never used, so it is not bound to a name.
with tf.variable_scope('lstm3'):
    # Build the LSTM cell, give it an all-zero initial state, then unroll it over the one-hot input.
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=initial_state, dtype=tf.float32)
    

In [93]:
# Equal weight for every time step of the single training sequence.
weights = tf.ones(shape=[batch_size, sequence_length])
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs,
    targets=Y,
    weights=weights,
)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Index of the largest output value at every time step.
prediction = tf.argmax(outputs, axis=2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(50):
        # Stored as `loss_val` rather than `l`, so the one-hot vector `l` defined
        # for the 'hello' demos near the top of the notebook is not overwritten.
        loss_val, _ = sess.run([loss, train], feed_dict={X: x_data, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_data})

        # Map the predicted indices back to characters.
        result_str = [idx2char[c] for c in np.squeeze(result)]

        print(i, "loss:", loss_val, "Prediction:", ''.join(result_str))

0 loss: 2.2847593 Prediction: yyyy u u  yyyuu
1 loss: 2.1808865 Prediction: yyyy u     uuuu
2 loss: 2.061739 Prediction: yyyy u      uuu
3 loss: 1.9443986 Prediction: yyyyou aa  yyou
4 loss: 1.8236507 Prediction: yyyyouoaa yyyou
5 loss: 1.7534765 Prediction: yyyy   aa  yyou
6 loss: 1.700007 Prediction: yyyy   aa  yyou
7 loss: 1.653459 Prediction: yyyy   aa   y u
8 loss: 1.5864625 Prediction: yyyy   aa   yo 
9 loss: 1.5305682 Prediction: yyyy   aa   yo 
10 loss: 1.4798074 Prediction: yfyy   aan  you
11 loss: 1.4330196 Prediction: yfyyou aan  you
12 loss: 1.3850362 Prediction: yf you aant you
13 loss: 1.3440365 Prediction: yf you wantyyou
14 loss: 1.3105631 Prediction: yf you want you
15 loss: 1.2805848 Prediction: yf you want you
16 loss: 1.2549009 Prediction: yf youuwant you
17 loss: 1.2394685 Prediction: yf youuwant you
18 loss: 1.226584 Prediction: yf youuwant you
19 loss: 1.2068338 Prediction: yf youuwant you
20 loss: 1.1841968 Prediction: if youwwant you
21 loss: 1.1631689 Predicti

In [None]:
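# RNN with a long sequence (stacked layers combined with softmax)
# RNN cells are stacked to build a deep RNN. The idea, in short:
# cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
# cell = rnn.MultiRNNCell([cell] * 2, state_is_tuple=True)  # stack the cell into 2 layers
# (the code below creates a separate cell object per layer instead of repeating one with `[cell] * 2`)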


from __future__ import print_function  # print() as a function; this cell uses print(..., end='')

import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

tf.set_random_seed(777)  # reproducibility

sentence = ("if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

# Sorted so the char <-> index mapping is the same on every run: iterating a bare
# set of strings can give a different order in each Python process, which would
# make results differ between runs even though the TensorFlow seed is fixed.
char_set = sorted(set(sentence))  # index -> char
char_dic = {w: i for i, w in enumerate(char_set)}  # char -> index

data_dim = len(char_set)
hidden_size = len(char_set)
num_classes = len(char_set)
sequence_length = 10  # Any arbitrary number
learning_rate = 0.1

# Slide a window of `sequence_length` characters over the sentence. Each input
# window is paired with the same window shifted one character to the right.
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    print(i, x_str, '->', y_str)

    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index

    dataX.append(x)
    dataY.append(y)

batch_size = len(dataX)  # all windows are fed as one batch

X = tf.placeholder(tf.int32, [None, sequence_length])  # input character indices
Y = tf.placeholder(tf.int32, [None, sequence_length])  # target character indices

# One-hot encoding
X_one_hot = tf.one_hot(X, num_classes)
print(X_one_hot)  # check out the shape


# Make a lstm cell with hidden_size (each unit output vector size)
def lstm_cell():
    """Return a new BasicLSTMCell with `hidden_size` units.

    Called once per layer, so every layer of the stack gets its own cell object.
    """
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell

multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(2)], state_is_tuple=True)

# outputs: unfolding size x hidden size, state = hidden size
outputs, _states = tf.nn.dynamic_rnn(multi_cells, X_one_hot, dtype=tf.float32)

# FC layer (produces the logits that the softmax loss consumes)
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = tf.contrib.layers.fully_connected(X_for_fc, num_classes, activation_fn=None)

# Reshape back to (batch, sequence_length, num_classes) for sequence_loss.
# The batch dimension is inferred (-1) rather than fixed to the number of training
# windows, so the graph accepts any batch size the placeholders allow.
outputs = tf.reshape(outputs, [-1, sequence_length, num_classes])

# All weights are 1 (equal weights), with the same shape as the fed targets.
weights = tf.ones_like(Y, dtype=tf.float32)

sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
mean_loss = tf.reduce_mean(sequence_loss)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_loss)

# The session is opened in a `with` block so that it is closed once training and the
# final forward pass are done (same pattern as the other training cells).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(500):
        # Stored as `loss_val` rather than `l`, so the one-hot vector `l` defined
        # for the 'hello' demos near the top of the notebook is not overwritten.
        _, loss_val, results = sess.run(
            [train_op, mean_loss, outputs], feed_dict={X: dataX, Y: dataY})
        for j, result in enumerate(results):
            index = np.argmax(result, axis=1)
            print(i, j, ''.join([char_set[t] for t in index]), loss_val)

    # Final forward pass over all windows with the trained weights.
    results = sess.run(outputs, feed_dict={X: dataX})

# Let's print the last char of each result to check it works
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    if j == 0:  # print all for the first result to make a sentence
        print(''.join([char_set[t] for t in index]), end='')
    else:
        print(char_set[index[-1]], end='')