In [1]:
import tensorflow as tf
import numpy as np


# min-char-rnn 링크: https://gist.github.com/karpathy/d4dee566867f8291f086
#
# output: ['e']  ['l']  ['l']  ['o']  ['w']    min-char-rnn tensorflow 버전 구현.
#           ^      ^      ^      ^      ^      input.txt에 있는 문장을 language model을 이용하여 생성하는 것이 목표. 
#           |      |      |      |      |      
#          +-+    +-+    +-+    +-+    +-+     
#          |1| -> |2| -> |3| -> |4| -> |5|     
#          +-+    +-+    +-+    +-+    +-+     input에는 마지막 문자('w')를 제외시키고
#           ^      ^      ^      ^      ^      output에는 첫 문자('h')를 제외시키므로
#           |      |      |      |      |      time_step_size = (전체 문장의 길이 - 1)가 된다.
# input : ['h']  ['e']  ['l']  ['l']  ['o']



# 참고: tf.nn.rnn_cell과 tf.nn.rnn 함수를 사용하는 방법을 이해하는 데에 오래 걸렸다.

In [2]:
# Load the training text. The `with` block guarantees the file handle is
# closed even if read() raises.
with open('input.txt', 'r') as input_file:
    data = input_file.read()

# Every time step needs an (input, target) character pair, so a text shorter
# than two characters cannot be trained on. Fail early with a clear message
# instead of an IndexError on data[0] or a zero-way split further down.
if len(data) < 2:
    raise ValueError(
        'input.txt must contain at least 2 characters, got %d' % len(data))

start_char = data[0]
# Sort the vocabulary so that the char <-> index mapping is identical on every
# run; the iteration order of a bare set is not guaranteed to be stable.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_idx = {ch: idx for idx, ch in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

# hyperparameters
num_epochs = 301
# NOTE(review): num_sampling is not referenced by any visible cell - confirm
# whether it is still needed.
num_sampling = 10
hidden_size = 100
# The input sequence drops the last character and the target sequence drops
# the first one, so the unrolled length is one less than the text length.
num_time_steps = data_size - 1
# NOTE(review): learning_rate is not consumed by the visible cells; the
# optimizer cell passes a literal 0.01 instead - confirm which is intended.
learning_rate = 1e-1

# model parameters: weight of shape (hidden_size, vocab_size) and a bias of
# shape (vocab_size,)
W = tf.Variable(tf.truncated_normal([hidden_size, vocab_size], stddev=0.1))
b = tf.Variable(tf.zeros([vocab_size]))

data has 66 characters, 23 unique.


In [3]:
# Build the character-index dataset: every character except the last is an
# input, and its target is the character that immediately follows it.
X = [char_to_idx[ch] for ch in data[:-1]]
y = [char_to_idx[ch] for ch in data[1:]]

# One-hot encode both index sequences as float32 tensors of shape
# (num_time_steps, vocab_size); inputs first, then targets.
X_onehot, y_onehot = [
    tf.cast(tf.one_hot(indices, vocab_size, on_value=1), tf.float32)
    for indices in (X, y)
]
print("%s %s" % ("input data의 차원: ", X_onehot.get_shape()))
print("%s %s" % ("out data의 차원: ", y_onehot.get_shape()))

# tf.nn.rnn takes a Python list with one tensor per time step, each of shape
# (batch_size, input_size), so split the input along axis 0 into
# num_time_steps pieces of shape (1, vocab_size).
X_split = tf.split(0, num_time_steps, X_onehot)
print("%s %s" % ("list의 길이: ", len(X_split)))
print("%s %s" % ("각 원소의 차원: ", X_split[0].get_shape()))

input data의 차원:  (65, 23)
out data의 차원:  (65, 23)
list의 길이:  65
각 원소의 차원:  (1, 23)


In [4]:
def model():
    """Build the unrolled character-RNN graph.

    Reads the module-level ``hidden_size``, ``X_split``, ``y_onehot``, ``W``
    and ``b``; takes no arguments.

    Returns:
        A ``(loss, max_char)`` tuple of tensors:
        loss: softmax cross-entropy between the logits and ``y_onehot``,
            summed over all time steps.
        max_char: argmax of the logits along axis 1, i.e. the predicted
            character index for each time step.
    """
    rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
    # One output tensor per time step; the final state is not needed here.
    step_outputs, _ = tf.nn.rnn(rnn_cell, X_split, dtype=tf.float32)
    # Stack the per-step outputs into (num_time_steps, hidden_size).
    hidden = tf.concat(0, step_outputs)
    # Project to the vocabulary: (num_time_steps, vocab_size).
    logits = tf.matmul(hidden, W) + b
    per_step_loss = tf.nn.softmax_cross_entropy_with_logits(logits, y_onehot)
    return tf.reduce_sum(per_step_loss), tf.argmax(logits, 1)

# Instantiate the graph once and keep handles to the loss and prediction ops.
loss, max_char = model()
# NOTE: despite its name, `optimizer` holds the training op returned by
# minimize(), not the optimizer object. The step size is the literal 0.01;
# the `learning_rate` hyperparameter (1e-1) is not used here.
rmsprop = tf.train.RMSPropOptimizer(learning_rate=0.01)
optimizer = rmsprop.minimize(loss)

In [5]:
with tf.Session() as sess:
    # Initialise every variable in the graph before the first training step.
    sess.run(tf.initialize_all_variables())
    for epoch in range(num_epochs):
        # One optimisation step over the whole sentence; also fetch the loss.
        _, loss_value = sess.run([optimizer, loss])
        if epoch % 10 == 0:
            # Softmax cross-entropy loss summed over the sentence.
            print("%s %s %s" % ("loss at epoch %d" % epoch, ": ", loss_value))
        if epoch % 100 == 0:
            # Decode the per-step argmax predictions back into characters and
            # prepend the first character, which the model never predicts.
            idxs = sess.run(max_char)
            sentence = [idx_to_char[idx] for idx in idxs]
            print("생성된 문장: %s" % (start_char + ''.join(sentence)))

loss at epoch 0 :  205.755
생성된 문장: HiNN oieepsplsaReeeeeaeeueeNsHpeeleplesneninpheepauaaiepeeeeeosinn
loss at epoch 10 :  169.027
loss at epoch 20 :  159.723
loss at epoch 30 :  128.92
loss at epoch 40 :  91.6823
loss at epoch 50 :  13.2543
loss at epoch 60 :  4.14548
loss at epoch 70 :  1.68452
loss at epoch 80 :  0.807435
loss at epoch 90 :  0.414715
loss at epoch 100 :  0.221801
생성된 문장: Hello, this is a sample sentence for training RNN language model. 
loss at epoch 110 :  0.121619
loss at epoch 120 :  0.0677693
loss at epoch 130 :  0.0381707
loss at epoch 140 :  0.0216569
loss at epoch 150 :  0.0123494
loss at epoch 160 :  0.00706621
loss at epoch 170 :  0.00405509
loss at epoch 180 :  0.0023318
loss at epoch 190 :  0.00134442
loss at epoch 200 :  0.000775808
생성된 문장: Hello, this is a sample sentence for training RNN language model. 
loss at epoch 210 :  0.000448106
loss at epoch 220 :  0.000259399
loss at epoch 230 :  0.0001508
loss at epoch 240 :  8.86916e-05
loss at epoch 250 :  