https://gist.github.com/karpathy/d4dee566867f8291f086

"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""

In [2]:
import numpy as np

In [4]:
# data I/O

data = open('input.txt', 'r').read()
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)

print('data has %d characters, %d unique.' %(data_size, vocab_size))

char_to_ix = { ch:i for i, ch in enumerate(chars)}
ix_to_char = { i:ch for i, ch in enumerate(chars)}

data has 11 characters, 8 unique.


In [15]:
char_to_ix

{'r': 0, 'h': 1, 'd': 2, ' ': 3, 'o': 4, 'l': 5, 'e': 6, 'w': 7}

In [16]:
ix_to_char

{0: 'r', 1: 'h', 2: 'd', 3: ' ', 4: 'o', 5: 'l', 6: 'e', 7: 'w'}

In [10]:
# hyperparameter

hidden_size = 100
seq_length = 4
learning_rate = 1e-1

In [11]:
# model parameters

Wxh = np.random.randn(hidden_size, vocab_size)*0.01
Whh = np.random.randn(hidden_size, hidden_size)*0.01
Why = np.random.randn(vocab_size, hidden_size)*0.01
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))

In [12]:
def lossFun(inputs, targets, hprev):
    """
    inputs,targets are both list of integers.
    hprev is Hx1 array of initial hidden state
    returns the loss, gradients on model parameters, and last hidden state
    """
    
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss=0
    #forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        ys[t] = np.dot(Why, hs[t]) + by
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))
        loss += (-np.log(ps[t][targets[t], 0]))
    # backward pass: compute gradients going backwards
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [13]:
def sample(h, seed_ix, n):
    """ 
    sample a sequence of integers from the model 
    h is memory state, seed_ix is seed letter for first time step
    """
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
        y = np.dot(Why, h) + by
        p = np.exp(y) / np.sum(np.exp(y))
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
        
    return ixes

In [14]:
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0

i=0
while i<10:
    # prepare inputs (we're sweeping from left to right in steps seq_length long)
    if p+seq_length+1 >= len(data) or n == 0: 
        hprev = np.zeros((hidden_size,1)) # reset RNN memory
        p = 0 # go from start of data
    inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
    targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

    # sample from the model now and then
    if n % 100 == 0:
        sample_ix = sample(hprev, inputs[0], 200)
        txt = ''.join(ix_to_char[ix] for ix in sample_ix)
        print ('----\n %s \n----' % (txt))

    # forward seq_length characters through the net and fetch gradient
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if n % 100 == 0: 
        print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
    # perform parameter update with Adagrad
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                  [dWxh, dWhh, dWhy, dbh, dby], 
                                  [mWxh, mWhh, mWhy, mbh, mby]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

    p += seq_length # move data pointer
    n += 1 # iteration counter
    i+=1

----
 dehloede hrh hlrhwl ol hwe  rwwrlorloeewlorehedlrdeowr dh lddorwer owewwreoeew  hwd l ew hhwdrl dlhewwlelohelr  rwr rlhoherrl hdl doddolwheldwel olerrod do dd rreehewhldrdworhh orwhlhederrwd eleowl eh 
----
iter 0, loss: 8.317768


# p.256(tensorflow로 rnn)

In [3]:
import tensorflow as tf
tf.reset_default_graph()

X_data = np.array([[[0., 1., 2., 3.]]])
batch_size = 1
hidden_size = 1
X = tf.placeholder(tf.float32, [None, 1, 4])

cell = tf.nn.rnn_cell.BasicRNNCell(num_units = hidden_size)
initial_state = cell.zero_state(batch_size,tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                              initial_state=initial_state, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    variables_names = [v.name for v in tf.trainable_variables()]
    values = sess.run(variables_names)
    for k,v in zip(variables_names, values):
        print(k, '\n',v)
    print('-'*30)
    outputs_val, state_val = sess.run([outputs,state], feed_dict={X:X_data})
    print('init state:\n', sess.run(initial_state),
         '\nX_data val:\n', X_data,
         '\noutput val:\n', outputs_val,
         '\nstate val:\n',state_val) 

W0723 17:01:21.404147 15180 deprecation.py:323] From <ipython-input-3-30fd55ae537c>:9: BasicRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.SimpleRNNCell, and will be replaced by that in Tensorflow 2.0.
W0723 17:01:21.412126 15180 deprecation.py:323] From <ipython-input-3-30fd55ae537c>:12: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
W0723 17:01:21.450065 15180 deprecation.py:506] From C:\Users\Affinity\Anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it 

rnn/basic_rnn_cell/kernel:0 
 [[-0.77403975]
 [ 0.2562201 ]
 [ 0.21532774]
 [-0.7230842 ]
 [-0.99690676]]
rnn/basic_rnn_cell/bias:0 
 [0.]
------------------------------
init state:
 [[0.]] 
X_data val:
 [[[0. 1. 2. 3.]]] 
output val:
 [[[-0.9019123]]] 
state val:
 [[-0.9019123]]


In [34]:
tf.reset_default_graph()

X_data = np.array([[[0., 1., 2., 3.]]])
batch_size = 1
hidden_size = 3
X = tf.placeholder(tf.float32, [None, 1, 4])

cell = tf.nn.rnn_cell.BasicRNNCell(num_units = hidden_size)
initial_state = cell.zero_state(batch_size,tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                              initial_state=initial_state, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    variables_names = [v.name for v in tf.trainable_variables()]
    print(variables_names)
    values = sess.run(variables_names)
    print(variables_names)
    for k,v in zip(variables_names, values):
        print(k, '\n',v)
    print('-'*30)
    outputs_val, state_val = sess.run([outputs,state], feed_dict={X:X_data})
    print('init state:\n', sess.run(initial_state),
         '\nX_data val:\n', X_data,
         '\noutput val:\n', outputs_val,
         '\nstate val:\n',state_val) 

['rnn/basic_rnn_cell/kernel:0', 'rnn/basic_rnn_cell/bias:0']
['rnn/basic_rnn_cell/kernel:0', 'rnn/basic_rnn_cell/bias:0']
rnn/basic_rnn_cell/kernel:0 
 [[-4.5961630e-01 -3.8549718e-01  2.5870693e-01]
 [-7.5039923e-02  9.0887189e-02 -5.3411722e-04]
 [-4.9265543e-01 -7.1770543e-01 -2.6016712e-01]
 [-3.5555184e-02  4.7998822e-01 -1.9110227e-01]
 [ 6.5887523e-01 -8.1703484e-02  2.4831617e-01]
 [ 3.7258518e-01  5.8739328e-01 -7.0440990e-01]
 [ 6.8356395e-01 -3.8215816e-02 -7.3230761e-01]]
rnn/basic_rnn_cell/bias:0 
 [0. 0. 0.]
------------------------------
init state:
 [[0. 0. 0.]] 
X_data val:
 [[[0. 1. 2. 3.]]] 
output val:
 [[[-0.8233133   0.09515224 -0.798397  ]]] 
state val:
 [[-0.8233133   0.09515224 -0.798397  ]]


In [41]:
tf.reset_default_graph()

def seqtostr(input):
    return ''.join(ix_to_char[ch] for ch in input[:])

def sample(x, n, cell, Why, by):
    W = cell[0]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    bh = cell[1]
    h = np.zeros((1, hidden_size))
    ixes = []
    
    for step in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        ix = np.argmax(y)
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    
    return ixes

In [44]:
data='hihello'
chars = list(set(data))
vocab_size = len(chars)
char_to_ix = { ch:i for i, ch in enumerate(chars)}
ix_to_char = { i:ch for i, ch in enumerate(chars)}
X_data = [char_to_ix[ch] for ch in data]
X_onehot = tf.one_hot(X_data, vocab_size)

batch_size = 1
hidden_size = 10
seq_length = len(data) -1

Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size]), name='weight_hy')
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

X = tf.placeholder(tf.float32, [None, 1, vocab_size])
cell = tf.nn.rnn_cell.BasicRNNCell(num_units = hidden_size)
initial_state = cell.zero_state(batch_size,tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X, initial_state=initial_state, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X_test = sess.run(X_onehot[0]).reshape(1, vocab_size)
    predtxt = sample(X_test, seq_length, sess.run(cell.variables), sess.run(Why), sess.run(by))
    print(data[0] + seqtostr(predtxt))

ValueError: Variable rnn/basic_rnn_cell/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "C:\Users\Affinity\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()
  File "C:\Users\Affinity\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3616, in create_op
    op_def=op_def)
  File "C:\Users\Affinity\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\Affinity\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Affinity\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_state_ops.py", line 2023, in variable_v2
    shared_name=shared_name, name=name)


In [15]:
tf.reset_default_graph() # To prevent graph error of tensorflow

# For convenience
def seqtostr(input): 
    return ''.join(ix_to_char[ch] for ch in input[:])

# Kapathy's test function
def sample(x, n, cell, Why, by):
    W = cell[0]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    bh = cell[1]
    h = np.zeros((1, hidden_size))
    ixes = []

    for step in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        # p = np.exp(y) / np.sum(np.exp(y))
        # ix = np.random.choice(list(range(vocab_size)), p=p.ravel())
        ix = np.argmax(y)
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    return ixes

# Karpathy's preprocessing
data = open('input.txt', 'r').read()
print(data)
chars= list(set(data))
vocab_size = len(chars)
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
print(char_to_ix)

# Preparing training
inputs = [[char_to_ix[ch] for ch in data[:-1]]]
targets = [[char_to_ix[ch] for ch in data[1:]]]
X_train = tf.one_hot(inputs, vocab_size)
Y_train = tf.one_hot(targets, vocab_size)

# Hyperparameters
hidden_size = 10
cost_val = 100 # initial cost
learning_rate = 1e-3 #0.001

# etc.
batch_size = 1
seq_length = len(data) -1

# Placeholder
X = tf.placeholder(tf.float32, [None, seq_length, vocab_size])
Y = tf.placeholder(tf.int32, [None, seq_length, vocab_size])

# Output layer of char-rnn
Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size], name='weight_hy'))
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

# Setting cell, loss function, optimizer
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=state, dtype=tf.float32)
logit = tf.matmul(outputs, Why) + by
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=Y))
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())    
    for step in range(1000):
        #Test
        if step % 100 == 0:
            X_test = sess.run(X_train[0][0]).reshape(1, vocab_size) # Start from the beginning
            predtxt = sample(X_test, seq_length, sess.run(cell.variables), sess.run(Why), sess.run(by))
            print('step :', step, 'cost :', cost_val,
                  '\npred :', data[0] + seqtostr(predtxt), '\n','-'*50)
            
        # Train
        cost_val, _ = sess.run([cost, train], feed_dict={X:sess.run(X_train), Y:sess.run(Y_train)})

Once upon a midnight dreary, while I pondered, weak and weary,

Over many a quaint and curious volume of forgotten lore-

While I nodded, nearly napping, suddenly there came a tapping,

As of someone gently rapping, rapping at my chamber door.

"'Tis some visitor," I muttered, "tapping at my chamber door-

Only this and nothing more."
{'e': 0, 'T': 1, "'": 2, 'm': 3, 'b': 4, 'I': 5, 'd': 6, 'g': 7, 'r': 8, 't': 9, '"': 10, 'l': 11, 'y': 12, 'h': 13, 'W': 14, 'a': 15, 'p': 16, 's': 17, '.': 18, '\n': 19, 'u': 20, 'c': 21, 'O': 22, 'i': 23, 'f': 24, '-': 25, 'n': 26, 'A': 27, ',': 28, 'w': 29, 'v': 30, ' ': 31, 'k': 32, 'o': 33, 'q': 34}
step : 0 cost : 100 
pred : Obmrrrrrrrrrrrrrrrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrmmrrm

In [5]:
tf.reset_default_graph() # To prevent graph error of tensorflow

def seqtostr(input): 
    return ''.join(ix_to_char[ch] for ch in input[:])

def sample(x, n, cell, Why, by):
    W = cell[0]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    bh = cell[1]
    h = np.zeros((1, hidden_size))
    ixes = []

    for step in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        ix = np.argmax(y)
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    return ixes

data = '안녕하세요'
chars= list(set(data))
vocab_size = len(chars)
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
X_data = [char_to_ix[ch] for ch in data]
X_onehot = tf.one_hot(X_data, vocab_size)

hidden_size = 10
seq_length = len(data)-1
batch_size = 1

Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size], name='weight_hy'))
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

X = tf.placeholder(tf.float32, [None, 1, vocab_size])
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=initial_state, dtype=tf.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X_test = sess.run(X_onehot[0]).reshape(1, vocab_size)
    predtxt = sample(X_test, seq_length, sess.run(cell.variables), sess.run(Why), sess.run(by))
    print(data[0] + seqtostr(predtxt))

안세안세세


In [7]:
tf.reset_default_graph() # To prevent graph error of tensorflow

# For convenience
def seqtostr(input): 
    return ''.join(ix_to_char[ch] for ch in input[:])

# Kapathy's test function
def sample(x, n, cell, Why, by):
    W = cell[0]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    bh = cell[1]
    h = np.zeros((1, hidden_size))
    ixes = []

    for step in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        # p = np.exp(y) / np.sum(np.exp(y))
        # ix = np.random.choice(list(range(vocab_size)), p=p.ravel())
        ix = np.argmax(y)
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    return ixes

# Karpathy's preprocessing
data = "안녕하세요"

chars= list(set(data))
vocab_size = len(chars)
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# Preparing training
inputs = [[char_to_ix[ch] for ch in data[:-1]]]
targets = [[char_to_ix[ch] for ch in data[1:]]]
X_train = tf.one_hot(inputs, vocab_size)
Y_train = tf.one_hot(targets, vocab_size)

# Hyperparameters
hidden_size = 10
cost_val = 100 # initial cost
learning_rate = 1e-3 #0.001

# etc.
batch_size = 1
seq_length = len(data)-1

# Placeholder
X = tf.placeholder(tf.float32, [None, seq_length, vocab_size])
Y = tf.placeholder(tf.int32, [None, seq_length, vocab_size])

# Output layer of char-rnn
Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size], name='weight_hy'))
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

# Setting cell, loss function, optimizer
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=state, dtype=tf.float32)
logit = tf.matmul(outputs, Why) + by
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=Y))
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())    
    for step in range(201):
        #Test
        if step % 50 == 0:
            X_test = sess.run(X_train[0][0]).reshape(1, vocab_size) # Start from the beginning
            predtxt = sample(X_test, seq_length, sess.run(cell.variables), sess.run(Why), sess.run(by))
            print('step :', step, 'cost :', cost_val,
                  '\npred :', data[0] + seqtostr(predtxt), '\n','-'*50)
            
        # Train
        cost_val, _ = sess.run([cost, train], feed_dict={X:sess.run(X_train), Y:sess.run(Y_train)})

step : 0 cost : 100 
pred : 안하요안하 
 --------------------------------------------------
step : 50 cost : 0.91580653 
pred : 안녕하하요 
 --------------------------------------------------
step : 100 cost : 0.43975577 
pred : 안녕하세요 
 --------------------------------------------------
step : 150 cost : 0.21642818 
pred : 안녕하세요 
 --------------------------------------------------
step : 200 cost : 0.12542069 
pred : 안녕하세요 
 --------------------------------------------------


In [6]:
tf.reset_default_graph() # To prevent graph error of tensorflow

# For convenience
def seqtostr(input): 
    return ''.join(ix_to_char[ch] for ch in input[:])

# Kapathy's test function
def sample(x, n, cell, Why, by):
    W = cell[0]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    bh = cell[1]
    h = np.zeros((1, hidden_size))
    ixes = []

    for step in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        # p = np.exp(y) / np.sum(np.exp(y))
        # ix = np.random.choice(list(range(vocab_size)), p=p.ravel())
        ix = np.argmax(y)
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    return ixes

# Karpathy's preprocessing
data = open('poem.txt', 'r').read()

chars= list(set(data))
vocab_size = len(chars)
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# Preparing training
inputs = [[char_to_ix[ch] for ch in data[:-1]]]
targets = [[char_to_ix[ch] for ch in data[1:]]]
X_train = tf.one_hot(inputs, vocab_size)
Y_train = tf.one_hot(targets, vocab_size)

# Hyperparameters
hidden_size = 10
cost_val = 100 # initial cost
learning_rate = 1e-3 #0.001

# etc.
batch_size = 1
seq_length = len(data)-1

# Placeholder
X = tf.placeholder(tf.float32, [None, seq_length, vocab_size])
Y = tf.placeholder(tf.int32, [None, seq_length, vocab_size])

# Output layer of char-rnn
Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size], name='weight_hy'))
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

# Setting cell, loss function, optimizer
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=state, dtype=tf.float32)
logit = tf.matmul(outputs, Why) + by
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=Y))
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())    
    for step in range(1000):
        #Test
        if step % 100 == 0:
            X_test = sess.run(X_train[0][0]).reshape(1, vocab_size) # Start from the beginning
            predtxt = sample(X_test, seq_length, sess.run(cell.variables), sess.run(Why), sess.run(by))
            print('step :', step, 'cost :', cost_val,
                  '\npred :', data[0] + seqtostr(predtxt), '\n','-'*50)
            
        # Train
        cost_val, _ = sess.run([cost, train], feed_dict={X:sess.run(X_train), Y:sess.run(Y_train)})

step : 0 cost : 100 
pred : 허땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅땅 
 --------------------------------------------------
step : 100 cost : 4.0819936 
pred : 허땅길                                                                                                                                                                                                                                                    
 --------------------------------------------------
step : 200 cost : 3.282193 
pred : 허땅요 땅                                                                                                                                                                                                                                                  
 --------------------------------------------------
s

In [9]:
tf.reset_default_graph() # To prevent graph error of tensorflow

# For convenience
def seqtostr(input): 
    return ''.join(ix_to_char[ch] for ch in input[:])

# Kapathy's test function
def sample(x, n, cell, Why, by):
    W = cell[0]
    Wxh = W[:vocab_size]
    Whh = W[vocab_size:]
    bh = cell[1]
    h = np.zeros((1, hidden_size))
    ixes = []

    for step in range(n):
        h = np.tanh(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
        y = np.dot(h, Why) + by
        # p = np.exp(y) / np.sum(np.exp(y))
        # ix = np.random.choice(list(range(vocab_size)), p=p.ravel())
        ix = np.argmax(y)
        x = np.zeros((1, vocab_size))
        x[0][ix] = 1
        ixes.append(ix)
    return ixes

# Karpathy's preprocessing
data = data = open('song.txt', 'r').read()

chars= list(set(data))
vocab_size = len(chars)
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# Preparing training
inputs = [[char_to_ix[ch] for ch in data[:-1]]]
targets = [[char_to_ix[ch] for ch in data[1:]]]
X_train = tf.one_hot(inputs, vocab_size)
Y_train = tf.one_hot(targets, vocab_size)

# Hyperparameters
hidden_size = 10
cost_val = 100 # initial cost
learning_rate = 1e-3 #0.001

# etc.
batch_size = 1
seq_length = len(data)-1

# Placeholder
X = tf.placeholder(tf.float32, [None, seq_length, vocab_size])
Y = tf.placeholder(tf.int32, [None, seq_length, vocab_size])

# Output layer of char-rnn
Why = tf.Variable(tf.random_normal([batch_size, hidden_size, vocab_size], name='weight_hy'))
by = tf.Variable(tf.random_normal([batch_size, 1, vocab_size]), name='bias_y')

# Setting cell, loss function, optimizer
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=hidden_size)
state = cell.zero_state(batch_size, tf.float32)
outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=X,
                                   initial_state=state, dtype=tf.float32)
logit = tf.matmul(outputs, Why) + by
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=Y))
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())    
    for step in range(3001):
        #Test
        if step % 300 == 0:
            X_test = sess.run(X_train[0][0]).reshape(1, vocab_size) # Start from the beginning
            predtxt = sample(X_test, seq_length, sess.run(cell.variables), sess.run(Why), sess.run(by))
            print('step :', step, 'cost :', cost_val,
                  '\npred :', data[0] + seqtostr(predtxt), '\n','-'*50)
            
        # Train
        cost_val, _ = sess.run([cost, train], feed_dict={X:sess.run(X_train), Y:sess.run(Y_train)})

step : 0 cost : 100 
pred : 저또 또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같I또또같 
 --------------------------------------------------
step : 300 cost : 2.7694

step : 2700 cost : 0.3296616 
pred : 저 오늘 떠나요 어or
I Likry 나 자유롭게 fly 나 숨을 셔

Take me to new world any F a ayywyouth

l시tonle  꿈같은 my yo yo yonynk me anawaaanht  ely 벗리 ord 날 벗어 나
fl m u 날le yo youth
자유롭게 fly fly 나 숨을 셔

Take me to new world any F a 숨어 나 미쳤어 나 떠날 거야 다 비켜
I fly away-

Take me to new world anayy alway-

Tnke me to new world aaany bin Pw리 떠다니는 새처럼
난 자유롭게 fly fly 나 숨을 셔

Take me to new world any F a ayywyouth
자유롭게 fly fly 나 숨을 셔

Take me to new world any F a 숨어 나 미쳤어 나 떠날 거야 다 비켜
I fly away-

Take me to new world anayy alway-

Take me to London me  치날고다 치이리ry디 자유롭게 fly 나기만 하면
Shining light light 빛나는 my youth
자유롭게 fly fly 나 숨을 셔

Take me to new world any Fly always youth
자유롭게 fly fly 나 숨을 셔

Take me to new world any Fly always anawayy y awawayy aly youth
자유롭게 fly fly 나 숨을 셔

Take me to new world any F a 숨어 나 미쳤어 나 떠날 거야 다 비켜
I fly away-

Take me to new world anayy alway-

Take me to London me  치날고다 치이리ry디 자 
 --------------------------------------------------
step : 3000 cos