# Character level rnn 

In [1]:
import os
import time

In [6]:
import tensorflow as tf
import numpy as np

In [3]:
DATA_PATH = './data/arvix_abstracts.txt'
HIDDEN_SIZE = 200
BATCH_SIZE = 64
NUM_STEPS = 50
SKIP_STEP = 40
TEMPRATURE = 0.7
LR = 0.003
LEN_GENERATED = 300

# Utils 

In [47]:
def vocab_encode(text, vocab):
    """
    text가 Vocab(characters) 안에 있으면 index로 변환
    """
    return [vocab.index(x) + 1 for x in text if x in vocab]

def vocab_decode(array, vocab):
    """
    index를 다시 문자열로 디코딩
    """
    return ''.join([vocab[x - 1] for x in array])

def read_data(filename, vocab, window=NUM_STEPS, overlap=NUM_STEPS/2):
    for text in open(filename):
        text = vocab_encode(text, vocab)

        for start in range(0, len(text) - window, int(overlap)):
            chunk = text[start: start + window]
            chunk += [0] * (window - len(chunk))
            yield chunk

def read_batch(stream, batch_size=BATCH_SIZE):
    batch = []
    for element in stream:
        batch.append(element)
        if len(batch) == batch_size:
            yield batch
            batch = []


# Create RNN 

In [37]:
def create_rnn(seq, hidden_size=HIDDEN_SIZE):
    cell = tf.contrib.rnn.GRUCell(hidden_size)
    in_state = tf.placeholder_with_default(
            cell.zero_state(tf.shape(seq)[0], tf.float32), [None, hidden_size])
    # 초기 state를 default로 다 0인 벡터로 잡기?
    
    # 시퀀스의 실제 길이를 저장한다
    # 모든 시퀀스는 NUM_STEPS만큼으로 padding 된다
    length = tf.reduce_sum(tf.reduce_max(tf.sign(seq), 2), 1)
    
    # dynamic_rnn을 사용하려면 실제 길이를 알려줘야 함(length)
    output, out_state = tf.nn.dynamic_rnn(cell, seq, length, in_state)
    
    return output, in_state, out_state

#### tf.sign(x, name=None) 

Returns an element-wise indication of the sign of a number.

y = sign(x) = -1 if x < 0; 0 if x == 0; 1 if x > 0.

#### tf.reduce_max(input_tensor, axis=None, keep_dims=False, name=None, reduction_indices=None)

Computes the maximum of elements across dimensions of a tensor.

Reduces input_tensor along the dimensions given in axis. Unless keep_dims is true, the rank of the tensor is reduced by 1 for each entry in axis. If keep_dims is true, the reduced dimensions are retained with length 1.

If axis has no entries, all dimensions are reduced, and a tensor with a single element is returned.

In [10]:
test = np.random.randn(2,10)

In [23]:
test = np.array([[1,2,3,4,5,6]])

In [24]:
test

array([[1, 2, 3, 4, 5, 6]])

In [25]:
with tf.Session() as sess:
    print(sess.run(tf.reduce_max(test)))

6


In [29]:
test = np.array([[1,1,1],[1,1,1]])

#### tf.reduce_sum(input_tensor, axis=None, keep_dims=False, name=None, reduction_indices=None)

In [30]:
with tf.Session() as sess:
    print(sess.run(tf.reduce_sum(test)))
    print(sess.run(tf.reduce_sum(test,0)))
    print(sess.run(tf.reduce_sum(test,1)))
    print(sess.run(tf.reduce_sum(test,1,keep_dims=True)))
    print(sess.run(tf.reduce_sum(test,[0,1])))

6
[2 2 2]
[3 3]
[[3]
 [3]]
6


#### tf.nn.dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, dtype=None, parallel_iterations=None, swap_memory=False, time_major=False, scope=None)

uses a tf.While loop to dynamically construct the graph when it is executed. Graph creation is faster and you can feed batches of variable size. (다 다른 문장 길이를 동적으로 생성. 오히려 더 빠르다)

# Create model 

In [39]:
def create_model(seq, temp, vocab, hidden=HIDDEN_SIZE):
    seq = tf.one_hot(seq, len(vocab))
    output, in_state, out_state = create_rnn(seq, hidden)
    # fully_connected is syntactic sugar for tf.matmul(w, output) + b
    # it will create w and b for us
    logits = tf.contrib.layers.fully_connected(output, len(vocab), None)
    loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits[:, :-1],labels=seq[:, 1:]))
    # sample the next character from Maxwell-Boltzmann Distribution with temperature temp
    # it works equally well without tf.exp
    sample = tf.multinomial(tf.exp(logits[:, -1] / temp), 1)[:, 0] 
    return loss, sample, in_state, out_state

In [53]:
def training(vocab, seq, loss, optimizer, global_step, temp, sample, in_state, out_state):
    saver = tf.train.Saver()
    start = time.time()
    with tf.Session() as sess:
        writer = tf.summary.FileWriter('graphs/gist', sess.graph)
        sess.run(tf.global_variables_initializer())
        
        ckpt = tf.train.get_checkpoint_state(os.path.dirname('ckp/arvix/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        
        iteration = global_step.eval()
        for batch in read_batch(read_data(DATA_PATH, vocab)):
            batch_loss, _ = sess.run([loss, optimizer], {seq: batch})
            if (iteration + 1) % SKIP_STEP == 0:
                print('Iter {}. \n    Loss {}. Time {}'.format(iteration, batch_loss, time.time() - start))
                online_intference(sess, vocab, seq, sample, temp, in_state, out_state)
                start = time.time()
                saver.save(sess, 'ckp/arvix/char-rnn', iteration)
            iteration += 1

def online_intference(sess, vocab, seq, sample, temp, in_state, out_state, seed='T'):
    """ Generate sequence one character at a time, based on the previous character
    """
    sentence = seed
    state = None
    for _ in range(LEN_GENERATED):
        batch = [vocab_encode(sentence[-1], vocab)]
        feed = {seq: batch, temp: TEMPRATURE}
        # for the first decoder step, the state is None
        if state is not None:
            feed.update({in_state: state})
        index, state = sess.run([sample, out_state], feed)
        sentence += vocab_decode(index, vocab)
    print(sentence)

# Main 

In [54]:
tf.reset_default_graph()

In [55]:
vocab = (
            " $%'()+,-./0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "\\^_abcdefghijklmnopqrstuvwxyz{|}")
seq = tf.placeholder(tf.int32, [None, None])
temp = tf.placeholder(tf.float32)
loss, sample, in_state, out_state = create_model(seq, temp, vocab)
global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
optimizer = tf.train.AdamOptimizer(LR).minimize(loss, global_step=global_step)
training(vocab, seq, loss, optimizer, global_step, temp, sample, in_state, out_state)

Iter 39. 
    Loss 9483.3203125. Time 10.39647650718689
T7oe   e                                                                                                                                                e          e    e   e   e   e   e    e   e   e   e   e   e   e   e   e   e   e   e   e   e  e   e  e  e   e   e   e  e  e   e  e   e  e  e  e  e  e  
Iter 79. 
    Loss 8448.4296875. Time 11.026517391204834
T3y the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the t
Iter 119. 
    Loss 7811.4736328125. Time 9.393127679824829
The the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the