## Reference
- [Recurrent Neural Networks in Tensorflow II](http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html)

## Imports, Task and data

In [1]:
import numpy as np
import tensorflow as tf

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import time
import os
import urllib.request

import seaborn; seaborn.set()

In [2]:
# Load data
file_url = 'https://raw.githubusercontent.com/samim23/obama-rnn/master/input.txt'
file_name = 'obama_speech.txt'
if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)
    
with open(file_name, 'r', encoding='utf8') as f:
    raw_data = f.read()
    print(raw_data[:30] + ".....")
    print("Data length: ", len(raw_data))

To Chip, Kathy, and Nancy, who.....
Data length:  4224143


In [3]:
# process data - vocab for char and transformed vocab to idx.

vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

data = [vocab_to_idx[c] for c in raw_data]
del raw_data
print(data[:10])

[3, 69, 92, 51, 45, 49, 57, 115, 92, 54]


In [4]:
# utility functions
def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield ptb_iterator(data, batch_size, num_steps)
    
def ptb_iterator(raw_data, batch_size, num_steps):
    raw_data = np.array(raw_data, dtype=np.int32)
    
    data_len = len(raw_data)
    batch_len = data_len // batch_size
    data = np.zeros([batch_size, batch_len], dtype=np.int32)
    for i in range(batch_size):
        data[i] = raw_data[batch_len*i:batch_len*(i+1)]
        
    epoch_size = (batch_len - 1) // num_steps
    
    if epoch_size == 0:
        raise ValueError("epoch size == 0, decrease batch_size or num_steps")
        
    for i in range(epoch_size):
        x = data[:, i*num_steps: (i+1)*num_steps]
        y = data[:, i*num_steps+1: (i+1)*num_steps+1]
        yield (x, y)
        
def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()
    
def train(g, num_epochs, num_steps=200, batch_size=32, verbose=True, 
          verbose_step=1000, save=False):
    tf.set_random_seed(2345)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        traininig_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            traininig_loss = 0
            steps = 0
            traininig_state = None
            for X, y in epoch:
                steps += 1
                feed_dict ={g['x']: X, g['y']: y}
                if traininig_state is not None:
                    feed_dict[g['init_state']] = traininig_state
                traininig_loss_, traininig_state, _ = sess.run(
                    [g['total_loss'], g['final_state'], g['train_step']], feed_dict)
                traininig_loss += traininig_loss_
                
            if verbose:
                print("Average trainig loss for Epoch", idx, ":", traininig_loss/steps)
            traininig_losses.append(traininig_loss/steps)
            
        if isinstance(save, str):
            save_dir = save.split("/")[0]
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            g['saver'].save(sess, save)
    
    return traininig_losses

## Model

In [5]:
def build_graph(state_size=100, num_classes=vocab_size, batch_size=32, num_steps=200,
                num_layers=3, learning_rate=1e-4):
    
    reset_graph()
    
    X = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
    y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placehoder')
    
    embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, X)
    
    cell = tf.contrib.rnn.GRUCell(state_size)
    cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
    
    init_state = cell.zero_state(batch_size, tf.float32)
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
    
    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
        
    # reshape rnn_outputs and y
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])
    
    logits = tf.matmul(rnn_outputs, W) + b
    predictions = tf.nn.softmax(logits)
    total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    
    return {
        "x": X,
        "y": y,
        "init_state": init_state,
        "final_state": final_state,
        "total_loss": total_loss,
        "train_step": train_step,
        "prediction": predictions,
        "saver": tf.train.Saver()
    }

## Training

In [6]:
train_epochs = 10
train_num_steps = 40
checkpoint_dir = "save/GRU_" + str(train_epochs) + "_epochs"

g = build_graph(num_steps=train_num_steps)
t = time.time()

losses = train(g, train_epochs, num_steps=train_num_steps, save=checkpoint_dir)
print("It took", time.time() - t, "seconds to train for " + str(train_epochs) + " epochs.")
print("The average loss on the final epoch was:", losses[-1])

Average trainig loss for Epoch 0 : 2.52731376926
Average trainig loss for Epoch 1 : 1.96453432134
Average trainig loss for Epoch 2 : 1.76912310658
Average trainig loss for Epoch 3 : 1.64480506048
Average trainig loss for Epoch 4 : 1.55989130309
Average trainig loss for Epoch 5 : 1.49849027399
Average trainig loss for Epoch 6 : 1.45135068778
Average trainig loss for Epoch 7 : 1.41417253007
Average trainig loss for Epoch 8 : 1.38430166599
Average trainig loss for Epoch 9 : 1.35971391667
It took 2985.2912950515747 seconds to train for 10 epochs.
The average loss on the final epoch was: 1.35971391667


## Generating text

In [7]:
def generate_characters(g, checkpoint, num_chars, prompt='A', pick_top_chars=None):
    # Accepts a current character, inital state

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        g['saver'].restore(sess, checkpoint)

        state = None
        current_char = vocab_to_idx[prompt]
        chars = [current_char]

        for i in range(num_chars):
            if state is not None:
                feed_dict={g['x']: [[current_char]], g['init_state']: state}
            else:
                feed_dict={g['x']: [[current_char]]}

            preds, state = sess.run([g['prediction'],g['final_state']], feed_dict)

            if pick_top_chars is not None:
                p = np.squeeze(preds)
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
                current_char = np.random.choice(vocab_size, 1, p=p)[0]
            else:
                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]

            chars.append(current_char)

    chars = map(lambda x: idx_to_vocab[x], chars)
    return("".join(chars))

In [8]:
g = build_graph(num_steps=1, batch_size=1)
generate_characters(g, checkpoint_dir, 750, prompt='H', pick_top_chars=5)

'H Wasels. They are not because the primately of alliance to came secure with change. All the prises and technology that we will be an accession to combight this its accupled and chould and we’re compenity and shave to broken a dollan entama of the right of the partners of these some for to should be configinc to partners and statism. And what they are without ship as a come that a security to should be the pates and shis sour who are to prayer singa and the right to the some of the pact of a should all a will act of as we can be new presentice in our competiers and sense of you to say that we’ve actions and was so the pass than we see in one travel countrys are comests and who can stand with the way outsigess to do white human tries of the p'