https://github.com/sjchoi86/advanced-tensorflow/blob/master/basic/char-rnn-simple.ipynb

SIMPLE CHAR-RNN

In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(0)  
print ("TENSORFLOW VERSION IS %s" % (tf.__version__))

TENSORFLOW VERSION IS 1.0.1


DEFINE TRAINING SEQUENCE

In [2]:
quote1 = ("If you want to build a ship, "
          "don't drum up people to collect wood and don't assign them tasks and work,"
          " but rather teach them to long for the endless immensity of the sea.")
quote2 = ("Perfection is achieved, "
          "not when there is nothing more to add, "
          "but when there is nothing left to take away.")
sentence = quote2
print ("FOLLOWING IS OUR TRAINING SEQUENCE:")
print (sentence)

FOLLOWING IS OUR TRAINING SEQUENCE:
Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.


DEFINE VOCABULARY AND DICTIONARY

In [3]:
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
print ("VOCABULARY: ")
print (char_set)
print ("DICTIONARY: ")
print (char_dic)

VOCABULARY: 
[' ', ',', '.', 'P', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i', 'h', 'k', 'm', 'l', 'o', 'n', 's', 'r', 'u', 't', 'w', 'v', 'y']
DICTIONARY: 
{' ': 0, ',': 1, '.': 2, 'P': 3, 'a': 4, 'c': 5, 'b': 6, 'e': 7, 'd': 8, 'g': 9, 'f': 10, 'i': 11, 'h': 12, 'k': 13, 'm': 14, 'l': 15, 'o': 16, 'n': 17, 's': 18, 'r': 19, 'u': 20, 't': 21, 'w': 22, 'v': 23, 'y': 24}


CONFIGURE NETWORK

In [4]:
data_dim    = len(char_set)
num_classes = len(char_set)
hidden_size     = 64
sequence_length = 15  # Any arbitrary number

SET TRAINING BATCHES

In [6]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index
    dataX.append(x)
    dataY.append(y)
    if i < 15:
        print ("[%4d/%4d] [%s]=>[%s]" % (i, len(sentence), x_str, y_str))
        print ("%s%s=>%s" % (' '*12, x, y))

[   0/ 107] [Perfection is a]=>[erfection is ac]
            [3, 7, 19, 10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4]=>[7, 19, 10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5]
[   1/ 107] [erfection is ac]=>[rfection is ach]
            [7, 19, 10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5]=>[19, 10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12]
[   2/ 107] [rfection is ach]=>[fection is achi]
            [19, 10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12]=>[10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12, 11]
[   3/ 107] [fection is achi]=>[ection is achie]
            [10, 7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12, 11]=>[7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12, 11, 7]
[   4/ 107] [ection is achie]=>[ction is achiev]
            [7, 5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12, 11, 7]=>[5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12, 11, 7, 23]
[   5/ 107] [ction is achiev]=>[tion is achieve]
            [5, 21, 11, 16, 17, 0, 11, 18, 0, 4, 5, 12, 11, 7, 23]=>[21, 11, 16, 17, 0, 11, 18, 

In [7]:
ndata      = len(dataX)
batch_size = 512
print ("     'NDATA' IS %d" % (ndata))
print ("'BATCH_SIZE' IS %d" % (batch_size))

     'NDATA' IS 92
'BATCH_SIZE' IS 512


DEFINE PLACEHOLDERS

In [8]:
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])
X_OH = tf.one_hot(X, num_classes)
print ("'sequence_length' IS [%d]" % (sequence_length))
print ("    'num_classes' IS [%d]" % (num_classes))
print("'X' LOOKS LIKE \n   [%s]" % (X))  
print("'X_OH' LOOKS LIKE \n   [%s]" % (X_OH))

'sequence_length' IS [15]
    'num_classes' IS [25]
'X' LOOKS LIKE 
   [Tensor("Placeholder:0", shape=(?, 15), dtype=int32)]
'X_OH' LOOKS LIKE 
   [Tensor("one_hot:0", shape=(?, 15, 25), dtype=float32)]


DEFINE MODEL

In [9]:
num_hidden = 128
with tf.variable_scope('CHAR-RNN', reuse=False):
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    cell = rnn.MultiRNNCell([cell] * 2, state_is_tuple=True)
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(X_OH, num_hidden, activation_fn=tf.nn.relu)
    _outputs, _states = tf.nn.dynamic_rnn(cell, _hiddens, dtype=tf.float32)
    _outputs  = tf.contrib.layers.fully_connected(_outputs, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs = tf.reshape(_outputs, [batch_size, sequence_length, num_classes])
print ("OUTPUTS LOOKS LIKE [%s]" % (outputs))
print ("MODEL DEFINED.")

OUTPUTS LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 15, 25), dtype=float32)]
MODEL DEFINED.


DEFINE TF FUNCTIONS

In [10]:
weights = tf.ones([batch_size, sequence_length]) # EQUAL WEIGHTS
seq_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss  = tf.reduce_mean(seq_loss)
optm  = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
print ("FUNCTIONS DEFINED.")

FUNCTIONS DEFINED.


OPTIMIZE

In [11]:
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
MAXITER = 2000
for i in range(MAXITER):
    randidx = np.random.randint(low=0, high=ndata, size=batch_size)
    batchX = [dataX[iii] for iii in randidx]
    batchY = [dataY[iii] for iii in randidx]
    feeds = {X: batchX, Y: batchY}
    _, loss_val, results = sess.run(
        [optm, loss, outputs], feed_dict=feeds)
    if (i%200) == 0:
        print ("[%5d] loss_val: %.5f " % (i, loss_val))

[    0] loss_val: 3.21298 
[  200] loss_val: 0.11954 
[  400] loss_val: 0.11443 
[  600] loss_val: 0.11208 
[  800] loss_val: 0.11211 
[ 1000] loss_val: 0.10972 
[ 1200] loss_val: 0.10929 
[ 1400] loss_val: 0.11141 
[ 1600] loss_val: 0.11319 
[ 1800] loss_val: 0.11087 


PRINT CHARS

In [12]:
randidx = np.random.randint(low=0, high=ndata, size=batch_size)
batchX = [dataX[iii] for iii in randidx]
batchY = [dataY[iii] for iii in randidx]
feeds = {X: batchX, Y: batchY}
results = sess.run(outputs, feed_dict=feeds)
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    chars = [char_set[t] for t in index]
    if j < 10:
        print (chars)

['e', 'e', 'c', 't', 'i', 'o', 'n', ' ', 'i', 's', ' ', 'a', 'c', 'h', 'i']
[' ', 't', 'h', 'i', 'n', 'g', ' ', 'l', 'o', 'r', 'e', ' ', 't', 'o', ' ']
[' ', ' ', 't', 'h', 'e', 'r', 'e', ' ', 'i', 's', ' ', 'n', 'o', 't', 'h']
['s', 'n', ' ', 'i', 's', ' ', 'a', 'c', 'h', 'i', 'e', 'v', 'e', 'd', ',']
['e', 'r', 'f', 'e', 'c', 't', 'i', 'o', 'n', ' ', 'i', 's', ' ', 'a', 'c']
['e', 'r', 'e', ' ', 'i', 's', ' ', 'n', 'o', 't', 'h', 'i', 'n', 'g', ' ']
[' ', 'e', 'd', ',', ' ', 'n', 'o', 't', ' ', 'w', 'h', 'e', 'n', ' ', 't']
['t', 'i', 'o', 'n', ' ', 'i', 's', ' ', 'a', 'c', 'h', 'i', 'e', 'v', 'e']
['h', 'i', 'r', 'e', ' ', 'i', 's', ' ', 'n', 'o', 't', 'h', 'i', 'n', 'g']
['t', ' ', 'w', 'h', 'e', 'n', ' ', 't', 'h', 'e', 'r', 'e', ' ', 'i', 's']


SAMPLING FUNCTION

In [13]:
LEN = 1;
# XL = tf.placeholder(tf.int32, [None, LEN])
XL     = tf.placeholder(tf.int32, [None, 1])
XL_OH  = tf.one_hot(XL, num_classes)
with tf.variable_scope('CHAR-RNN', reuse=True):
    cell_L = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    cell_L = rnn.MultiRNNCell([cell_L] * 2, state_is_tuple=True)
    istate = cell_L.zero_state(batch_size=1, dtype=tf.float32)
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(XL_OH, num_hidden, activation_fn=tf.nn.tanh)
    _outputs_L, states_L = tf.nn.dynamic_rnn(cell_L, _hiddens
                                , initial_state=istate, dtype=tf.float32)
    _outputs_L  = tf.contrib.layers.fully_connected(
        _outputs_L, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs_L = tf.reshape(_outputs_L, [LEN, 1, num_classes])
print (XL)

def weighted_pick(weights):
    t = np.cumsum(weights)
    s = np.sum(weights)
    return(int(np.searchsorted(t, np.random.rand(1)*s)))
def softmax(x):
    alpha = 1
    e_x = np.exp(alpha*(x - np.max(x)))
    return e_x / np.sum(e_x) # only difference

Tensor("Placeholder_2:0", shape=(?, 1), dtype=int32)


SAMPLE

BURNIN

In [14]:
prime = "Perfection is"
istateval = sess.run(cell_L.zero_state(1, tf.float32))
for c in prime[:-1]:
    index = char_dic[c]
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval

SAMPLE

In [15]:
inval  = [[char_dic[prime[-1]]]]
outval, stateval = sess.run([outputs_L, states_L]
                    , feed_dict={XL:inval, istate:istateval})
istateval = stateval
index = np.argmax(outval)
char  = char_set[index]
chars = char
for i in range(150):
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval
    # index = np.argmax(outval)
    index = weighted_pick(softmax(outval))
    char  = char_set[index]
    chars += char
print ("<SAMPLED SETENCE> \n %s" % (prime+chars))
print ("\n<ORIGINAL SENTENCE> \n %s" % (sentence))

<SAMPLED SETENCE> 
 Perfection is achieved, nothing left to take away. more to add, but when there is nothing more to add, but when there is nothing more to add, but when there is noth

<ORIGINAL SENTENCE> 
 Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.
