In [None]:
%pylab

In [27]:
import tensorflow as tf
import numpy as np

In [2]:
from six.moves import urllib

In [3]:
f, _ = urllib.request.urlretrieve("http://www.gutenberg.org/cache/epub/28885/pg28885.txt", 'alice.txt')

In [4]:
with open(f, 'r') as fp:
    text = fp.read()

In [5]:
print(text[:100])

﻿Project Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll

This eBook is for the u


In [6]:
vocab = list(set(text))

In [7]:
len(vocab), len(text)

(93, 177428)

In [8]:
encoder = dict(zip(vocab, range(len(vocab))))

In [9]:
decoder = dict(zip(range(len(vocab)), vocab))

In [10]:
batch_size=100
seq_len = 100
n_layers = 2
n_chars = len(vocab)
n_cells = 256

In [11]:
X = tf.placeholder(tf.int32, [None, seq_len], name='X')
Y = tf.placeholder(tf.int32, [None, seq_len], name='Y')

In [12]:
# we first create a variable to take us from our one-hot representation to our LSTM cells
embedding = tf.get_variable("embedding", [n_chars, n_cells])

# And then use tensorflow's embedding lookup to look up the ids in X
Xs = tf.nn.embedding_lookup(embedding, X)

# The resulting lookups are concatenated into a dense tensor
print(Xs.get_shape().as_list())

[None, 100, 256]


In [13]:
with tf.name_scope('reslice'):
    Xs = [tf.squeeze(seq, [1]) for seq in tf.split(1, seq_len, Xs)]

In [14]:
cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=n_cells, state_is_tuple=True)

In [15]:
initial_state = cell.zero_state(tf.shape(X)[0], tf.float32)

In [16]:
if n_layers > 1:
    cell = tf.nn.rnn_cell.MultiRNNCell([cell]*n_layers, state_is_tuple=True)
initial_state = cell.zero_state(tf.shape(X)[0], tf.float32)

In [17]:
outputs, state = tf.nn.rnn(cell, Xs, initial_state=initial_state)

In [18]:
output_flat = tf.reshape(tf.concat(1, outputs), [-1, n_cells])

In [19]:
with tf.variable_scope('prediction'):
    W = tf.get_variable(
        "W",
        shape=[n_cells, n_chars],
        initializer=tf.random_normal_initializer(stddev=0.1))
    b = tf.get_variable(
        "b",
        shape=[n_chars],
        initializer=tf.random_normal_initializer(stddev=0.1))

    # Find the output prediction of every single character in our minibatch
    # we denote the pre-activation prediction, logits.
    logits = tf.matmul(output_flat, W) + b

    # We get the probabilistic version by calculating the softmax of this
    probs = tf.nn.softmax(logits)

    # And then we can find the index of maximum probability
    Y_pred = tf.argmax(probs, 1)

In [21]:
with tf.variable_scope('loss'):
    # Compute mean cross entropy loss for each output.
    Y_true_flat = tf.reshape(tf.concat(1, Y), [-1])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, Y_true_flat)
    mean_loss = tf.reduce_mean(loss)

In [22]:
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    gradients = []
    clip = tf.constant(5.0, name="clip")
    for grad, var in optimizer.compute_gradients(mean_loss):
        gradients.append((tf.clip_by_value(grad, -clip, clip), var))
    updates = optimizer.apply_gradients(gradients)

In [None]:
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

cursor = 0
it_i = 0
while True:
    Xs, Ys = [], []
    for batch_i in range(batch_size):
        if (cursor + seq_len) >= len(text) - seq_len - 1:
            cursor = 0
        Xs.append([encoder[ch]
                   for ch in text[cursor:cursor + seq_len]])
        Ys.append([encoder[ch]
                   for ch in text[cursor + 1: cursor + seq_len + 1]])

        cursor = (cursor + seq_len)
    Xs = np.array(Xs).astype(np.int32)
    Ys = np.array(Ys).astype(np.int32)

    loss_val, _ = sess.run([mean_loss, updates],
                           feed_dict={X: Xs, Y: Ys})
    print(it_i, loss_val)

    if it_i % 500 == 0:
        p = sess.run([Y_pred], feed_dict={X: Xs})[0]
        preds = [decoder[p_i] for p_i in p]
        print("".join(preds).split('\n'))

    it_i += 1

(0, 4.5472569)
['PPPPPPPPPPPPPPPPPPPPPPP     PP      PPPPPPPP      PP PPPPPP                                         PPPPPPPPPPPPPP  PPPPP                                                PPPPPPPPP                      PPPPPffW                                                                      PPPPPPPPPP            PPPPPPPPPPP            \xc3\xc3 \xc3PPPP                        PPPPPPP          PPP                         PPPPPPPPPPPPPPPPP                                                              PPPPPPPPPPP          PPPPPPPPPPPPPPPPPPP                                  qqq                                            PPPPPPPPPPPPPPPPP\xc3\xc3\xc3\xc3\xc3                                           C                                  PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP                CCCCCCP                     PPPPPPP                 PPPPPPPPPP      PP                                                  PPPPPPPq                        PPPPPPPPPPPPPP                         PPPPPPPPPPPPPPPPPPPP