In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [2]:
# Fetch the Nietzsche corpus and get a local file path for it.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read inside a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
with io.open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character of the lower-cased corpus, sorted so the
# character <-> index mappings come out in a deterministic order.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40  # length of each input window, in characters
step = 3     # stride between the starts of consecutive windows
window_starts = range(0, len(text) - maxlen, step)
# sentences[k] is a maxlen-character window; next_chars[k] is the single
# character that immediately follows that window in the corpus.
sentences = [text[i: i + maxlen] for i in window_starts]
next_chars = [text[i + maxlen] for i in window_starts]
print('nb sequences:', len(sentences))

corpus length: 600893
total chars: 57
nb sequences: 200285


In [19]:
# Display the overlapping maxlen-character training windows built above.
sentences

['preface\n\n\nsupposing that truth is a woma',
 'face\n\n\nsupposing that truth is a woman--',
 'e\n\n\nsupposing that truth is a woman--wha',
 '\nsupposing that truth is a woman--what t',
 'pposing that truth is a woman--what then',
 'sing that truth is a woman--what then? i',
 'g that truth is a woman--what then? is t',
 'hat truth is a woman--what then? is ther',
 ' truth is a woman--what then? is there n',
 'uth is a woman--what then? is there not ',
 ' is a woman--what then? is there not gro',
 ' a woman--what then? is there not ground',
 'woman--what then? is there not ground\nfo',
 'an--what then? is there not ground\nfor s',
 '-what then? is there not ground\nfor susp',
 'at then? is there not ground\nfor suspect',
 'then? is there not ground\nfor suspecting',
 'n? is there not ground\nfor suspecting th',
 'is there not ground\nfor suspecting that ',
 'there not ground\nfor suspecting that all',
 're not ground\nfor suspecting that all ph',
 'not ground\nfor suspecting that al

In [20]:
# Display the target characters: next_chars[k] is the character that follows sentences[k].
next_chars

['n',
 'w',
 't',
 'h',
 '?',
 's',
 'h',
 'e',
 'o',
 'g',
 'u',
 '\n',
 'r',
 'u',
 'e',
 'i',
 ' ',
 'a',
 'a',
 ' ',
 'i',
 's',
 'h',
 's',
 'i',
 's',
 'f',
 ' ',
 ' ',
 'e',
 'h',
 'e',
 'e',
 '\n',
 'g',
 't',
 't',
 ' ',
 'v',
 'f',
 'l',
 ' ',
 ' ',
 'd',
 's',
 'n',
 'w',
 'e',
 '-',
 'a',
 't',
 ' ',
 'r',
 'b',
 '\n',
 'r',
 'u',
 'e',
 ' ',
 'd',
 'l',
 's',
 'i',
 'o',
 'u',
 't',
 'w',
 'h',
 'h',
 'h',
 'h',
 ' ',
 'v',
 'u',
 'a',
 'y',
 'a',
 '\n',
 'e',
 ' ',
 'd',
 's',
 's',
 'o',
 'r',
 'h',
 'h',
 'e',
 'e',
 ' ',
 's',
 'l',
 'd',
 'n',
 'u',
 'e',
 'l',
 'm',
 'h',
 's',
 'o',
 'w',
 'n',
 'g',
 ' ',
 'm',
 '?',
 'e',
 'a',
 'l',
 's',
 ' ',
 's',
 'e',
 'r',
 'l',
 'w',
 ' ',
 'r',
 'l',
 't',
 'b',
 'w',
 ';',
 'n',
 'a',
 'p',
 's',
 't',
 'v',
 'y',
 'i',
 ' ',
 ' ',
 'g',
 ' ',
 'a',
 's',
 'i',
 ' ',
 'd',
 'n',
 'd',
 'c',
 'r',
 'e',
 'm',
 'n',
 'i',
 '\n',
 'd',
 'd',
 'i',
 's',
 'n',
 ' ',
 ' ',
 'l',
 'f',
 ' ',
 'e',
 ' ',
 'e',
 'c',
 'f',
 's',

In [3]:
print('Vectorization...')
# One-hot encode the training data.
#   x[i, t, c] is True when character t of sentences[i] has vocabulary index c.
#   y[i, c]    is True when next_chars[i] has vocabulary index c.
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


Vectorization...


In [15]:
# Sanity check: x was allocated as (len(sentences), maxlen, len(chars)).
x.shape

(200285, 40, 57)

In [18]:
# One-hot target row for the third sequence: True only at char_indices[next_chars[2]].
y[2]

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False], dtype=bool)

In [8]:
# Model definition: one LSTM layer feeding a softmax over the vocabulary.
print('Build model...')
vocab_size = len(chars)
model = Sequential([
    # Consumes one-hot windows of shape (maxlen, vocab_size).
    LSTM(128, input_shape=(maxlen, vocab_size)),
    # Project the LSTM output to one score per character...
    Dense(vocab_size),
    # ...and normalise the scores into a probability distribution.
    Activation('softmax'),
])

optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


Build model...


In [17]:
# (timestep indices, character indices) of the True entries in the first encoded window.
np.where(x[0])

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39]),
 array([42, 44, 31, 32, 27, 29, 31,  0,  0,  0, 45, 47, 42, 42, 41, 45, 35,
        40, 33,  1, 46, 34, 27, 46,  1, 46, 44, 47, 46, 34,  1, 35, 45,  1,
        27,  1, 49, 41, 39, 27]))

In [18]:
# Same view for the second window, which starts `step` characters after the first.
np.where(x[1])

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39]),
 array([32, 27, 29, 31,  0,  0,  0, 45, 47, 42, 42, 41, 45, 35, 40, 33,  1,
        46, 34, 27, 46,  1, 46, 44, 47, 46, 34,  1, 35, 45,  1, 27,  1, 49,
        41, 39, 27, 40,  8,  8]))

In [19]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    The probabilities are turned into log-probabilities, divided by
    ``temperature``, and renormalised with a softmax; one index is then drawn
    from the resulting distribution.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Reweighting factor. Values below 1 sharpen the
            distribution, values above 1 flatten it. ``0`` is treated as the
            greedy limit and returns the arg-max of ``preds``.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')

    # temperature == 0 would divide by zero below; its limit is greedy decoding.
    if temperature == 0:
        return np.argmax(preds)

    # Zero-probability entries legitimately map to -inf (and back to 0 after
    # exp), so silence the divide-by-zero warning that np.log emits for them.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. Softmax is invariant to this shift, and
    # it keeps at least one exp() equal to 1, so a small temperature can no
    # longer underflow every term to 0 and yield 0/0 = NaN probabilities.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    # Draw a single one-hot sample and return the position of its 1.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Print a 400-character text sample generated by the current model.

    Intended to be registered as the ``on_epoch_end`` hook of a callback.

    Args:
        epoch: Epoch index, shown in the header line.
        logs: Passed by the callback machinery; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # The seed position is drawn once, before the loop, so every diversity
    # setting would start from the same seed text.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    vocab_size = len(chars)

    # Only diversity 1.0 is sampled; add more values to this tuple to compare.
    for diversity in (1.0,):
        print('----- diversity:', diversity)

        window = text[seed_start: seed_start + maxlen]
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(window)

        for _ in range(400):
            # One-hot encode the current window as a batch of size 1.
            encoded = np.zeros((1, maxlen, vocab_size))
            timesteps = np.arange(len(window))
            char_ids = [char_indices[c] for c in window]
            encoded[0, timesteps, char_ids] = 1.

            # Predict the next-character distribution and sample from it.
            distribution = model.predict(encoded, verbose=0)[0]
            picked = indices_char[sample(distribution, diversity)]

            # Slide the window forward by one character.
            window = window[1:] + picked

            # Emit each character immediately so the text streams out.
            sys.stdout.write(picked)
            sys.stdout.flush()
        print()

# Wrap on_epoch_end in a LambdaCallback so a text sample is printed after every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Train on the one-hot windows (x) and their next-character targets (y).
# Kept as the cell's last bare expression, so the returned History object is displayed.
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60
----- Generating text after Epoch: 0
----- diversity: 1.0
----- Generating with seed: "ed in the conception
of "greatness", wit"
ed in the conception
of "greatness", withence is to gamines. the uncuries arcengous! whele: 
and uspure of will be natural leest defitions.=--ssmething,"
havam he artistle musice masusion, of us the spirituful,  joy one in possobled!
mein,

know, chararly, how of temso a simply, one is
purity betleest beantimated
been, of etherials or alto higherfoun, cear this artis!--the pealoust, uncoping exern same deeved-pre"s would ned warker and 
Epoch 2/60
----- Generating text after Epoch: 1
----- diversity: 1.0
----- Generating with seed: "es for human judgment and
deduction, and"
es for human judgment and
deduction, and avery saseous. he are
demanged by this the wilenrut" nother the lively amond intellecture actsaments owrroul how hand to the blecfiless in the heaven to gord pressifaliso retains
of mane and rewards that =ach it natessefzar honests time a

<keras.callbacks.History at 0x7fc8b56d8d30>