In [1]:
import keras
import numpy as np

path = keras.utils.get_file(
    '/Users/bifnudozhao/Projects/ai-playground/datasets/nietzshe.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt'
)

text = open(path).read().lower()
print('Corpus length: ', len(text))

Corpus length:  600893


获取语料之后，先使用 `maxlen` 产生相互重叠的序列，用 one-hot 编码，然后将它们组织为一个形状为 `(sequences, maxlen, unique_charaters)` 的 numpy 数组。

In [2]:
maxlen = 60
step = 3 # sample a new sequence every three characters
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

print('Number of sequences: ', len(sentences))

chars = sorted(list(set(text)))
print('Unique characters: ', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences:  200278
Unique characters:  57


In [5]:
from keras import layers
import tensorflow as tf

model = keras.models.Sequential()
model.add(tf.compat.v1.keras.layers.CuDNNLSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [6]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [8]:
import random
import sys

for epoch in range(1, 61):
    print('epoch', epoch)
    # fits the model for one iteration on the data
    model.fit(x, y, batch_size=128, epochs=1)

    # only outputs temperal result at epoch 10, 20, 30, 40, 50, 60
    if epoch % 10 != 0: continue

    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index:start_index + maxlen]
    print('--- Generateing with seed: ', generated_text)

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('----- temperature: ', temperature)
        sys.stdout.write(generated_text)

        # generates 400 characters, starting from the seed text
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)

epoch 1
epoch 2
epoch 3
epoch 4
epoch 5
epoch 6
epoch 7
epoch 8
epoch 9
epoch 10
--- Generateing with seed:  ingness, and as it were lighter and darker shades and
tones 
----- temperature:  0.2
ingness, and as it were lighter and darker shades and
tones of the most many the strength in the strength of the present and the most more most disposition of the most and from the most many the most conscience of the contemntly that the discourse, and the sense of the sense of the state of the most such a such a desire that the most consideration of the strength and something and the contend the most the most instinct of the religion of the supportual and----- temperature:  0.5
most the most instinct of the religion of the supportual and experience of europe of nature, that the lack of the contraded
to which all that the spirit and contemntly, in such a things that the present free them discipline of the call the "much in the strange to the condections that the free spirit, and the conception i

  preds = np.log(preds) / temperature


n actions and man and man are so its own conception of the same a person and former and presen----- temperature:  0.5
ts own conception of the same a person and former and present perpetually be adventuries and standard and the sense of the powerful the the
sacrifice of the protent of the
world of life
from this really same the first refules to man arises and the most exceptional consideration and conduct of the at has a man of the pertorate and the highest enthulthes and the disposing the powerful person to the most pleasure, and as a pertont of the act of the philosophe----- temperature:  1.0
most pleasure, and as a pertont of the act of the philosopher that man in our perfestion of the definer stalk it respict of, how how we be danger, notarability,--but here in its true nepthing far and puown wholly even he cals aro,

     thus dow the experienciuation, in form it save in theolt into be
part, and the nairanciets the uniavence, exception forowh"; all the fact the creation, the
intel