In [3]:
import tensorflow
import tensorflow.keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
import numpy as np
import random

In [4]:
path = './nietzsche.txt'

In [5]:
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600918


In [10]:
maxlen = 60
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
print('Done!')

Number of sequences: 200286
Unique characters: 62
Vectorization...
Done!


In [16]:
next_chars[100]

'c'

In [14]:
x.shape

(200286, 60, 62)

In [None]:
next_chars

In [18]:
print(chars)

62


In [13]:
print(char_indices)

{'\n': 0, ' ': 1, '!': 2, '"': 3, "'": 4, '(': 5, ')': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '=': 22, '?': 23, '[': 24, ']': 25, '_': 26, 'a': 27, 'b': 28, 'c': 29, 'd': 30, 'e': 31, 'f': 32, 'g': 33, 'h': 34, 'i': 35, 'j': 36, 'k': 37, 'l': 38, 'm': 39, 'n': 40, 'o': 41, 'p': 42, 'q': 43, 'r': 44, 's': 45, 't': 46, 'u': 47, 'v': 48, 'w': 49, 'x': 50, 'y': 51, 'z': 52, '\xa0': 53, '¤': 54, '¦': 55, '©': 56, '«': 57, 'â': 58, 'ã': 59, 'ƒ': 60, '€': 61}


In [20]:
print(x.shape)

(200286, 60, 62)


In [21]:
print(y.shape)

(200286, 62)


In [17]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [18]:
model = models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))
optimizer = optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [42]:
for epoch in range(1, 60):
    print('epoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        print(generated_text, end='')
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            generated_text += next_char
            generated_text = generated_text[1:]
            print(next_char, end='')
        print()

epoch 1
Train on 200286 samples
--- Generating with seed: "r knows the purpose and the
ignominy of the cross--how ghost"
------ temperature: 0.2
r knows the purpose and the
ignominy of the cross--how ghostht stronger of the servical of the contrary sense of the contrary soul and still the sense of the destruition to the sense of the contrary soul of the contrary and the spirit and the strength and the struggle to the sense of the contrary and every contrary and strong and still strong and strong and the conscience to the sense of the habit to the conscience of the conscience of the conscience of th
------ temperature: 0.5
 to the conscience of the conscience of the conscience of the conscience of the first and the constraint that the fact in all the self-artists in the passions in the devil and destruition of the self-present the world have not a way of the course soul as must be the moral power; the fact and look of the orparary still ascetic democratic suffered the former the intelle

KeyboardInterrupt: 