In [4]:
import numpy as np
import keras

Using TensorFlow backend.


In [3]:
def recalculate_distribution(original_distrib, temperature=0.5):
    distrib = np.exp(np.log(original_distrib)/temperature)
    return distrib/np.sum(distrib)

In [5]:
path = keras.utils.get_file('nietzsche.txt',origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600901


In [6]:
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text)-maxlen, step):
    sentences.append(text[i:i+maxlen])
    next_chars.append(text[i+maxlen])
    
print('Number of sequences:', len(sentences))
chars = sorted(list(set(text)))
print('Unique characters: ', len(chars))
char_indices = dict((char,chars.index(char)) for char in chars)


# One-hot encode
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i,t,char_indices[char]] = 1
    y[i,char_indices[next_chars[i]]] = 1

Number of sequences: 200281
Unique characters:  59


In [7]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128,input_shape=(maxlen,len(chars))))
model.add(layers.Dense(len(chars),activation='softmax'))

In [8]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [9]:
def sample(preds, temp=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds)/temp
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [14]:
import random
import sys

for epoch in range(1,10):
    print('epoch ', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text)-maxlen-1)
    generated_text = text[start_index: start_index+maxlen]
    print('\n--- Generating with seed: "' + generated_text + '"')
    
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('\n------ temperature:', temperature)
        sys.stdout.write(generated_text)
        
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0,t,char_indices[char]] = 1.
                
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)

epoch  1
Epoch 1/1

--- Generating with seed: " of woman is slowly evolving? oh europe! europe! we know
the"

------ temperature: 0.2
 of woman is slowly evolving? oh europe! europe! we know
the history of the spirit of the same not of the presence of the presence of the sense and the sense of the present of the presence of the sense of the presence of the present to the presence of the religious and the same time of the same and will and deceives to the same by the present of the same time of the same time and sense of the presence of the same time of the presence of the same part of th
------ temperature: 0.5
ence of the same time of the presence of the same part of the distinctive has
invilling in the same histiness, it is antither and stand of the mankind to the nature and christian right and pirts of his religion of the prigise in the sense presence of the distinction of the briepened to the how and sympathy is presence of the faitions of the same call as a stranges of the presenc