In [1]:
import keras
import numpy as np

path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Size of Corpus : ', len(text))

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Size of Corpus :  600893


In [2]:
maxlen = 60
step = 3

sentences = []
next_chars = []

for i in range(0, len(text)-maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])

print('Length of Sequences : ', len(sentences))

chars = sorted(list(set(text)))
print('Characters : ', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

print('Vectorizations....')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(chars):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Length of Sequences :  200278
Characters :  58
Vectorizations....


In [3]:
from keras import layers
from keras.models import Sequential

model = Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [4]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss = 'categorical_crossentropy', optimizer=optimizer)

In [5]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [7]:
import random
import sys

random.seed(42)
start_index = random.randint(0, len(text) - maxlen - 1)

for epoch in range(1, 22):
    print('Epcoh',epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    
    seed_text = text[start_index: start_index + maxlen]
    if epoch >=20:
        print('---- Seed Text: "{}"'.format(seed_text))
    
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('-----Temperature:', temperature)
        generated_text = seed_text
        sys.stdout.write(generated_text)
        
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


a  ieoesen  ee ne e  ti  a oheo    erhst n hie  tt    teo   f ee ea et  so  dee  t e    sg n r    o nhs e hoen   e te  e in   to ts    een t   a r sa nse   ein  eee eaeir ane   lnnedt  aopedeo ts eiaheeaea ee   e   e hepo el r  es eeen ee e e  n  e  hn ed oe enntnet  e oh e eie asoiea s eotteoe e  a  e e
-----Temperature: 1.0
the slowly ascending ranks and classes, in which,
through foiht  tnsfaehht  rsln imuntones  pe- aiidrenehtru eetltne  de  eteleehegs a eb noe p  de i epelad  ab eeheoatl  eleltai  noiserroenteesea-ih tou  lr oesolteawtecet etehee ese  e i t es ae
o
st er ; l y isu oeatieneeegn  fohevto nt eyhte.n n  ,riurwe hfefelen sty f eaoh h itieonhee nohsr, s eteneshpsh irhexa ert ei - hd  etsotwtepd i ory fi  tit  hs eeht  aateoopoie  dtcestrh nem it  esolrhh nto  on
-----Temperature: 1.2
the slowly ascending ranks and classes, in which,
through foiw ipt 
est tgtooenooant mtmhfrer a  eg  i
ee srise sr diwiedneef .e heno
aannegega eaos rwos net  ,aeleotarnamcispefriraa u aiio