In [1]:
import keras
import numpy as np

In [2]:
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt'
)
text = open(path).read().lower()

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
[1m600901/600901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8us/step


In [4]:
print('Corpus length: ',len(text))

Corpus length:  600901


In [5]:
maxlen = 60
step = 3
sentences = []
next_chars = []

In [6]:
for i in range(0 , len(text) - maxlen,step):
    sentences.append(text[i: i+maxlen])
    next_chars.append(text[i+maxlen])
print('Number of sequences: ', len(sentences))

Number of sequences:  200281


In [11]:
sentences[0:10]

['preface\n\n\nsupposing that truth is a woman--what then? is the',
 'face\n\n\nsupposing that truth is a woman--what then? is there ',
 'e\n\n\nsupposing that truth is a woman--what then? is there not',
 '\nsupposing that truth is a woman--what then? is there not gr',
 'pposing that truth is a woman--what then? is there not groun',
 'sing that truth is a woman--what then? is there not ground\nf',
 'g that truth is a woman--what then? is there not ground\nfor ',
 'hat truth is a woman--what then? is there not ground\nfor sus',
 ' truth is a woman--what then? is there not ground\nfor suspec',
 'uth is a woman--what then? is there not ground\nfor suspectin']

In [12]:
chars = sorted(list(set(text)))

In [13]:
print('Unique character: ',len(chars))

Unique character:  59


In [14]:
char_indices = dict((char, chars.index(char)) for char in chars)

In [17]:
char_indices

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 "'": 4,
 '(': 5,
 ')': 6,
 ',': 7,
 '-': 8,
 '.': 9,
 '0': 10,
 '1': 11,
 '2': 12,
 '3': 13,
 '4': 14,
 '5': 15,
 '6': 16,
 '7': 17,
 '8': 18,
 '9': 19,
 ':': 20,
 ';': 21,
 '=': 22,
 '?': 23,
 '[': 24,
 ']': 25,
 '_': 26,
 'a': 27,
 'b': 28,
 'c': 29,
 'd': 30,
 'e': 31,
 'f': 32,
 'g': 33,
 'h': 34,
 'i': 35,
 'j': 36,
 'k': 37,
 'l': 38,
 'm': 39,
 'n': 40,
 'o': 41,
 'p': 42,
 'q': 43,
 'r': 44,
 's': 45,
 't': 46,
 'u': 47,
 'v': 48,
 'w': 49,
 'x': 50,
 'y': 51,
 'z': 52,
 '¤': 53,
 '¦': 54,
 '©': 55,
 '«': 56,
 'ã': 57,
 '†': 58}

In [19]:
x = np.zeros((len(sentences),maxlen,len(chars)), dtype=np.bool_)
y = np.zeros((len(sentences),len(chars)),dtype = np.bool_)

In [20]:
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i,t,char_indices[char]] = 1
    y[i , char_indices[next_chars[i]]] = 1

In [24]:
from keras import layers

In [25]:
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape = (maxlen , len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

  super().__init__(**kwargs)


In [27]:
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(
    loss = 'categorical_crossentropy',
    optimizer= optimizer
)

In [29]:
def sample(preds , temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds)/temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1,preds,1)
    return np.argmax(probas)

In [30]:
import random
import sys

In [None]:
for epoch in range(1,60):
    print('epoch',epoch)
    model.fit(x,y,batch_size=128,epochs=1)
    start_index = random.randint(0,len(text)-maxlen-1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)
        for i in range(400):
            sampled = np.zeros((1,maxlen,len(chars)))
            for t,char in enumerate(generated_text):
                sampled[0,t,char_indices[char]] = 1
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds,temperature)
            next_char = chars[next_index]
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)

epoch 1
[1m1565/1565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 88ms/step - loss: 2.3240
--- Generating with seed: "alness of all "will to power" before your eyes, that almost
"
------ temperature: 0.2
alness of all "will to power" before your eyes, that almost
and the religions and and regind the sathing of the speak of the speak and the precised the relights of the soul and the experstion of the present of the conscient of the speak, the extent of the pressions of the speak of the speak the speak and the spirituation of the scient of and and the perhaps of the conscient of the something the speakh of the desint of the perhaps of the great of the speak ------ temperature: 0.5
eakh of the desint of the perhaps of the great of the speak of the fuelly and conpresing the rectants of the speak is so as a supher the stand with the casted one life the great of perhaps the failly in the expets, and the every which the grout and which which regin the sach of the cause of the more