# Load data

In [1]:
import keras
import numpy as np
path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Using TensorFlow backend.


Corpus length: 600901


In order to control the amount of stochasticity in the sampling process, we’ll introduce a parameter called the softmax temperature that characterizes the entropy of the
probability distribution used for sampling: it characterizes how surprising or predictable the choice of the next character will be. Given a temperature value, a new probability distribution is computed from the original one (the softmax output of the
model) by reweighting it in the following way

# Reweighting a probability distribution to a different temperature
Bigger the temperature value more random the the distribution sampling is 

In [2]:
import numpy as np

def reweight_distribution(original_distribution, temperature=0.5):
    distribution = np.log(original_distribution) / temperature
    distribution = np.exp(distribution)
    return distribution / np.sum(distribution) # divide by sum to renormalize

# Vectorize the sequences of characters
extract partially overlapping sequences of length maxlen, one-hot encode
them, and pack them in a 3D Numpy array x of shape (sequences, maxlen,
unique_characters). Simultaneously, you’ll prepare an array y containing the corresponding targets: the one-hot-encoded characters that come after each extracted
sequence.

In [3]:
maxlen = 60         # length of sequence
step = 3            # new sequence in every 3 character

sentences = []      # hold the sentences
next_chars = []     # holds the next chars

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i+maxlen])
    next_chars.append(text[i+maxlen])
    
print('number of sequences: ', len(sentences))

number of sequences:  200281


In [4]:
chars = sorted(list(set(text)))
print('number of unique chars ', len(chars))

number of unique chars  59


In [5]:
chars_indices = dict((char, chars.index(char)) for char in chars)

In [6]:
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, chars_indices[char]] = 1
    y[i, chars_indices[next_chars[i]]] = 1

In [7]:
print(x.shape)
print(y.shape)

(200281, 60, 59)
(200281, 59)


# Building the model

In [8]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [9]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Training the model
Given a trained model and a seed text snippet, you can generate new text by doing the
following repeatedly:
1. Draw from the model a probability distribution for the next character, given the
generated text available so far.
2. Reweight the distribution to a certain temperature.
3. Sample the next character at random according to the reweighted distribution.
4. Add the new character at the end of the available text.
This is the code you use to reweight the original probability distribution coming out
of the model and draw a character index from it (the sampling function)

## Function to sample the next char given the model's parameter

In [10]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    
    probs = np.random.multinomial(1, preds, 1)    # tatal = 1, probs = prob dist, 1 output
    return np.argmax(probs)                       # index of char (0 - 58)

# Text generation loop

In [None]:
import random
import sys

for epoch in range(1, 60):
    print('\nepoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('\n\n------ temperature:', temperature)
        sys.stdout.write(generated_text)
        
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, chars_indices[char]] = 1.
                
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)


epoch 1
Epoch 1/1
--- Generating with seed: "vidual. even exceptional men, who can think beyond their own"


------ temperature: 0.2
vidual. even exceptional men, who can think beyond their own the become of the because and the spirity of the struct the conself of the become of the expections of the experience and stret of the every the experience of the conterments of the stret of the who has the stret of the conself of the conterment of the conterman the self-conscience of the stret of the conself the become of the semple himself of the every the spiritual conself that the self-consci

------ temperature: 0.5
self of the every the spiritual conself that the self-conscience in the greater the the conterds, the conterments of even sceptions in the very because as in the man one sees this farter strance and can inscire for a feels in feel became them the master the been beer become and become excestions to the most gonatices that see constation of the philosophy of
the stralce and the 

ld es, remain the spirit, anvental with it sutpality: shon, not--but degree, which make sympute withds of they honderstungnion those moot human numbers and regalded nible.
pose should calsonce man soil,
love ruthical time, rate,
as stics must the health everstly tohing cleasures: womaken finel"? of creat craves, whime this own, missiviousnectis
inavalused
is fasuity, whithists instint,s which master is hy man tasted to grigmitous valued severively, and eve
epoch 5
Epoch 1/1
--- Generating with seed: "ious of every thinker
who "wishes to prove something"--that "


------ temperature: 0.2
ious of every thinker
who "wishes to prove something"--that the strong the sense of the sense of the spirit the sense of the spirit and discoveren and the present the fect the spirit the sense of the strong the strength of the sense the spirit the spirit the spirit the spirit and the spirit and such a prefender the sense of the strong the sense of the strength and the spirit the sense of the such and th

  app.launch_new_instance()


e individual, birdly pare
subslinglity which par7! of human
evist in the
tendon the cartoration highly kemprene,ter? and upon psefilifies
himself, just mustecly the namens of history compared yer th
epoch 6
Epoch 1/1
--- Generating with seed: "r as
such, he would one day have to say to himself: "the dev"


------ temperature: 0.2
r as
such, he would one day have to say to himself: "the developed and in the best the self-desires the same to the strength of the spirit of the strength to the sense of the other one must in the stand of the strength is a superioration of the present of the same to the most and superiority and something that the stand of the strength of the subject of the superiority of the most and the still a more to the most self-conscioural agreeable to self-somethin

------ temperature: 0.5
more to the most self-conscioural agreeable to self-something carry world, the superioration and consideration of the whole to every thinkers and the highest hand to everything and i