<a href="https://colab.research.google.com/github/HanzhouLiu/Deep-Learning-with-Python-Exercises/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Text generation with LSTM

In [3]:
import keras
import numpy as np

path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893


In [4]:
# Length of extracted character sequences
maxlen = 60

# We sample a new sequence every `step` characters
step = 3

# This holds our extracted sequences
sentences = []

# This holds the targets (the follow-up characters)
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))

# List of unique characters in the corpus
chars = sorted(list(set(text)))
print('Unique characters:', len(chars))
# Dictionary mapping unique characters to their index in `chars`
char_indices = dict((char, chars.index(char)) for char in chars)

# Next, one-hot encode the characters into binary arrays.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200278
Unique characters: 57
Vectorization...


In [5]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [6]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [7]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [8]:
import random
import sys

for epoch in range(1, 60):
    print('epoch', epoch)
    # Fit the model for 1 epoch on the available training data
    model.fit(x, y,
              batch_size=128,
              epochs=1)

    # Select a text seed at random
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        # We generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

epoch 1
--- Generating with seed: "e enlightenment of modern times with
the french revolution ("
------ temperature: 0.2
e enlightenment of modern times with
the french revolution (for the some the sensity of the read and the sensity and its the sting the spirit of the and in the sensing the sensity of the self-conscience of the strightent the sensity of the stinction of the sense and and the stinct the spirit of the self-conscience of the senses the sensity and as a men and the sen the sensity of the sensity of the spirit the spirit of the strenge the spirit of the sensity 
------ temperature: 0.5
 spirit the spirit of the strenge the spirit of the sensity and decelt the sempicion of the same the spirit and hight, the conceriently in the means, in mered for the haste-no in the man in greanty.
the sense of the facting the rame and and herment every one it is an mens refighting it is its the man as at for not the and there in the parard wish the spriting
the instinction of the read the 

  This is separate from the ipykernel package so we can avoid doing imports until


eptr.arele, prograity are rebuladical--know than wishess thereby has been the
epoch 18
--- Generating with seed: "and there is not a more foolish "if
possible"--to do away wi"
------ temperature: 0.2
and there is not a more foolish "if
possible"--to do away with a man who are the present man are the facts of the same and the subject the sense of the art and seems to the promise of the art of the fact of the problem of the property of the spirit of the profound the problem of the problem of the sense of the promise of the same and seems to the man is the man are been adventure to the commence of the sciences of the sense of the world of the art to the p
------ temperature: 0.5
f the sciences of the sense of the world of the art to the propertices of his states of a precisely that it is a had to this most consequently, and the property of perhaps something in the general
present the sentent the spirit which all its place at the greater sense of the spirit" when the complete and the artis