In [1]:
import sys

import matplotlib.pyplot as plt
import numpy as np
from keras.utils import get_file
from keras import models, optimizers
from keras.layers import Dense, LSTM

Using TensorFlow backend.


In [2]:
# Get the text file
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

with open(path) as f:
    text = f.read().lower()
print(f"The length of the corpus is: {len(text)}")

The length of the corpus is: 600893


In [3]:
# Length of the extracted character sequences
max_len = 60

# Sample sequences every `step` characters
step = 3

# Hold extracted sequences
sequences = []

# Hold target of extracted sequences (i.e. next character)
next_chars = []

# Iterate over corpus to extract sequences and next charater
# for each sequeence every `step` characters
for i in range(0, len(text) - max_len, step):
    sequences.append(text[i:i + max_len])
    next_chars.append(text[i + max_len])
print(f'Number of sequences is: {len(sequences)}')

# Extract the unique character from text and sort them
chars = sorted(list(set(text)))
print(f'Number of unique characters is: {len(chars)}')

# Define a char_indices dictionary that maps each unique character to its index
char_indices = {char:chars.index(char) for char in chars}

# One-hot encode characters into arrays
X = np.zeros((len(sequences), max_len, len(chars)))
y = np.zeros((len(sequences), len(chars)))
for i, sentence in enumerate(sequences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
        y[i, char_indices[next_chars[i]]] = 1
    
print(f'Feature shape: {X.shape}; Target shape: {y.shape}')

Number of sequences is: 200278
Number of unique characters is: 57
Feature shape: (200278, 60, 57); Target shape: (200278, 57)


In [4]:
# Build the model
model = models.Sequential()
model.add(LSTM(128, input_shape=(max_len, len(chars))))
model.add(Dense(len(chars), activation='softmax'))
model.compile(optimizer=optimizers.rmsprop(lr=0.01),
              loss='categorical_crossentropy')

In [5]:
# Define sampling method that reweighting distribution of
# softmax output based on temperature
def sample(preds, temperature):
    preds = np.asarray(preds, 'float64')
    preds = np.log(preds) / temperature
    preds = np.exp(preds)
    # Normalize pred
    probs = preds / np.sum(preds)
    out = np.random.multinomial(1, probs, 1)
    return np.argmax(out)

In [7]:
# Iterate over num_epochs to train the model
for epoch in range(2):
    # Fit the model one epoch at a time
    print(f'epoch : {epoch}')
    model.fit(X, y, batch_size=128, epochs=1)
    
    # Select text seed at random
    start_index = np.random.randint(0, len(text) - max_len - 1)
    generated_text = text[start_index:start_index + max_len]
    print(f'----- Generated with seed:\n{generated_text}')
    
    # Iterate over different values of softmax temperatures
    for temperature in [0.2, 0.5, 1, 1.2]:
        print(f'----- Temperature used is : {temperature}')
        sys.stdout.write(generated_text)
        # Generate 400 characters
        for i in range(400):
            sampled = np.zeros((1, max_len, len(chars)))
            
            # One-hot encode randomly generated text
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1
            
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            generated_text += next_char
            generated_text = generated_text[1:]
            
            sys.stdout.write(next_char)
        print()

epoch : 0
Epoch 1/1
----- Generated with seed:
. fear and sympathy it is with these feelings that man has
h
----- Temperature used is : 0.2
. fear and sympathy it is with these feelings that man has
have and some of the state and the state the strither and the stright of the man and all the state of the man the better the present and successed and one and in the states that the same in the same and some of the states has and man and an all the present that the same and respect that the man in the strith the same that the present and are something and some of the same in the subject of the pres
----- Temperature used is : 0.5
re something and some of the same in the subject of the present of the love,
and there all itself a successes of the more realong of the general existence that is the same and the germans that which when the present all that which is not be germans and the general consequently habed and some near of the something have and degnesting mankind not as a state and all t