In [1]:
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import random
import io

In [3]:
# Fetch the Nietzsche corpus through Keras' download helper; `path` is the
# local file location that the helper returns.
CORPUS_URL = "https://s3.amazonaws.com/text-datasets/nietzsche.txt"
path = keras.utils.get_file("nietzsche.txt", origin=CORPUS_URL)

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt


In [5]:
# Load the whole corpus as lowercase text on a single line.
with open(path, encoding="utf-8") as f:
    # Newlines are turned into spaces so generated samples display nicely.
    text = f.read().lower().replace("\n", " ")
print("Corpus length:", len(text))

Corpus length: 600893


In [12]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print("Total chars:", len(chars))

# Lookup tables in both directions: character -> index and index -> character.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

Total chars: 56


In [46]:
# Slice the corpus into overlapping windows of `maxlen` characters, starting a
# new window every `step` characters. The character right after each window is
# the prediction target for that window.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start : start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print("Number of sequences:", len(sentences))

# One-hot encode the data: x[i, t, c] marks character c at position t of
# window i, and y[i, c] marks the character that follows window i.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, (sentence, target) in enumerate(zip(sentences, next_chars)):
    y[i, char_indices[target]] = 1
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1

Number of sequences: 200285


In [44]:
# Peek at the first ten training windows (each one is 40 characters long).
print(sentences[:10])

['preface   supposing that truth is a woma', 'face   supposing that truth is a woman--', 'e   supposing that truth is a woman--wha', ' supposing that truth is a woman--what t', 'pposing that truth is a woman--what then', 'sing that truth is a woman--what then? i', 'g that truth is a woman--what then? is t', 'hat truth is a woman--what then? is ther', ' truth is a woman--what then? is there n', 'uth is a woman--what then? is there not ']


In [48]:
# Show the one-hot encoding of the first window, keeping the leading axis.
print(x[:1])

[[[False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]]]


In [49]:
# Scratch example, independent of the model: enumerate() wraps a sequence in
# an iterator of (index, item) pairs.
languages = ["Python", "Java", "JavaScript"]
enumerate_prime = enumerate(languages)

In [50]:
# Printing the enumerate object shows only its repr, not the (index, item)
# pairs; wrap it in list() to see the pairs themselves.
print(enumerate_prime)

<enumerate object at 0x000000000682F340>


In [51]:
# Single-layer LSTM language model: a one-hot window of `maxlen` characters
# goes in, a softmax distribution over the vocabulary comes out.
model = keras.Sequential()
model.add(keras.Input(shape=(maxlen, len(chars))))
model.add(layers.LSTM(128))
model.add(layers.Dense(len(chars), activation="softmax"))

# Categorical cross-entropy matches the one-hot targets built for `y`.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

In [52]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (for example a softmax output).
        temperature: Divisor applied to the log-probabilities before they are
            re-normalised. Values below 1 sharpen the distribution, values
            above 1 flatten it, and 0 picks the most likely index
            deterministically.

    Returns:
        The sampled index, as produced by ``np.argmax``.
    """
    preds = np.asarray(preds).astype("float64")
    if temperature == 0:
        # The zero-temperature limit is greedy decoding; dividing by zero
        # below would produce NaN probabilities instead.
        return np.argmax(preds)
    # log(0) = -inf is the wanted result for impossible entries (they keep a
    # probability of exactly zero), so silence NumPy's divide-by-zero warning.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating: the normalised result is
    # mathematically unchanged, but exp() can no longer overflow, nor can
    # every entry underflow to zero at very small temperatures.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [53]:
# Display the one-hot matrix of the second training window
# (shape: maxlen x len(chars)).
x[1]

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [54]:
epochs = 40
batch_size = 128

for epoch in range(epochs):
    # Train one epoch at a time so that sample text can be generated in between.
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    # One random seed window per epoch, shared by every diversity setting.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed_text = text[start_index : start_index + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print("...Diversity:", diversity)
        print('...Generating with seed: "' + seed_text + '"')

        sentence = seed_text
        generated = ""
        for _ in range(400):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            window_ids = [char_indices[char] for char in sentence]
            x_pred[0, np.arange(len(window_ids)), window_ids] = 1.0

            # Predict the next-character distribution and draw from it.
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            # Slide the window one character forward.
            sentence = sentence[1:] + next_char
            generated += next_char

        print("...Generated: ", generated)
        print()


Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: "sure of christian sentiments, that the s"
...Generated:  trenged and franger with the whole to the senses of the most such a mankind and something of the serman and a find and possestion of the will and and the that the will to the world to the the spirit and strong and and the spirit to the propard and the something and possesses and desire of the world and and the which is the spirit of the sermom and sermous and strong and sensed of the whole to the 

...Diversity: 0.5
...Generating with seed: "sure of christian sentiments, that the s"
...Generated:  empach a facture and our self-with a some a some and desiment and refility and who loveful, and considical to the propressions of the spirit that the world, and grang to the things and stand of the proportions to the theme it our self-do for the become and doution, more possestions, and served and understands, in the arridger and a came so a religious believed t

KeyboardInterrupt: 