In [1]:
import numpy as np
from tensorflow import keras


In [2]:
path = keras.utils.get_file("nietzsche.txt",
                            origin = 'http://s3.amazonaws.com/text-datasets/nietzsche.txt')

Downloading data from http://s3.amazonaws.com/text-datasets/nietzsche.txt


In [3]:
text = open(path).read().lower()

In [4]:
print("Corpus Lenght: ", len(text))

Corpus Lenght:  600893


In [5]:
maxlen = 60
step = 3
sentences = []
next_chars = []
for i in range(0,len(text) - maxlen, step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])
print("Number of Sequences:" , len(sentences))

Number of Sequences: 200278


In [6]:
chars = sorted(list(set(text)))
print("Unique Characters: ", len(chars))

char_indices = dict((char, char.index(char))for char in chars)

Unique Characters:  57


In [7]:
# Next, one-hot encode the characters into binary arrays.
print('Vectorization ... ')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool_)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool_)
for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i, t, char_indices[char]] = 1
  y[i, char_indices[next_chars[i]]] = _
print(' ... done')

Vectorization ... 
 ... done


In [8]:
from tensorflow.keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers. Dense(len(chars), activation='softmax'))

optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

model. summary ()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 128)               95232     
                                                                 
 dense (Dense)               (None, 57)                7353      
                                                                 
Total params: 102585 (400.72 KB)
Trainable params: 102585 (400.72 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
def sample(preds, temperature=1.0):
  preds = np.asarray(preds). astype('float64')
  preds = np.log(preds) / temperature
  exp_preds = np.exp(preds)
  preds = exp_preds / np.sum(exp_preds)
  probas = np.random.multinomial(1, preds, 1)

  return np.argmax(probas)

In [10]:
import random
import sys

NUM_EPOCHS = 3
CHAR_GENERATED_TEXT = 400 # We generate 400 characters

for epoch in range(1, NUM_EPOCHS):
  print('epoch', epoch)

# Fit the model for 1 epoch on the available training data
  model.fit(x, y, batch_size=128, epochs=1)

# Select a text seed at random
  start_index = random.randint(0, len(text) - maxlen - 1)
  generated_text = text[start_index: start_index + maxlen]
  print(f" --- Generating with seed: \"{generated_text}\"")

epoch 1
 --- Generating with seed: "ing in body and soul. that, however,
which is most diseased "
epoch 2
 --- Generating with seed: "lf-observers who believe that there are
"immediate certainti"


In [11]:
for temperature in [0.2, 0.5, 1.0, 1.2]:
  print(f"-----temperature;{temperature}")
  sys. stdout.write(generated_text)

  for i in range(CHAR_GENERATED_TEXT):
    sampled = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(generated_text):
      sampled[0, t, char_indices[char]] = 1.
    preds = model.predict(sampled, verbose=0)[0]
    next_index = sample(preds, temperature)
    next_char = chars[next_index]

  generated_text += next_char
  generated_text = generated_text[1:]

  sys.stdout.write(next_char)
  sys.stdout.flush()
print()




-----temperature;0.2
lf-observers who believe that there are
"immediate certainti(-----temperature;0.5
f-observers who believe that there are
"immediate certainti((-----temperature;1.0
-observers who believe that there are
"immediate certainti(((-----temperature;1.2
observers who believe that there are
"immediate certainti(((p
