In [5]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Input
import tensorflow as tf
import logging
# Only let TensorFlow log records at ERROR level or above through.
tf.get_logger().setLevel(logging.ERROR)

# Configuration shared by the cells below.
EPOCHS = 32  # passed to model.fit as the number of epochs
BATCH_SIZE = 256  # mini-batch size used for training
# NOTE(review): absolute path; looks specific to a Colab runtime — confirm
# before running elsewhere.
INPUT_FILE_NAME = '/content/frankenstein.txt'
WINDOW_LENGTH = 40  # characters per training fragment
WINDOW_STEP = 3  # stride (in characters) between consecutive fragments
BEAM_SIZE = 8  # candidates expanded per beam and beams kept after pruning
NUM_LETTERS = 11  # characters generated after the seed text
MAX_LENGTH = 50  # not referenced by any of the cells visible here

In [6]:
# Read the whole corpus. 'utf-8-sig' strips a leading byte-order mark if the
# file has one. The context manager closes the file even if read() raises.
with open(INPUT_FILE_NAME, 'r', encoding='utf-8-sig') as file:
    text = file.read()

# Make lowercase, turn newlines into spaces and collapse runs of spaces.
text = text.lower()
text = text.replace('\n', ' ')
# One replace('  ', ' ') pass only halves each run of spaces (three spaces
# become two), so repeat until no double space is left.
while '  ' in text:
    text = text.replace('  ', ' ')

# Encode characters as indices.
# Sorting gives a deterministic mapping: iteration order of a set of strings
# can differ between interpreter runs, which would silently change the
# meaning of every one-hot position after a kernel restart.
unique_chars = sorted(set(text))
char_to_index = {ch: index for index, ch in enumerate(unique_chars)}
index_to_char = {index: ch for index, ch in enumerate(unique_chars)}
# Width of a one-hot vector: one slot per distinct character.
encoding_width = len(char_to_index)

In [7]:
# Slide a WINDOW_LENGTH-character window over the text in WINDOW_STEP strides.
# Each window is one training input; the character right after it is the label.
window_starts = range(0, len(text) - WINDOW_LENGTH, WINDOW_STEP)
fragments = [text[start: start + WINDOW_LENGTH] for start in window_starts]
targets = [text[start + WINDOW_LENGTH] for start in window_starts]

# One-hot encode: X is (fragment, position, character), y is (fragment, character).
X = np.zeros((len(fragments), WINDOW_LENGTH, encoding_width))
y = np.zeros((len(fragments), encoding_width))
for row, (fragment, target_char) in enumerate(zip(fragments, targets)):
    for position, char in enumerate(fragment):
        X[row, position, char_to_index[char]] = 1
    y[row, char_to_index[target_char]] = 1

In [9]:
# Character-level language model: two stacked LSTM layers followed by a
# softmax over the character set.
model = Sequential()
# The batch dimension is deliberately left unspecified. The prediction cell
# feeds batches of 1 and BEAM_SIZE sequences, and the last training batch can
# be smaller than BATCH_SIZE, so a static batch size of BATCH_SIZE would not
# describe the data the model actually receives. The time dimension is None
# so sequences of any length are accepted.
model.add(Input(shape=(None, encoding_width)))
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(128, return_sequences=True, dropout=0.2))
model.add(LSTM(128, dropout=0.2))
# One probability per character in the vocabulary.
model.add(Dense(encoding_width, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam')
model.summary()
# The last 5% of the samples are held out for validation; the mini-batch size
# is given here rather than baked into the model's input shape.
history = model.fit(X, y, validation_split=0.05,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS, verbose=2,
                    shuffle=True)

Epoch 1/32
542/542 - 234s - 431ms/step - loss: 2.7784 - val_loss: 2.6887
Epoch 2/32
542/542 - 228s - 421ms/step - loss: 2.4591 - val_loss: 2.5733
Epoch 3/32
542/542 - 229s - 423ms/step - loss: 2.3499 - val_loss: 2.4938
Epoch 4/32
542/542 - 263s - 485ms/step - loss: 2.2739 - val_loss: 2.4138
Epoch 5/32
542/542 - 233s - 429ms/step - loss: 2.2203 - val_loss: 2.3730
Epoch 6/32
542/542 - 235s - 434ms/step - loss: 2.1798 - val_loss: 2.3422
Epoch 7/32
542/542 - 232s - 427ms/step - loss: 2.1463 - val_loss: 2.3201
Epoch 8/32
542/542 - 235s - 434ms/step - loss: 2.1154 - val_loss: 2.3007
Epoch 9/32
542/542 - 230s - 424ms/step - loss: 2.0929 - val_loss: 2.2863
Epoch 10/32
542/542 - 260s - 480ms/step - loss: 2.0632 - val_loss: 2.2621
Epoch 11/32
542/542 - 266s - 491ms/step - loss: 2.0447 - val_loss: 2.2749
Epoch 12/32
542/542 - 234s - 432ms/step - loss: 2.0194 - val_loss: 2.2459
Epoch 13/32
542/542 - 261s - 482ms/step - loss: 2.0005 - val_loss: 2.2240
Epoch 14/32
542/542 - 232s - 427ms/step - loss:

In [10]:
# Seed text for generation. Every character must be a key of char_to_index,
# otherwise the lookup below raises KeyError.
letters = 'the body '
one_hots = []
for char in letters:
    x = np.zeros(encoding_width)
    x[char_to_index[char]] = 1
    one_hots.append(x)
# A beam is (cumulative log-probability, text so far, one-hot rows so far).
# The seed starts with probability 1, i.e. log-probability 0.
beams = [(np.log(1.0), letters, one_hots)]

# Predict NUM_LETTERS into the future.
for i in range(NUM_LETTERS):
    # All beams have the same length, so they stack into one dense minibatch.
    minibatch = np.array([triple[2] for triple in beams])
    y_predict = model.predict(minibatch, verbose=0)
    new_beams = []
    for triple, softmax_vec in zip(beams, y_predict):
        # Indices of the (at most) BEAM_SIZE most probable next characters,
        # most probable first. The stable sort on the negated probabilities
        # breaks ties in favour of the lower index, and the prediction array
        # itself is left unmodified.
        top_indices = np.argsort(-softmax_vec, kind='stable')[:BEAM_SIZE]
        for char_index in top_indices:
            probability = softmax_vec[char_index]
            if probability <= 0:
                # Candidates are in descending order, so everything from here
                # on is impossible too; log(0) would only add -inf beams.
                break
            new_prob = triple[0] + np.log(probability)
            new_letters = triple[1] + index_to_char[char_index]
            x = np.zeros(encoding_width)
            x[char_index] = 1
            # Build a new list so beams never share a one-hot history.
            new_one_hots = triple[2] + [x]
            new_beams.append((new_prob, new_letters,
                              new_one_hots))
    # Prune tree to only keep BEAM_SIZE most probable beams.
    new_beams.sort(key=lambda tup: tup[0], reverse=True)
    beams = new_beams[0:BEAM_SIZE]
# Print the surviving beams, most probable first.
for item in beams:
    print(item[1])

the body of the most
the body of the ligh
the body of the pres
the body of the morn
the body of the mont
the body of the more
the body of the pros
the body of the mort
