Reference: https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical

In [7]:
# Load the training text (decoded as UTF-8) and convert it to lowercase so
# that upper- and lowercase letters share a single vocabulary entry.
filename = "wonderland.txt"
# A context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until garbage collection.
with open(filename, 'r', encoding='utf-8') as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [8]:
# Vocabulary: every distinct character of the corpus in sorted order, with
# each character mapped to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
print(char_to_int)

{'\n': 0, ' ': 1, '!': 2, '#': 3, "'": 4, '(': 5, ')': 6, '*': 7, ',': 8, '-': 9, '.': 10, '0': 11, '1': 12, '2': 13, '3': 14, '4': 15, '7': 16, '8': 17, ':': 18, ';': 19, '?': 20, '[': 21, ']': 22, '_': 23, 'a': 24, 'b': 25, 'c': 26, 'd': 27, 'e': 28, 'f': 29, 'g': 30, 'h': 31, 'i': 32, 'j': 33, 'k': 34, 'l': 35, 'm': 36, 'n': 37, 'o': 38, 'p': 39, 'q': 40, 'r': 41, 's': 42, 't': 43, 'u': 44, 'v': 45, 'w': 46, 'x': 47, 'y': 48, 'z': 49, 'ù': 50, '—': 51, '‘': 52, '’': 53, '“': 54, '”': 55}


In [9]:
# Corpus size in characters and number of distinct characters (vocabulary size).
n_chars, n_vocab = len(raw_text), len(chars)
for label, count in (("Total Characters: ", n_chars), ("Total Vocab: ", n_vocab)):
    print(label, count)

Total Characters:  117572
Total Vocab:  56


In [10]:
# Number of input characters the model sees before predicting the next one.
seq_length = 20

# Slide a window of seq_length characters over the corpus one character at a
# time. Each window (as integer codes) is one input pattern, and the character
# immediately after the window is its target.
window_starts = range(n_chars - seq_length)
dataX = [
    [char_to_int[ch] for ch in raw_text[start:start + seq_length]]
    for start in window_starts
]
dataY = [char_to_int[raw_text[start + seq_length]] for start in window_starts]

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  117552


In [11]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize: scale the integer character codes by the vocabulary size so
# every input value lies in [0, 1)
X = X / float(n_vocab)
# one hot encode the output variable. num_classes is pinned to the vocabulary
# size; without it the width is inferred from the largest code present in
# dataY, which would silently shrink the output layer (and break the
# index -> character lookup) if the highest-coded character never occurs as
# a target.
y = to_categorical(dataY, num_classes=n_vocab)

In [12]:
# Single-layer LSTM character model: a window of (time steps, features) goes
# in, a softmax distribution over the next character's class comes out.
model = Sequential()
# Declare the input shape with an explicit Input object rather than passing
# `input_shape=` to the first layer, which newer Keras releases warn about.
model.add(tf.keras.Input(shape=(X.shape[1], X.shape[2])))
model.add(LSTM(256))
model.add(Dropout(0.2))
# One output unit per column of the one-hot encoded targets.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

  super().__init__(**kwargs)


In [16]:
# Checkpointing: monitor the training loss and write the model to disk only
# when the loss reaches a new minimum. The epoch number and loss value are
# substituted into the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.keras"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [17]:
# Train on the full pattern set; the checkpoint callback saves the model
# whenever the training loss improves.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20
[1m919/919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 3.1242
Epoch 1: loss improved from inf to 3.04943, saving model to weights-improvement-01-3.0494.keras
[1m919/919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 56ms/step - loss: 3.1241
Epoch 2/20
[1m918/919[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 52ms/step - loss: 2.8881
Epoch 2: loss improved from 3.04943 to 2.86548, saving model to weights-improvement-02-2.8655.keras
[1m919/919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 52ms/step - loss: 2.8881
Epoch 3/20
[1m918/919[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - loss: 2.7983
Epoch 3: loss improved from 2.86548 to 2.78273, saving model to weights-improvement-03-2.7827.keras
[1m919/919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - loss: 2.7983
Epoch 4/20
[1m918/919[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 59ms/step - loss: 2.7264
Epoch 4: loss improv

<keras.src.callbacks.history.History at 0x26eb2594450>

In [13]:
# Restore the weights from a saved checkpoint file, then compile the model
# again with the same loss and optimizer used for training.
# NOTE(review): the file name is hard-coded and must match a checkpoint that
# actually exists on disk -- confirm after re-training.
filename = "weights-improvement-20-2.1145.keras"
model.load_weights(filename)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

  saveable.load_own_variables(weights_store.get(inner_path))


In [14]:
# Reverse lookup (integer code -> character) for decoding model predictions.
int_to_char = dict(enumerate(chars))

In [17]:
import sys

# Number of characters to generate after the seed.
n_generate = 1000

# Pick a random training pattern as the seed. randint's upper bound is
# exclusive, so len(dataX) makes every pattern (including the last) eligible.
start = np.random.randint(0, len(dataX))
# Work on a copy: the loop appends to `pattern`, and aliasing dataX[start]
# would permanently lengthen that training sample and leave dataX ragged.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for step in range(n_generate):
    # Shape and scale the window exactly like the training inputs:
    # [1 sample, time steps, 1 feature], codes divided by the vocabulary size.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable next character.
    index = int(np.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: append the predicted code and drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
" the sky.

alice went "
 he aalitd d toin wfll oase 
ant the was aolntg to the ctoreusin of the crold an tou cank an toe tail tf the keoe  the wosed tai iowt the cate pat ie a lotte, and the dadt was ooteigg an the coold  she fad not deei to the thete was to ali toene th theeg aeain, and wast dnlning ano the rase thieg war soeee oo the taaei  and toened an the coold  she fad not deei to the thete was to ali toene th theeg aeain, and wast dnlning ano the rase thieg war soeee oo the taaei  and toened an the coold  she fad not deei to the thete was to ali toene th theeg aeain, and wast dnlning ano the rase thieg war soeee oo the taaei  and toened an the coold  she fad not deei to the thete was to ali toene th theeg aeain, and wast dnlning ano the rase thieg war soeee oo the taaei  and toened an the coold  she fad not deei to the thete was to ali toene th theeg aeain, and wast dnlning ano the rase thieg war soeee oo the taaei  and toened an the coold  she fad not deei to the thete w