In [1]:
import os
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import utils

In [2]:
# Build the path with os.path.join so it resolves on every OS; a
# backslash-separated literal is only a valid path on Windows.
filename = os.path.join("datasets", "Wonderland.txt")

# The context manager closes the file handle as soon as the text is read.
with open(filename, "r", encoding="utf-8") as text_file:
    raw_text = text_file.read()

# Lower-case the corpus so upper- and lower-case letters share one symbol.
raw_text = raw_text.lower()

In [3]:
# data preparation
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_text))
# Lookup table from a character to its position in the sorted vocabulary.
char_to_int = {char: index for index, char in enumerate(chars)}

In [4]:
# Corpus length in characters, and the number of distinct characters in it.
n_chars, n_vocab = len(raw_text), len(chars)

In [5]:
# Report the corpus length and the vocabulary size, one per line.
print(
    f"len of text: {n_chars}",
    f"ttl characters {n_vocab}",
    sep="\n",
)

len of text: 144572
ttl characters 49


In [6]:
# Number of consecutive characters the model reads before predicting the next.
seq_length = 100

# Encode the corpus once up front. Encoding inside the window loop would look
# up every character again for each of the overlapping windows it belongs to.
encoded_text = [char_to_int[char] for char in raw_text]

# Slide a window over the text one character at a time: each input is
# seq_length encoded characters and its target is the character right after.
# Slicing produces a fresh list per window, so patterns never share storage.
# When the text is not longer than seq_length the range is empty and both
# lists stay empty.
window_starts = range(n_chars - seq_length)
data_X = [encoded_text[start:start + seq_length] for start in window_starts]
data_y = [encoded_text[start + seq_length] for start in window_starts]

n_patterns = len(data_X)

In [7]:
# print(f"ttl patterns: {n_patterns}")    

In [8]:
# Arrange the encoded windows as an (n_patterns, seq_length, 1) array and
# divide the integer codes by the vocabulary size.
X = np.asarray(data_X).reshape((n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the targets.
# NOTE(review): the class count is not passed explicitly, so it is presumably
# inferred from the values present in data_y — confirm it matches n_vocab.
y = utils.to_categorical(data_y)

In [9]:
# One LSTM layer reads a window; dropout follows, and a softmax layer sized by
# the one-hot target width produces the next-character distribution.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[1], activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")

In [10]:
# Keep checkpoints in the "weights" directory: that is where the
# weight-loading cell looks for them, so training and loading stay in sync.
checkpoint_dir = "weights"
os.makedirs(checkpoint_dir, exist_ok=True)

# The {epoch} and {loss} placeholders are filled in when a checkpoint is written.
filepath = os.path.join(
    checkpoint_dir,
    "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5",
)
# Monitor the training loss and save only when it reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min'
)
callbacks_list = [checkpoint]

In [11]:
# Train for 20 epochs in batches of 128 with the checkpoint callback attached.
model.fit(
    X,
    y,
    epochs=20,
    batch_size=128,
    callbacks=callbacks_list,
)

Epoch 1/20

Epoch 00001: loss improved from inf to 3.02407, saving model to weights-improvement-01-3.0241.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.02407 to 2.81985, saving model to weights-improvement-02-2.8198.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.81985 to 2.71938, saving model to weights-improvement-03-2.7194.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.71938 to 2.64502, saving model to weights-improvement-04-2.6450.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.64502 to 2.57673, saving model to weights-improvement-05-2.5767.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.57673 to 2.51569, saving model to weights-improvement-06-2.5157.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.51569 to 2.46563, saving model to weights-improvement-07-2.4656.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.46563 to 2.41640, saving model to weights-improvement-08-2.4164.hdf5
Epoch 9/20

Epoch 00009: loss improved from 2.41640 to 2.37004, saving model to weig

<tensorflow.python.keras.callbacks.History at 0x23aae18c610>

In [14]:
# Persist the whole model under models/model_lstm_1.
model.save(r"models/model_lstm_1")




INFO:tensorflow:Assets written to: models/model_lstm_1\assets


INFO:tensorflow:Assets written to: models/model_lstm_1\assets


In [12]:
# Restore the weights stored in the selected checkpoint file.
# NOTE(review): the file name embeds the epoch and loss of one particular
# training run — it has to be updated by hand after retraining.
checkpoint_name = "weights-improvement-20-2.0035.hdf5"
filename = os.path.join("weights", checkpoint_name)
model.load_weights(filename)

# Re-compile with the same loss and optimizer used when the model was built.
model.compile(optimizer="adam", loss="categorical_crossentropy")

In [28]:
# Inverse vocabulary: position in the sorted character list -> character.
int_to_char = dict(enumerate(chars))

In [40]:
# Settings for text generation.
SEED: int = 42        # seed passed to NumPy's global random generator
ITERS: int = 100      # number of characters to generate
final_line: str = ""  # holds the generated text

In [41]:
# Pick a reproducible random window from the corpus to prime the generator.
np.random.seed(SEED)
# randint's upper bound is exclusive, so len(data_X) makes every pattern,
# including the last one, eligible.
start = np.random.randint(0, len(data_X))

# Work on a copy: the window is extended below, and doing that on the
# original list would lengthen the training pattern stored in data_X.
pattern = list(data_X[start])

print(''.join([int_to_char[value] for value in pattern]))

# Collect the output from scratch so re-running this cell does not append to
# the text produced by an earlier run.
generated_chars = []

for step in range(ITERS):
    # Same layout and scaling as the training inputs: (1, window length, 1),
    # with codes divided by the vocabulary size.
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)

    # Greedy decoding: take the class with the highest predicted probability.
    idx = int(np.argmax(prediction))
    generated_chars.append(int_to_char[idx])

    # Slide the window: drop the oldest character, append the predicted one.
    pattern = pattern[1:] + [idx]

final_line = "".join(generated_chars)


    beautiful, beauti—ful soup!”


“chorus again!” cried the gryphon, and the mock turtle had just be


In [42]:
# Show the generated text (print renders its newlines instead of escaping them).
print(final_line)

lut an in ano drene th the tibte 
andce wan aniin  the had nutt then the was soi tired sar th the li
