In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import re

In [None]:
# Read the entire training corpus into memory as one string.
with open("shakespeare.txt", mode="r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()

In [3]:
# Lower-case the corpus, then drop every character that is not a-z or whitespace.
# NOTE(review): characters are deleted, not replaced by a space, so words joined
# only by punctuation (e.g. "be--that") are fused into one token — confirm this
# is intended.
text = re.sub(r"[^a-z\s]", "", text.lower())

In [4]:
# Fit a word-level vocabulary on the cleaned corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# One more than the number of distinct words: the extra slot accounts for id 0,
# which the padding step uses and which is never assigned to a word.
total_words = 1 + len(tokenizer.word_index)

In [5]:
# Number of context words fed to the model for each prediction.
sequence_length = 20

# Encode the whole corpus as one flat list of integer word ids.
(tokens,) = tokenizer.texts_to_sequences([text])

# Accumulator for the fixed-length training windows.
input_sequences = []

In [6]:
# Slide a window of (sequence_length + 1) token ids over the corpus: the first
# sequence_length ids are the context, the last id is the word to predict.
#
# The windows are collected in a list local to this cell rather than appended to
# a list created in another cell, so the cell can be re-run safely even after
# `input_sequences` has been rebound to an ndarray.
if len(tokens) <= sequence_length:
    # Without this check the result would be an empty 1-D array that only fails
    # later, when it is sliced as if it were 2-D.
    raise ValueError(
        f"corpus has {len(tokens)} tokens; more than {sequence_length} are "
        "needed to build at least one training window"
    )

windows = [
    tokens[end - sequence_length:end + 1]
    for end in range(sequence_length, len(tokens))
]

# Shape: (number of windows, sequence_length + 1).
input_sequences = np.array(windows)

In [7]:
# Split each window into its context (all columns but the last) and its target
# (the final column).
X, y = input_sequences[:, :-1], input_sequences[:, -1]

In [8]:
# Next-word prediction model: word embedding -> single LSTM -> softmax over the
# vocabulary.
model = Sequential([
    # Declare the input shape explicitly. `Embedding(input_length=...)` is
    # deprecated in Keras 3 and no longer fixes the input shape, which leaves
    # the model unbuilt and makes `summary()` report no shapes or parameters.
    tf.keras.Input(shape=(sequence_length,)),
    Embedding(total_words, 100),
    LSTM(150),
    Dense(total_words, activation='softmax')
])

# The sparse loss is used because the targets are integer word ids rather than
# one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()



In [None]:
# Train on the first `train_limit` windows only.
train_limit = 200000
X, y = X[:train_limit], y[:train_limit]

# Stop early once the training loss has gone 3 epochs without improving.
early_stop = EarlyStopping(monitor='loss', patience=3)

model.fit(
    x=X,
    y=y,
    batch_size=128,
    epochs=15,
    callbacks=[early_stop],
)


Epoch 1/15
[1m1009/1563[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m42s[0m 77ms/step - accuracy: 0.0316 - loss: 7.4086 

In [None]:
def generate_text(seed_text, next_words=30):
    """Extend ``seed_text`` by greedily predicting up to ``next_words`` words.

    The seed is tokenized once. On every step the most recent
    ``sequence_length`` word ids are left-padded to the model's input length,
    the model is queried, and the highest-scoring word id is appended.

    Args:
        seed_text: Text to continue. Words the tokenizer does not know are
            dropped by ``texts_to_sequences`` and contribute no context.
        next_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Fewer than ``next_words`` words are appended when the model
        predicts an id that has no word (such as padding id 0); generation
        stops there because the context, and hence the prediction, could no
        longer change.
    """
    # Tokenize once and keep a rolling list of ids instead of re-tokenizing the
    # whole growing string on every iteration.
    context = list(tokenizer.texts_to_sequences([seed_text])[0])
    generated = []

    for _ in range(next_words):
        window = pad_sequences(
            [context],
            maxlen=sequence_length,
            padding='pre'
        )

        probabilities = model.predict(window, verbose=0)
        # Single-row batch: take the arg-max of that row as a plain int so it
        # can be used as a dictionary key and appended to the id list.
        predicted = int(np.argmax(probabilities[0]))
        output_word = tokenizer.index_word.get(predicted, "")

        if not output_word:
            # No word for this id: appending "" would only add stray spaces
            # and the next step would see the identical context.
            break

        generated.append(output_word)
        context.append(predicted)
        # Only the last sequence_length ids are ever fed to the model.
        context = context[-sequence_length:]

    return " ".join([seed_text, *generated])

In [None]:
# Sample 40-word continuations for a couple of seed phrases.
for prompt in ("to be or not to be", "love is a smoke"):
    print(generate_text(prompt, 40))