In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import sys

text_corpus = """
In a hole in the ground there lived a hobbit. Not a nasty, dirty, wet hole, filled with the ends of worms and an oozy smell, nor yet a dry, bare, sandy hole with nowhere to sit down or anything to eat: it was a hobbit-hole, and that means comfort. It had a perfectly round door like a porthole, painted green, with a shiny yellow brass knob in the exact middle. The door opened on to a tube-shaped hall like a tunnel: a very perfect tunnel, with panelled walls, and floors tiled and carpeted, all lit by lamps, but not by windows.
"""

# Fit the vocabulary on the corpus; words unseen at fit time map to "<unk>".
tokenizer = Tokenizer(oov_token="<unk>")
tokenizer.fit_on_texts([text_corpus])
word_index = tokenizer.word_index
# One more class than there are words, so the largest word index is a valid
# class id (Keras word indices start at 1).
total_words = 1 + len(word_index)

# Turn each line into all of its prefixes of length >= 2. The last token of a
# prefix is the prediction target; the tokens before it are the context.
input_sequences = []
for line in text_corpus.split('\n'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:prefix_end] for prefix_end in range(2, len(token_list) + 1)
    )

# Left-pad every prefix to the longest one so the targets line up in the
# final column.
max_sequence_len = max(len(sequence) for sequence in input_sequences)
padded_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_len,
    padding='pre',
)

# Context tokens are the features; the final column is the label, which is
# then one-hot encoded for categorical cross-entropy.
X, y = padded_sequences[:, :-1], padded_sequences[:, -1]
y = to_categorical(y, num_classes=total_words)

print("\nBuilding the Neural Network model...")
embedding_dim = 100
model = Sequential()
# Declare the input shape with an explicit Input layer instead of the
# deprecated `input_length` argument of Embedding (Keras 3 warns about it).
# The input is one token shorter than the padded training sequences because
# the final token of each sequence is used as the label.
model.add(tf.keras.Input(shape=(max_sequence_len - 1,)))
model.add(Embedding(total_words, embedding_dim))
# Only the final LSTM state is needed to predict the single next word.
model.add(LSTM(150, return_sequences=False))
model.add(Dropout(0.2))
# One softmax output per vocabulary index.
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("\nTraining the model with reduced epochs (50)...")
history = model.fit(X, y, epochs=50, verbose=1) # Epochs reduced from 100 to 50

def generate_text(seed_text, num_next_words, model, max_seq_len, tokenizer):
    """
    Generates text by greedily predicting the next word repeatedly.

    At each step the current text is tokenized, left-padded (or truncated on
    the left) to the model's input length, and the highest-probability word
    is appended.

    Args:
        seed_text (str): The initial text sequence to start generation from.
        num_next_words (int): The maximum number of words to predict and append.
        model (tf.keras.Model): The trained Keras model.
        max_seq_len (int): The maximum sequence length used during training
            (context plus target); the model input is one token shorter.
        tokenizer (Tokenizer): The fitted Keras Tokenizer.

    Returns:
        str: The seed text followed by the generated words. Fewer than
        ``num_next_words`` words are appended if the model predicts an index
        that has no associated word (e.g. the padding index 0).
    """
    # Use the caller-supplied length rather than a module-level global so the
    # function works with any model/tokenizer pair.
    input_len = max_seq_len - 1
    # The tokenizer already maintains the reverse (index -> word) mapping.
    index_to_word = tokenizer.index_word

    for _ in range(num_next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        padded_token_list = pad_sequences([token_list], maxlen=input_len, padding='pre')
        predicted_probabilities = model.predict(padded_token_list, verbose=0)[0]
        predicted_word_index = int(np.argmax(predicted_probabilities))

        output_word = index_to_word.get(predicted_word_index)
        if not output_word:
            # No word maps to this index (word indices start at 1, so 0 is
            # padding). Appending nothing would leave the input unchanged and
            # repeat the same prediction, so stop generating.
            break
        seed_text += " " + output_word
    return seed_text

print("\n--- Generating Text ---")

# Seed made of words that occur in the training corpus.
seed_phrase_1 = "a hobbit"
generated_text_1 = generate_text(
    seed_text=seed_phrase_1,
    num_next_words=10,
    model=model,
    max_seq_len=max_sequence_len,
    tokenizer=tokenizer,
)
print(f"Seed: '{seed_phrase_1}'")
print(f"Generated text: '{generated_text_1}'")

# "quick" and "brown" do not occur in the corpus, so the tokenizer maps them
# to its "<unk>" token.
seed_phrase_2 = "the quick brown"
generated_text_2 = generate_text(
    seed_text=seed_phrase_2,
    num_next_words=8,
    model=model,
    max_seq_len=max_sequence_len,
    tokenizer=tokenizer,
)
print(f"\nSeed: '{seed_phrase_2}'")
print(f"Generated text: '{generated_text_2}'")

# A three-word phrase lifted verbatim from the corpus.
seed_phrase_3 = "perfectly round door"
generated_text_3 = generate_text(
    seed_text=seed_phrase_3,
    num_next_words=12,
    model=model,
    max_seq_len=max_sequence_len,
    tokenizer=tokenizer,
)
print(f"\nSeed: '{seed_phrase_3}'")
print(f"Generated text: '{generated_text_3}'")



Building the Neural Network model...



Training the model with reduced epochs (50)...
Epoch 1/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 5s 158ms/step - accuracy: 0.0335 - loss: 4.2902
Epoch 2/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 166ms/step - accuracy: 0.1000 - loss: 4.2733
Epoch 3/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 163ms/step - accuracy: 0.1219 - loss: 4.2414
Epoch 4/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 155ms/step - accuracy: 0.1156 - loss: 4.1530
Epoch 5/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 157ms/step - accuracy: 0.0917 - loss: 4.1288
Epoch 6/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 165ms/step - accuracy: 0.1344 - loss: 4.0546
Epoch 7/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 164ms/step - accuracy: 0.1000 - loss: 4.0463
Epoch 8/50
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 161ms/step - accuracy: 0.1011 - loss: 4.0007
Epoch 9/