# Importing Libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Training Text

In [2]:
text = """
I love deep learning and I love neural networks.
Neural networks can generate amazing text.
Deep learning is powerful for text generation.
GenAI is Sub set of AI.
AI as a whole is a big picture.
"""

# Tokenize Text

In [3]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

# Convert text → sequence of integers
token_list = tokenizer.texts_to_sequences([text])[0]

# Create Input Sequences for Training

In [13]:
sequences = []
for i in range(1, len(token_list)):
    sequences.append(token_list[:i+1])

max_len = max(len(seq) for seq in sequences)

sequences = pad_sequences(sequences, maxlen=max_len, padding="pre")

# split into input/output
x = sequences[:, :-1]
y = sequences[:, -1]

y = tf.keras.utils.to_categorical(y, num_classes=total_words)

print("\nDEBUG INFO:")
print("token_list:", token_list)
print("Length of token_list:", len(token_list))
print("Sequences count:", len(sequences))
print("max_len:", max_len)
print("x shape:", x.shape)
print("y shape:", y.shape)

total_words = len(tokenizer.word_index) + 1
print("total_words =", total_words)



DEBUG INFO:
token_list: [2, 3, 4, 5, 11, 2, 3, 6, 7, 6, 7, 12, 13, 14, 8, 4, 5, 1, 15, 16, 8, 17, 18, 1, 19, 20, 21, 9, 9, 22, 10, 23, 1, 10, 24, 25]
Length of token_list: 36
Sequences count: 35
max_len: 36
x shape: (35, 35)
y shape: (35, 26)
total_words = 26


# Build LSTM Model

In [16]:
model = Sequential([
    Embedding(input_dim=total_words, output_dim=64, input_length=max_len-1),
    LSTM(150, return_sequences=True),
    LSTM(100),
    Dense(total_words, activation='softmax')
])
model.build(input_shape=(None, max_len-1))
model.summary()

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train

In [17]:
model.fit(x, y, epochs=200, verbose=1)

Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.0295 - loss: 3.2582
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.1473 - loss: 3.2472
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.1768 - loss: 3.2364
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.1473 - loss: 3.2226
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.1074 - loss: 3.2017
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.1473 - loss: 3.1530
Epoch 7/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.0780 - loss: 3.0951
Epoch 8/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.0884 - loss: 3.0753
Epoch 9/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x7e1bc417b680>

# Text Generation

In [18]:
def generate_text(seed_text, next_words):
    for _ in range(next_words):
        seq = tokenizer.texts_to_sequences([seed_text])[0]
        seq = pad_sequences([seq], maxlen=max_len-1, padding="pre")
        predicted = np.argmax(model.predict(seq, verbose=0))

        for word, index in tokenizer.word_index.items():
            if index == predicted:
                seed_text += " " + word
                break
    return seed_text

print("\nGenerated text:")
print(generate_text("deep learning", 10))


Generated text:
deep learning deep and and and love neural neural networks networks can
