In [22]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [23]:
# Toy training corpus: fifteen short, period-terminated sentences (one per
# line) about AI / machine-learning topics. Everything downstream is fitted
# on this single string.
text = """
artificial intelligence is transforming modern society.
it is used in healthcare finance education and transportation.
machine learning allows systems to improve automatically with experience.
data plays a critical role in training intelligent systems.
large datasets help models learn complex patterns.
deep learning uses multi layer neural networks.
neural networks are inspired by biological neurons.
each neuron processes input and produces an output.
training a neural network requires optimization techniques.
gradient descent minimizes the loss function.
natural language processing helps computers understand human language.
text generation is a key task in nlp.
language models predict the next word or character.
recurrent neural networks handle sequential data.
lstm and gru models address long term dependency problems.
"""


In [24]:
# Normalise the corpus (drop periods, lowercase) and fit a word-level
# tokenizer on it as a single document.
text = text.replace(".", "").lower()
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

# Lookup tables in both directions: id -> word and word -> id.
index_word = tokenizer.index_word
word_index = tokenizer.word_index

# One extra slot on top of the known words; the Keras Tokenizer numbers
# words from 1, leaving index 0 free (used as the padding value later).
vocab_size = 1 + len(word_index)


In [25]:
# Flatten the whole corpus into one stream of word ids.
tokens = tokenizer.texts_to_sequences([text])[0]

# Number of context words the model sees before predicting the next one.
SEQ_LENGTH = 5

# Slide a window over the token stream: each sample is SEQ_LENGTH consecutive
# ids, and its label is the id that immediately follows the window.
window_starts = range(len(tokens) - SEQ_LENGTH)
X = np.array([tokens[start:start + SEQ_LENGTH] for start in window_starts])
y = np.array([tokens[start + SEQ_LENGTH] for start in window_starts])


In [26]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable under Restart Kernel -> Run All.
tf.keras.utils.set_random_seed(42)

# Next-word classifier:
#   word ids -> 64-d embeddings -> LSTM(128) summary vector -> softmax over vocab.
model = Sequential([
    # Declare the input shape explicitly. `Embedding(input_length=...)` is
    # deprecated in Keras 3 and leaves the model unbuilt, so `summary()`
    # would show no output shapes or parameter counts.
    tf.keras.Input(shape=(SEQ_LENGTH,)),
    Embedding(input_dim=vocab_size, output_dim=64),
    LSTM(128),
    Dense(vocab_size, activation="softmax"),
])

model.compile(
    # Labels in `y` are integer word ids (not one-hot), hence the sparse loss.
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
)

model.summary()




In [27]:
# Training hyper-parameters, named so they can be tuned in one place.
EPOCHS = 40
BATCH_SIZE = 16

# Keep the returned History object: it holds the per-epoch loss
# (`history.history["loss"]`) for later inspection/plotting, and assigning it
# stops the bare `<...History at 0x...>` repr from being echoed as cell output.
history = model.fit(
    X,
    y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)


Epoch 1/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 4.5337
Epoch 2/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.5222
Epoch 3/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 4.5128
Epoch 4/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.5025
Epoch 5/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.4870
Epoch 6/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.4648
Epoch 7/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 4.4350 
Epoch 8/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.3689
Epoch 9/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 4.2516
Epoch 10/40
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 4.0349
Epoch 11/40
[1m7/7

<keras.src.callbacks.history.History at 0x790680167f20>

In [28]:
def generate_text(seed_text, length=30):
    """Greedily extend ``seed_text`` with words predicted by the trained model.

    Relies on the notebook-level ``tokenizer``, ``index_word``, ``model`` and
    ``SEQ_LENGTH`` defined in earlier cells.

    Args:
        seed_text: Starting phrase. It is lowercased and split on whitespace;
            words the tokenizer does not know are kept in the returned text
            but contribute nothing to the model's context.
        length: Number of words to append. Values <= 0 append nothing.

    Returns:
        The lowercased seed words followed by ``length`` generated words,
        joined by single spaces.
    """
    generated = seed_text.lower().split()

    # Encode the seed once. From here on the context is extended with the
    # predicted ids directly instead of re-joining and re-tokenizing words
    # the model itself just produced.
    context_ids = tokenizer.texts_to_sequences([" ".join(generated)])[0]

    for _ in range(length):
        # Left-pad (with 0) or truncate to exactly SEQ_LENGTH ids; an empty
        # or fully out-of-vocabulary seed becomes an all-padding window.
        window = pad_sequences(
            [context_ids[-SEQ_LENGTH:]], maxlen=SEQ_LENGTH, padding="pre"
        )

        probabilities = model.predict(window, verbose=0)[0]

        # Index 0 is the padding slot and maps to no word. Exclude it from
        # the argmax so an empty string is never appended to the output
        # (the `+ 1` undoes the offset introduced by slicing off index 0).
        next_id = int(np.argmax(probabilities[1:])) + 1

        context_ids.append(next_id)
        generated.append(index_word[next_id])

    return " ".join(generated)


In [29]:
# Demo: continue a three-word seed using the default generation length.
print(generate_text(seed_text="artificial intelligence is"))


artificial intelligence is networks society society it healthcare a in healthcare finance transportation nlp transportation machine learning to improve to improve automatically data plays data plays a in training in training intelligent datasets
