In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy corpus: three short sentences, each one becomes a single training example.
sample_document = [
    "The school is outer",
    "The scooty is near",
    "The tree is very tall",
]

# Hyperparameters
vocab_size, max_length = 10000, 5      # vocabulary cap (tokenizer + softmax width); ids per padded sentence
embedding_dim, hidden_units = 50, 64   # embedding width; LSTM units (per direction once wrapped in Bidirectional)

# Learn the word -> integer-id mapping from the corpus, then encode every sentence.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(sample_document)
sequences = tokenizer.texts_to_sequences(sample_document)

# Bring every encoded sentence to exactly `max_length` ids, padding at the end.
padded_sequences = pad_sequences(sequences, padding='post', maxlen=max_length)

# Next-word setup: the inputs drop the last position and the targets drop the
# first one, so y[:, t] holds the id that follows X[:, t] in the sentence.
X, y = padded_sequences[:, :-1], padded_sequences[:, 1:]

# Seed NumPy and TensorFlow before any weights are created so that weight
# initialisation and batch shuffling are repeatable on a fresh kernel
# (some GPU kernels may still introduce small run-to-run differences).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Build the model: embedding -> bidirectional LSTM -> per-timestep softmax over
# the vocabulary. `return_sequences=True` keeps one output per input position,
# which matches the shape of the shifted target `y`.
#
# NOTE(review): the backward LSTM reads the tokens to the right of each
# position, and the token immediately to the right IS the training target, so
# the reported training accuracy overstates real next-word ability.
# NOTE(review): `y` still contains the padding id 0, so the model is also
# trained to predict padding at the end of short sentences.
model = tf.keras.Sequential([
    # `input_length` is intentionally not passed: it is deprecated in Keras 3
    # and the sequence length is inferred from the data on the first call.
    Embedding(vocab_size, embedding_dim),
    Bidirectional(LSTM(hidden_units, return_sequences=True)),
    Dense(vocab_size, activation='softmax')
])

# Compile the model. The sparse loss takes the integer word ids in `y`
# directly, so no one-hot encoding of the targets is needed.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, verbose=1)

# Predict the next word
def predict_next_word(text):
    """Return the word the trained model predicts should follow ``text``.

    Args:
        text: Prompt string. It is encoded with the module-level ``tokenizer``;
            words the tokenizer does not know contribute no ids.

    Returns:
        The predicted word as a string, or ``"<OOV>"`` when the prompt contains
        no known word or when the predicted id has no entry in
        ``tokenizer.index_word`` (for example the padding id 0).
    """
    sequence = tokenizer.texts_to_sequences([text])
    window = max_length - 1  # same input length that is used for training

    # Number of real (non-padding) tokens that end up in the model input.
    n_tokens = min(len(sequence[0]), window)
    if n_tokens == 0:
        # Nothing to condition on: do not predict from an all-padding input.
        return "<OOV>"

    padded_sequence = pad_sequences(sequence, maxlen=window, padding='post')
    prediction = model.predict(padded_sequence)

    # The model emits one distribution per timestep: (batch, timestep, vocab).
    # An argmax over the whole array would return a flattened index that mixes
    # the timestep with the word id, so first pick the distribution at the last
    # real token. With padding='post' real tokens occupy the leading positions;
    # over-long prompts are cut from the front by pad_sequences' default, which
    # leaves the last real token at index window - 1 == n_tokens - 1.
    next_word_probs = prediction[0, n_tokens - 1]
    predicted_index = int(np.argmax(next_word_probs))
    predicted_word = tokenizer.index_word.get(predicted_index, "<OOV>")
    return predicted_word



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:

# Smoke test: ask the trained model which word it expects after a one-word prompt.
test_text = "tree"
predicted_next_word = predict_next_word(text=test_text)
print("Predicted next word:", predicted_next_word)

Predicted next word: is
