In [1]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import numpy as np

In [2]:
# Toy corpus: a single sentence is enough to demonstrate next-word prediction.
text = "I am coder girl"

# Learn the vocabulary from the corpus; each distinct word gets an integer id.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])

word_index = tokenizer.word_index
# One extra slot on top of the word count: the ids printed below start at 1,
# so id 0 is not assigned to any word but still needs a class position.
total_words = 1 + len(word_index)

print("Word Index:", word_index)
print("Total words (vocab size):", total_words)

Word Index: {'i': 1, 'am': 2, 'coder': 3, 'girl': 4}
Total words (vocab size): 5


In [3]:
# Single training pair: the context words and the word that should follow them.
input_text = "I am coder"
target_word = "girl"

# Encode both strings in one tokenizer call, then unpack the two id lists.
input_seq, target_ids = tokenizer.texts_to_sequences([input_text, target_word])
target_seq = target_ids[0]

# Features get a leading batch axis; the label becomes a one-hot row whose
# width equals the vocabulary size.
X = np.expand_dims(np.array(input_seq), axis=0)
y = to_categorical([target_seq], num_classes=total_words)

print("Input sequence:", input_seq)
print("Target (one-hot):", y)

Input sequence: [1, 2, 3]
Target (one-hot): [[0. 0. 0. 0. 1.]]


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input, SimpleRNN

# Derive the sequence length from the training data instead of repeating the
# literal 3, so the model stays in sync if the context sentence changes.
sequence_length = X.shape[1]

# Architecture: token ids -> 10-d embeddings -> 32-unit SimpleRNN -> softmax
# over the vocabulary (one probability per possible next word).
model = Sequential()
# An explicit Input layer replaces Embedding's `input_length` argument, which
# Keras 3 deprecates. Declaring the input shape up front also means the model
# is built before summary() runs, so layer shapes and parameter counts show.
model.add(Input(shape=(sequence_length,)))
model.add(Embedding(input_dim=total_words, output_dim=10))
model.add(SimpleRNN(32))
model.add(Dense(total_words, activation='softmax'))

# categorical_crossentropy matches the one-hot encoded target `y`.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



In [5]:
# Train silently (verbose=0) on the single (context, next-word) example for
# 500 epochs; with only one sample the network simply memorises the pair.
model.fit(x=X, y=y, epochs=500, verbose=0)
print("Model training complete!")

Model training complete!


In [6]:
# Encode the prompt with the fitted tokenizer and add a batch axis, matching
# the (1, sequence_length) layout the model was trained on.
test_input = tokenizer.texts_to_sequences(["I am coder"])[0]
test_input = np.array([test_input])

pred = model.predict(test_input, verbose=0)
# Take the argmax over the vocabulary axis of the first (only) sample.
# A bare np.argmax(pred) works on the flattened array, which is a valid word
# id only while the batch has exactly one row.
predicted_index = int(np.argmax(pred[0]))

# index_word is the tokenizer's own id -> word mapping, so no manual scan of
# word_index is needed. Ids with no word (e.g. 0) fall back to "" exactly as
# the original loop did when it found no match.
predicted_word = tokenizer.index_word.get(predicted_index, "")

print(f"Predicted word: {predicted_word}")

Predicted word: girl
