Experiment 11: Next Word Prediction Using an RNN
- Aim: Next Word Prediction Using an RNN on Simple English Sentences


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential, layers, preprocessing

sentences = [
    "I love to eat apples",
    "She loves to eat oranges",
    "He likes to eat bananas"
]

# Tokenization
tokenizer = preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)

# Prepare sequences
seq_length = 3
input_sequences = [seq[i-seq_length:i] for seq in sequences for i in range(seq_length, len(seq))]
output_words = [seq[i] for seq in sequences for i in range(seq_length, len(seq))]

# Prepare data
X = preprocessing.sequence.pad_sequences(input_sequences, maxlen=seq_length)
y = tf.keras.utils.to_categorical(output_words, num_classes=len(tokenizer.word_index)+1)

# Build and train model
model = Sequential([
    layers.Embedding(len(tokenizer.word_index)+1, 8, input_length=seq_length),
    layers.SimpleRNN(16, activation='relu'),
    layers.Dense(len(tokenizer.word_index)+1, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=50, verbose=0)

model.summary()
# Predict
seed_seq = tokenizer.texts_to_sequences(["loves to eat"])[0]
predicted_word = tokenizer.index_word[np.argmax(model.predict(
    preprocessing.sequence.pad_sequences([seed_seq], maxlen=seq_length), verbose=0))]

print(f"Next word after 'loves to eat': {predicted_word}")



Next word after 'loves to eat': apples
