In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Step 1: Load Shakespearean Text Data
# The whole training corpus is this short excerpt, kept inline.
shakespeare_text = """
To be, or not to be, that is the question:
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles
And by opposing end them.
"""

# Tokenization
# Fit a word-level tokenizer on the corpus (passed as a one-document list)
# and keep its word -> integer-id table for later lookups.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=[shakespeare_text])
word_index = tokenizer.word_index
vocab_size = 1 + len(word_index)  # one extra id is set aside for padding

# Convert text to sequences
# Tokenize the corpus once and take growing prefixes of the token stream
# (lengths 2..N) rather than re-joining and re-tokenizing every word prefix.
# Each prefix holds the context tokens followed by the token to predict, and
# slicing the token stream keeps the prefixes aligned even if a
# whitespace-separated word does not map to exactly one token.
corpus_tokens = tokenizer.texts_to_sequences([shakespeare_text])[0]
if len(corpus_tokens) < 2:
    # With fewer than two tokens there is no (context, next word) pair, and
    # max() below would otherwise fail with an unrelated "empty" error.
    raise ValueError("shakespeare_text must contain at least two words")
input_sequences = [
    corpus_tokens[:end] for end in range(2, len(corpus_tokens) + 1)
]

# Pad sequences for equal length
# Padding on the left ('pre') keeps the target token in the last column of
# every row, which the X / y split below relies on.
max_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_length, padding='pre')

# Split into input (X) and target (y)
# X: every column except the last (the context); y: the last column,
# one-hot encoded over the vocabulary.
X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=vocab_size)

# Step 2: Define Vanilla RNN Model
# Embedding -> SimpleRNN -> softmax over the vocabulary: the network reads a
# context of max_length - 1 token ids and scores every vocabulary id as the
# next word.
model = tf.keras.Sequential([
    # An explicit input spec replaces Embedding's `input_length` argument,
    # which newer Keras releases deprecate. The context length is unchanged.
    tf.keras.Input(shape=(max_length - 1,), dtype='int32'),
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=50),
    # return_sequences=False: only the final hidden state feeds the classifier.
    tf.keras.layers.SimpleRNN(100, return_sequences=False),
    tf.keras.layers.Dense(vocab_size, activation='softmax'),
])

# Compile Model
# categorical_crossentropy matches the one-hot targets produced by
# to_categorical.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 3: Train the Model
model.fit(X, y, epochs=100, verbose=1)

# Step 4: Generate Next Words
def predict_next_words(seed_text, num_words=10):
    """Greedily extend ``seed_text`` with the model's most likely next words.

    Relies on the module-level ``tokenizer``, ``model`` and ``max_length``.

    Args:
        seed_text: Text to continue.
        num_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by up to ``num_words`` predicted words, each
        separated by a single space. Fewer words are appended when the model
        predicts an id that has no entry in ``tokenizer.index_word``.
    """
    for _ in range(num_words):
        token_seq = tokenizer.texts_to_sequences([seed_text])[0]
        # Left-pad to the context length the model was trained on; inputs
        # longer than that are cut to maxlen by pad_sequences.
        token_seq = pad_sequences([token_seq], maxlen=max_length - 1, padding='pre')
        # verbose=0 suppresses the progress bar Keras would otherwise print
        # for every generated word.
        probabilities = model.predict(token_seq, verbose=0)
        predicted_index = int(np.argmax(probabilities, axis=-1)[0])
        next_word = tokenizer.index_word.get(predicted_index)
        if not next_word:
            # No word for this id (e.g. the padding id). Appending nothing
            # would leave the model input unchanged, so every remaining step
            # would repeat this prediction and only add trailing spaces.
            break
        seed_text += " " + next_word
    return seed_text

# Example Prediction
# Continue the opening words of the training text using the default num_words.
print(predict_next_words(seed_text="To be"))



Epoch 1/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 4s 19ms/step - accuracy: 0.0559 - loss: 3.4485
Epoch 2/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - accuracy: 0.1853 - loss: 3.2906
Epoch 3/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step - accuracy: 0.2484 - loss: 3.1949
Epoch 4/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.3810 - loss: 3.0801
Epoch 5/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step - accuracy: 0.4057 - loss: 2.9835
Epoch 6/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step - accuracy: 0.4720 - loss: 2.8644
Epoch 7/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - accuracy: 0.4825 - loss: 2.7953
Epoch 8/100
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step - accuracy: 0.4896 - loss: 2.6961
Epoch 9/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0