### Create a program that demonstrates how an RNN works by predicting the next words from a provided paragraph.


In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample paragraph: this text is the entire training corpus.
paragraph = """
Machine learning is a method of data analysis that automates analytical model building. 
Using algorithms that iteratively learn from data, machine learning allows computers to find hidden insights 
without being explicitly programmed where to look.
"""

# Step 1: Tokenization (word-level)
# With default settings the Keras Tokenizer lowercases the text, strips
# punctuation, and assigns word ids starting at 1 (id 0 is never assigned).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([paragraph])
word_index = tokenizer.word_index
index_word = {idx: word for word, idx in word_index.items()}

# +1 because word ids start at 1, so the largest id equals len(word_index)
# and id 0 (used for padding at prediction time) also needs a slot.
vocab_size = len(word_index) + 1

# Convert paragraph to sequence of word indices
tokens = tokenizer.texts_to_sequences([paragraph])[0]

# Step 2: Prepare sequences for training
seq_length = 3  # You can change this to 4 or more for longer context

# Fail early with a clear message instead of training on empty arrays.
if len(tokens) <= seq_length:
    raise ValueError(
        f"paragraph has {len(tokens)} tokens; need more than "
        f"seq_length={seq_length} to build at least one training example"
    )

# Each example is a window of `seq_length` consecutive word ids; its label is
# the word id that immediately follows the window.
X_sequences = [tokens[i - seq_length:i] for i in range(seq_length, len(tokens))]
y = tokens[seq_length:]

X = np.array(X_sequences)
y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)

# Step 3: Define the model
# Embedding -> SimpleRNN -> softmax over the vocabulary.
# `input_length` is intentionally not passed to Embedding: it is deprecated in
# Keras 3, and the sequence length is taken from X when the model is fitted.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=50))
model.add(SimpleRNN(128))
model.add(Dense(vocab_size, activation='softmax'))

# Step 4: Compile and train
# Labels are one-hot encoded above, hence categorical (not sparse) crossentropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=300, verbose=0)

# Step 5: Predict next word
def predict_next_word(seed_text, num_words=1):
    """Extend ``seed_text`` by greedily predicting up to ``num_words`` words.

    On every step the current text is tokenized with the module-level
    ``tokenizer``, reduced/padded to ``seq_length`` word ids, passed to
    ``model``, and the highest-probability word is appended.

    Args:
        seed_text: Starting text.
        num_words: Maximum number of words to append.

    Returns:
        The seed text followed by the predicted words, separated by single
        spaces. Generation stops early if the predicted id has no entry in
        ``index_word`` (for example the padding id 0), so no empty words or
        trailing spaces are appended.
    """
    for _ in range(num_words):
        token_ids = tokenizer.texts_to_sequences([seed_text])[0]
        # pad_sequences pads and truncates at the front by default, so the
        # model receives the most recent `seq_length` ids.
        model_input = pad_sequences([token_ids], maxlen=seq_length)
        predicted_probs = model.predict(model_input, verbose=0)
        # A single sequence was passed, so take row 0; convert to a plain int
        # before using it as a dictionary key.
        predicted_index = int(np.argmax(predicted_probs[0]))
        predicted_word = index_word.get(predicted_index)
        if not predicted_word:
            # No word for this id. Re-running on the unchanged text would
            # predict the same id again, so stop instead of appending blanks.
            break
        seed_text += ' ' + predicted_word
    return seed_text

# Test: generate five more words after a three-word seed phrase.
seed = "machine learning is"
output = predict_next_word(seed_text=seed, num_words=5)
print("Generated text:", output)


Generated text: machine learning is a method of data analysis
