<a href="https://colab.research.google.com/github/Gowrish21S/DL_assignment/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout # Import Embedding and LSTM

# Training corpus: one sentence, modelled at character level.
text = "The beautiful girl whom I met last time is very intelligent also"
# text = "The handsome boy whom I met last time is very intelligent also"

# Vocabulary: every distinct character of the corpus, sorted so that the
# character <-> integer id mapping is deterministic.
chars = sorted(set(text))
vocab_size = len(chars)
index_to_char = dict(enumerate(chars))
char_to_index = {char: index for index, char in index_to_char.items()}

# Number of consecutive characters the model sees before predicting the next.
seq_length = 5

# --- Data Preparation (X and y stay integer encoded) ---
# Slide a window of seq_length characters over the corpus; each window is one
# training input and the character immediately after it is its target.
window_starts = range(len(text) - seq_length)
sequences = [
    [char_to_index[char] for char in text[start:start + seq_length]]
    for start in window_starts
]
labels = [char_to_index[text[start + seq_length]] for start in window_starts]

# X: one row of seq_length character ids per window.
X = np.array(sequences)
# y: the id of the character that follows each window.
y = np.array(labels)

# --- Training targets ---
# Expand each integer label in y into a one-hot row of width vocab_size, the
# target format used with the categorical cross-entropy loss.
y_one_hot = tf.one_hot(indices=y, depth=vocab_size)

# --- Model Parameters ---
embedding_dim = 10  # size of the dense vector learned for each character
lstm_units = 64     # hidden units in each LSTM layer
text_len = 50       # number of characters to generate after training


# -----------------------------------------------------------
# --- MODEL ARCHITECTURE (Embedding + stacked LSTM) ---
# -----------------------------------------------------------
# Character-level next-token model: a window of seq_length character ids goes
# in, a probability distribution over the vocab_size characters comes out.
model = Sequential()

# 0. Explicit input specification
# Declares the (seq_length,) integer input up front. This replaces the
# `input_length` argument of Embedding, which Keras 3 deprecates (it is ignored
# there and only triggers a warning), and lets Keras build the weights
# immediately instead of on the first call.
model.add(tf.keras.Input(shape=(seq_length,), dtype="int32"))

# 1. Embedding Layer
# Maps each integer character id in X to a dense embedding_dim-sized vector.
model.add(Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
))

# 2. First LSTM Layer
# return_sequences=True emits one output per timestep, which the next
# recurrent layer needs as its input sequence.
model.add(LSTM(
    units=lstm_units,
    return_sequences=True,
    activation='tanh'
))
model.add(Dropout(0.2))  # Dropout for regularization

# 3. Final LSTM Layer
# return_sequences=False (default): only the last timestep's output is passed
# on to the Dense layer.
model.add(LSTM(units=lstm_units, activation='tanh'))
model.add(Dropout(0.2))

# 4. Dense Output Layer
# One softmax probability per vocabulary character.
model.add(Dense(vocab_size, activation='softmax'))

# Compile (categorical_crossentropy because the targets are one-hot rows)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train on every window of the corpus; verbose=2 prints one line per epoch.
print("Training Model...")
model.fit(X, y_one_hot, epochs=200, verbose=2)
# -----------------------------------------------------------


start_seq = "The beautiful girl whom I met "
# start_seq = "The handsome boy whom I met "

# Validate the seed before generating. Training windows are exactly seq_length
# characters long and only characters present in char_to_index can be encoded,
# so a shorter seed or one with unseen characters is rejected with a clear
# message instead of a bare KeyError or a mismatched input window.
if len(start_seq) < seq_length:
    raise ValueError(
        f"start_seq must contain at least {seq_length} characters, "
        f"got {len(start_seq)}"
    )
unknown_chars = sorted(set(start_seq) - set(char_to_index))
if unknown_chars:
    raise ValueError(
        "start_seq contains characters outside the training vocabulary: "
        f"{unknown_chars}"
    )

# --- Prediction Loop (integer-encoded input) ---
print("\nGenerating Text...")

# Sliding window of the last seq_length character ids; it is advanced by one
# id per step rather than re-encoding the tail of the string every iteration.
window = [char_to_index[char] for char in start_seq[-seq_length:]]
generated_chars = []

for _ in range(text_len):
    # Batch of one window, shape (1, seq_length), integer encoded (no one-hot).
    x = np.array([window])

    # Call the model directly instead of model.predict(): for a single small
    # batch inside a loop this avoids predict()'s per-call overhead.
    # training=False keeps the Dropout layers inactive.
    prediction = np.asarray(model(x, training=False))[0]

    # Greedy decoding: take the character with the highest probability.
    next_index = int(np.argmax(prediction))
    generated_chars.append(index_to_char[next_index])

    # Drop the oldest id and append the new one to form the next input window.
    window = window[1:] + [next_index]

# Seed followed by the text_len generated characters.
generated_text = start_seq + "".join(generated_chars)

print("\nGenerated Text:")
print(generated_text)

Training Model...
Epoch 1/200




2/2 - 4s - 2s/step - accuracy: 0.0169 - loss: 3.0443
Epoch 2/200
2/2 - 0s - 29ms/step - accuracy: 0.1186 - loss: 3.0397
Epoch 3/200
2/2 - 0s - 28ms/step - accuracy: 0.1695 - loss: 3.0357
Epoch 4/200
2/2 - 0s - 27ms/step - accuracy: 0.1695 - loss: 3.0300
Epoch 5/200
2/2 - 0s - 27ms/step - accuracy: 0.1695 - loss: 3.0253
Epoch 6/200
2/2 - 0s - 27ms/step - accuracy: 0.1864 - loss: 3.0181
Epoch 7/200
2/2 - 0s - 32ms/step - accuracy: 0.1695 - loss: 3.0105
Epoch 8/200
2/2 - 0s - 31ms/step - accuracy: 0.1695 - loss: 2.9999
Epoch 9/200
2/2 - 0s - 33ms/step - accuracy: 0.1695 - loss: 2.9850
Epoch 10/200
2/2 - 0s - 40ms/step - accuracy: 0.1864 - loss: 2.9694
Epoch 11/200
2/2 - 0s - 29ms/step - accuracy: 0.1695 - loss: 2.9522
Epoch 12/200
2/2 - 0s - 30ms/step - accuracy: 0.1695 - loss: 2.9214
Epoch 13/200
2/2 - 0s - 33ms/step - accuracy: 0.1695 - loss: 2.8966
Epoch 14/200
2/2 - 0s - 31ms/step - accuracy: 0.1695 - loss: 2.8504
Epoch 15/200
2/2 - 0s - 29ms/step - accuracy: 0.1695 - loss: 2.8279
Epo