<a href="https://colab.research.google.com/github/SuryaPrakashBolloju/4285/blob/main/dlexp10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence

# Load the IMDB reviews, keeping only the 5000 most frequent words
vocab_size = 5000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Show the first training sample in its raw, integer-encoded form
print("First review (as token IDs):", x_train[0])

# Build the reverse lookup (token ID -> word). IDs are offset by 3 so that
# 0, 1 and 2 stay free for the reserved markers registered right after.
word_index = {idx + 3: token for token, idx in imdb.get_word_index().items()}
word_index.update({0: "<PAD>", 1: "<START>", 2: "<UNK>"})

# Decode the first sample back to text; IDs without an entry become "<UNK>"
decoded_first = " ".join(word_index.get(token_id, "<UNK>") for token_id in x_train[0])
print("First review (as words):", decoded_first)

# Find min & max review lengths across both splits.
# NOTE: the splits are NumPy object arrays, so `x_train + x_test` would add
# them element-wise (joining review i of train with review i of test) rather
# than concatenating the two splits. Gather the lengths split by split so
# every review is measured on its own.
review_lengths = [len(review) for split in (x_train, x_test) for review in split]
max_len = max(review_lengths)
min_len = min(review_lengths)
print(f"Max review length: {max_len}, Min review length: {min_len}")

# Bring every review to the same fixed length (400 words)
max_words = 400
x_train, x_test = [
    sequence.pad_sequences(split, maxlen=max_words) for split in (x_train, x_test)
]

# Hold out the first 20% of the training data as a validation set;
# the remaining 80% (x_train_, y_train_) is what the model is fitted on.
val_size = int(0.2 * len(x_train))
x_valid, x_train_ = x_train[:val_size], x_train[val_size:]
y_valid, y_train_ = y_train[:val_size], y_train[val_size:]

# Embedding size
embd_len = 32

# Build Simple RNN Model:
# token IDs -> embd_len-dimensional embeddings -> 128-unit tanh SimpleRNN
# -> single sigmoid output unit.
RNN_model = Sequential(name="Simple_RNN")
# Declare the input shape explicitly rather than through the Embedding
# `input_length` argument (deprecated in Keras 3); this also ensures the
# model is built, so the summary below can report shapes and parameters.
RNN_model.add(tf.keras.Input(shape=(max_words,), dtype="int32"))
RNN_model.add(Embedding(vocab_size, embd_len))
RNN_model.add(SimpleRNN(128, activation='tanh'))
RNN_model.add(Dense(1, activation='sigmoid'))

# Model summary. summary() prints the table itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
RNN_model.summary()

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
RNN_model.compile(
    optimizer='adam',
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

# Fit on the reduced training split for 5 epochs, monitoring the held-out
# validation split after each epoch; the returned history feeds the plot.
history = RNN_model.fit(
    x=x_train_,
    y=y_train_,
    validation_data=(x_valid, y_valid),
    epochs=5,
    batch_size=64,
    verbose=1,
)

# Score the trained model on the test split (silent) and report accuracy
test_loss, test_acc = RNN_model.evaluate(x_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")

# Draw training and validation accuracy per epoch on the same axes.
# Curves are plotted in the order matching the legend labels below.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_key])
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
