<a href="https://colab.research.google.com/github/Aswin-Cheerngodan/RNN/blob/main/Bi_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Parameters
vocab_size = 10000  # Number of unique words to use
max_length = 200    # Maximum length of input sequences
embedding_dim = 128 # Dimension of word embeddings
batch_size = 32

# Load IMDB dataset
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure consistent input length
X_train = pad_sequences(X_train, maxlen=max_length, padding='post', truncating='post')
X_test = pad_sequences(X_test, maxlen=max_length, padding='post', truncating='post')

# Define the Bidirectional GRU model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64, return_sequences=False, activation='tanh')),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=batch_size
)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=batch_size)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")




Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 24ms/step - accuracy: 0.6397 - loss: 0.6130 - val_accuracy: 0.8292 - val_loss: 0.3939
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 29ms/step - accuracy: 0.8802 - loss: 0.3109 - val_accuracy: 0.8595 - val_loss: 0.3345
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 30ms/step - accuracy: 0.9291 - loss: 0.2006 - val_accuracy: 0.8516 - val_loss: 0.3529
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 24ms/step - accuracy: 0.9656 - loss: 0.1091 - val_accuracy: 0.8490 - val_loss: 0.4560
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 23ms/step - accuracy: 0.9839 - loss: 0.0532 - val_accuracy: 0.8351 - val_loss: 0.6532
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.8337 - loss: 0.6548
Test Loss: 0.6532118320465088, Test Accuracy: 0.8351200222969055


In [10]:
import numpy as np

# Example: Custom review for prediction
def preprocess_review(review, word_index, max_length):
    """
    Preprocess the input review for prediction.
    Converts words to integers based on word_index and pads the sequence.
    """
    tokens = review.lower().split()  # Simple tokenization
    tokenized_review = [word_index.get(word, 2) for word in tokens]  # Use 2 (OOV index) for unknown words
    padded_review = pad_sequences([tokenized_review], maxlen=max_length, padding='post', truncating='post')
    return padded_review

# Load the word index (mapping of words to integers) used by the IMDB dataset
word_index = imdb.get_word_index()

# Example review (positive and negative sentiment)
review_1 = "The movie was fantastic! I really enjoyed the storyline and characters."
review_2 = "It was a horrible movie. I would not recommend it to anyone."

# Preprocess reviews
review_1_processed = preprocess_review(review_1, word_index, max_length)
review_2_processed = preprocess_review(review_2, word_index, max_length)

# Make predictions
prediction_1 = model.predict(review_1_processed)[0][0]
prediction_2 = model.predict(review_2_processed)[0][0]

# Output predictions
print(f"Review 1 Sentiment Score: {prediction_1} ({'Positive' if prediction_1 > 0.5 else 'Negative'})")
print(f"Review 2 Sentiment Score: {prediction_2} ({'Positive' if prediction_2 > 0.5 else 'Negative'})")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Review 1 Sentiment Score: 0.848225474357605 (Positive)
Review 2 Sentiment Score: 0.2514648139476776 (Negative)
