In [13]:
# First step, import libraries.
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences # Padding CNN
from tensorflow.keras.datasets import imdb

In [15]:
# Load IMDB dataset
vocab_size = 10000 # Use the top 10,000 most frequent words
maxlen = 100 # Limit each review to 100 words

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

In [16]:
# Pad sequences to ensure uniform input length
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

In [17]:
# Load the word-to-index mapping
word_to_index = imdb.get_word_index()
# Reverse mapping from index to word (optional, useful for debugging)
index_to_word = {index + 3: word for word, index in word_to_index.items()}
index_to_word[0] = "[PAD]"
index_to_word[1] = "[START]"
index_to_word[2] = "[UNK]"

In [20]:
# Define the RNN model

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=maxlen),
    SimpleRNN(32, activation='tanh'),
    Dense(1, activation='sigmoid')
])

In [21]:
# compile the model

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [23]:
# Train the model

history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2)

Epoch 1/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 21ms/step - accuracy: 0.5549 - loss: 0.6820 - val_accuracy: 0.7386 - val_loss: 0.5304
Epoch 2/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.8297 - loss: 0.3977 - val_accuracy: 0.8230 - val_loss: 0.3985
Epoch 3/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.9274 - loss: 0.2068 - val_accuracy: 0.7776 - val_loss: 0.4945
Epoch 4/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - accuracy: 0.9746 - loss: 0.0869 - val_accuracy: 0.8066 - val_loss: 0.5470
Epoch 5/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.9915 - loss: 0.0326 - val_accuracy: 0.8054 - val_loss: 0.6361
Epoch 6/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - accuracy: 0.9988 - loss: 0.0091 - val_accuracy: 0.8070 - val_loss: 0.7183
Epoch 7/20
[1m313/

In [24]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.8101 - loss: 1.0755
Test Accuracy: 0.8123


In [28]:
# Predict on a sample review

sample_review = input("Enter your Review: ")

Enter your Review:  Bad


In [29]:
# Preprocess the sample review
sample_tokens = [word_to_index.get(word, 2) for word in sample_review.lower().split()]
sample_padded = pad_sequences([sample_tokens], maxlen=maxlen)
predicted_sentiment = model.predict(sample_padded)
print(f"Predicted Sentiment: {'Positive' if predicted_sentiment > 0.5 else 'Negative'}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
Predicted Sentiment: Positive
