# **Import necessary libraries**

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.datasets import imdb
import numpy as np

# **Load IMDB dataset**

In [3]:
vocab_size = 10000  # Number of words to keep in vocabulary
max_length = 100  # Maximum review length
embedding_dim = 32
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


# **Pad sequences to ensure uniform input size**

In [4]:
x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')

# **Build the RNN model (using LSTM)**

In [5]:
model = keras.Sequential([
    keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    keras.layers.LSTM(64, return_sequences=False),  # Using LSTM instead of SimpleRNN
    keras.layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])



# **Compile the model**

In [6]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# **Train the model**

In [7]:
history = model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 99ms/step - accuracy: 0.6456 - loss: 0.5970 - val_accuracy: 0.7933 - val_loss: 0.4517
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 99ms/step - accuracy: 0.8688 - loss: 0.3327 - val_accuracy: 0.7984 - val_loss: 0.4545
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 96ms/step - accuracy: 0.8911 - loss: 0.2886 - val_accuracy: 0.8076 - val_loss: 0.4433
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 96ms/step - accuracy: 0.9134 - loss: 0.2367 - val_accuracy: 0.7948 - val_loss: 0.4661
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 96ms/step - accuracy: 0.9299 - loss: 0.1980 - val_accuracy: 0.8018 - val_loss: 0.5442


# **Evaluate the model**

In [8]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.7991 - loss: 0.5466
Test Accuracy: 0.8018


# **Save the model**

In [9]:
model.save('/content/sentiment_rnn_model.h5')
print("Model saved to /content/sentiment_rnn_model.h5")



Model saved to /content/sentiment_rnn_model.h5


# **Load IMDB word index (to convert words to numbers)**

In [10]:
word_index = imdb.get_word_index()
word_index = {k: (v + 3) for k, v in word_index.items()}  # Shift indices
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3
reverse_word_index = {v: k for k, v in word_index.items()}  # Reverse mapping

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


# **Helper Functions**

In [11]:
# Function to preprocess custom text
def encode_text(text):
    words = text.lower().split()
    encoded = [word_index.get(word, 2) for word in words]  # Convert words to numbers
    padded = pad_sequences([encoded], maxlen=max_length, padding='post', truncating='post')
    return padded

In [12]:
# Function to predict sentiment from user input
def predict_sentiment(text):
    processed_text = encode_text(text)
    prediction = model.predict(processed_text)[0][0]  # Get probability
    sentiment = "Positive 😀" if prediction > 0.5 else "Negative 😞"
    print(f"\nReview: {text}")
    print(f"Sentiment: {sentiment} (Confidence: {prediction:.4f})")

# **Example test**

In [13]:
sample_text = "This movie was fantastic! I really enjoyed it."
predict_sentiment(sample_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step

Review: This movie was fantastic! I really enjoyed it.
Sentiment: Positive 😀 (Confidence: 0.8192)


# **Allow user to input their own review**

In [14]:
user_review = input("Enter a movie review: ")
predict_sentiment(user_review)

Enter a movie review: this movie was bad for me
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step

Review: this movie was bad for me
Sentiment: Negative 😞 (Confidence: 0.1590)
