In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.datasets import imdb

# Data Preprocessing:

**1. Load the IMDB dataset, keeping only the top 10,000 most frequently occurring words.**

In [2]:
common_words = 10000                    ## only consider the top 10,000 most common words as mentioned in problem statement 
(X_train, y_train) , (X_test, y_test) = imdb.load_data(num_words = common_words)     ## data Loading
max_sequence_length = 400                                                            ## Setting max lenth of reviews 

**2. Pad the sequences so that they all have the same length.**

In [3]:
# Neural networks expect inputs to have a consistent shape for each batch. 
#If sequences have varying lengths (as text data often does), the model cannot process them together.

X_train = pad_sequences(X_train, maxlen=max_sequence_length)
X_test = pad_sequences(X_test, maxlen=max_sequence_length)

In [4]:
X_train.shape

(25000, 400)

In [5]:
X_test.shape

(25000, 400)

# Model Building:

**3. Create sequential RNN model using Tensorflow and Keras. The model should consist of an Embedding layer, a SimpleRNN layer, and a Dense Output Layer**

In [6]:
model = Sequential([Embedding(input_dim=common_words, output_dim=32, input_length=max_sequence_length),
                   SimpleRNN(32),
                   Dense(1, activation = 'sigmoid')])             # Output layer for binary classification



**4. Compile the model, specifying the appropriate optimizer, loss function, and metrics**

In [7]:
model.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])

In [8]:
model.summary()

# Training:

**5. Train the model on the preprocessed movie reviews, using a batch size of 128 and validating on 20% of the training data.**

In [9]:
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_split=0.2)  # Use 20% of training data for validation

Epoch 1/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 94ms/step - accuracy: 0.6003 - loss: 0.6508 - val_accuracy: 0.7916 - val_loss: 0.4657
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 91ms/step - accuracy: 0.8481 - loss: 0.3660 - val_accuracy: 0.7784 - val_loss: 0.4954
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 102ms/step - accuracy: 0.8622 - loss: 0.3302 - val_accuracy: 0.8444 - val_loss: 0.3876
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 101ms/step - accuracy: 0.9445 - loss: 0.1623 - val_accuracy: 0.8526 - val_loss: 0.3977
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 99ms/step - accuracy: 0.9711 - loss: 0.0933 - val_accuracy: 0.8006 - val_loss: 0.5593
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 103ms/step - accuracy: 0.9865 - loss: 0.0515 - val_accuracy: 0.8300 - val_loss: 0.5308
Epoch 7/10
[

# Evaluation:

**6. Evaluate the model on the test set and report the accuracy.**

In [10]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
test_accuracy

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.8036 - loss: 0.7001


0.8046799898147583

In [12]:
def sentiment_prediction(review, word_index = imdb.get_word_index):
# Converts the review text to lowercase to ensure consistency & Splits the review into a list of words based on spaces.
    words = review.lower().split() 
# It maps each word in the review to its corresponding integer index using the word_index dictionary:
    indices = [word_index.get(word,2) for word in words]   
# Ensures that all sequences have the same length (as required by the RNN model).
    indices = pad_sequences([indices], max = max_sequence_length)
    prediction = model.predict(indices)[0][0]
    sentiment = 'Positive' if prediction > 0.5 else 'Negative'
    return sentiment, prediction

review = 'The movie was not good and i do not like movie'
sentiment, confidence = predict_sentiment(review)
print(f'Sentiment: {sentiment}, Confidence: {confidence}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Sentiment: Negative, Confidence: 0.050695374608039856
