In [3]:
# Import TensorFlow library (main deep learning framework)
import tensorflow as tf

# Import Sequential model (stack layers one after another)
from tensorflow.keras.models import Sequential

# Import required layers:
# Embedding → converts words to vectors
# SimpleRNN → basic recurrent neural network
# Bidirectional → reads sequence forward & backward
# Dense → fully connected output layer
from tensorflow.keras.layers import Embedding, SimpleRNN, Bidirectional, Dense

# Import IMDB movie review dataset (sentiment analysis)
from tensorflow.keras.datasets import imdb

# Import function to make all sequences same length
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [4]:
# Load IMDB dataset
# num_words=10000 → keep only top 10,000 most frequent words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Print number of training samples
print("Training samples:", len(x_train))

# Print number of testing samples
print("Testing samples:", len(x_test))

Training samples: 25000
Testing samples: 25000


In [5]:
# Maximum length of each review (number of words)
max_len = 200

# Pad training reviews to length 200
# Short reviews → padded with zeros
# Long reviews → truncated
x_train = pad_sequences(x_train, maxlen=max_len)

# Pad testing reviews to same length
x_test = pad_sequences(x_test, maxlen=max_len)

In [6]:
# Build the model
model = Sequential([

    # Embedding layer:
    # input_dim=10000 → vocabulary size
    # output_dim=128 → each word becomes a 128-dim vector
    # input_length=200 → length of input sequences
    Embedding(input_dim=10000, output_dim=128, input_length=max_len),

    # Bidirectional SimpleRNN:
    # Reads text left-to-right and right-to-left
    # 64 neurons in RNN
    Bidirectional(SimpleRNN(64)),

    # Output layer:
    # 1 neuron → binary classification (positive/negative)
    # sigmoid → output between 0 and 1
    Dense(1, activation='sigmoid')
])



In [7]:
# Compile the model
model.compile(
    optimizer='adam',                  # Optimizer to update weights
    loss='binary_crossentropy',         # Loss function for binary output
    metrics=['accuracy']                # Measure accuracy
)

# Show model architecture
model.summary()


In [8]:
# Train the model
history = model.fit(
    x_train, y_train,                  # Training data and labels
    epochs=5,                           # Number of training cycles
    batch_size=64,                      # Samples per batch
    validation_split=0.2,               # 20% data for validation
    verbose=1                           # Show training progress
)

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 65ms/step - accuracy: 0.6542 - loss: 0.6031 - val_accuracy: 0.7302 - val_loss: 0.5425
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 60ms/step - accuracy: 0.8207 - loss: 0.4022 - val_accuracy: 0.7898 - val_loss: 0.4712
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 60ms/step - accuracy: 0.8970 - loss: 0.2544 - val_accuracy: 0.6322 - val_loss: 0.7462
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 60ms/step - accuracy: 0.9436 - loss: 0.1532 - val_accuracy: 0.7980 - val_loss: 0.6150
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 62ms/step - accuracy: 0.9927 - loss: 0.0276 - val_accuracy: 0.7518 - val_loss: 0.7990


In [9]:
# Evaluate model on test data
loss, accuracy = model.evaluate(x_test, y_test, verbose=1)

# Print test accuracy
print("Test Accuracy:", accuracy)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 0.7530 - loss: 0.8052
Test Accuracy: 0.7529600262641907


In [10]:
# Get dictionary mapping words → integers
word_index = imdb.get_word_index()


# Function to convert raw text into model input
def encode_review(text):
    # Convert each word to its integer index
    # word_index.get(word, 2):
    # 2 is used for unknown words
    encoded = [word_index.get(word, 2) for word in text.lower().split()]
    
    # Pad encoded review to required length
    return pad_sequences([encoded], maxlen=max_len)


# Test with a positive review
review = "The movie was absolutely fantastic and enjoyable"

# Predict sentiment
prediction = model.predict(encode_review(review))

# Print result based on probability
print("Positive" if prediction[0][0] > 0.5 else "Negative")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step
Positive


In [11]:
# Two sentences with same words but different order
review1 = "movie not good"
review2 = "movie good not"

# Show predictions
print(model.predict(encode_review(review1)))
print(model.predict(encode_review(review2)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[[0.9966375]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[[0.9909812]]


In [12]:
# Test negation understanding
review = "the movie was not bad at all"
shuffled = "bad movie the at was not all"

# Predictions for correct and shuffled sentence
print(model.predict(encode_review(review)))
print(model.predict(encode_review(shuffled)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[[0.9952882]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[[0.998533]]


In [13]:
# Short vs long context
short_review = "not bad"
long_review = "the movie overall was not bad at all"

# Predictions
print(model.predict(encode_review(short_review)))
print(model.predict(encode_review(long_review)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[[0.9955038]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[[0.99101806]]


In [14]:
# Import layer that removes sequence order information
from tensorflow.keras.layers import GlobalAveragePooling1D


# Model without memory (no RNN)
model_no_memory = Sequential([
    
    # Embedding layer
    Embedding(10000, 120, input_length=max_len),
    
    # Averages word vectors (ignores order)
    GlobalAveragePooling1D(),
    
    # Output layer
    Dense(1, activation='sigmoid')
])
# Compile second model
model_no_memory.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)


In [17]:
# Test order sensitivity again
review1 = "movie not good"
review2 = "movie good not bad"

# Predictions using RNN model (not model_no_memory here)
print(model.predict(encode_review(review1)))
print(model.predict(encode_review(review2)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[[0.9966375]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[[0.8651316]]
