In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Parameters for the dataset
vocab_size = 10000     # Consider only the top 10,000 most frequent words
max_review_length = 256 # Pad/truncate reviews to this length

# Load the IMDB dataset
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Pad sequences to ensure all inputs have a uniform length (256)
train_data = pad_sequences(train_data, maxlen=max_review_length, padding='post', truncating='post')
test_data = pad_sequences(test_data, maxlen=max_review_length, padding='post', truncating='post')

In [None]:
# Hyperparameter: The dimensionality of the word embedding vector
embedding_dim = 128
model = Sequential([
    # LAYER 1: The EMBEDDING Layer
    # Input: (batch_size, 256 integers) -> Output: (batch_size, 256, 128 dense vectors)
    Embedding(input_dim=vocab_size,
              output_dim=embedding_dim,
              input_length=max_review_length),

    # LAYER 2: SimpleRNN - The basic RNN component
    # This layer processes the sequence sequentially,
    # It outputs a single vector (32 units) summarizing the entire sequence.
    SimpleRNN(32),

    # LAYER 3: Dense Hidden Layer
    # Processes the context vector provided by the SimpleRNN.
    Dense(16, activation='relu'),
    Dropout(0.2), # Dropout for regularization

    # LAYER 4: Output Layer for Binary Classification
    Dense(1, activation='sigmoid')])
model.summary()



In [None]:
model.compile(optimizer='adam',
              loss='binary_crossentropy', # Appropriate loss for sigmoid output
              metrics=['accuracy'])

In [None]:
# Train the model
epochs = 5
history = model.fit(train_data,
                    train_labels,
                    epochs=epochs,
                    batch_size=512,
                    validation_split=0.2, # Use 20% of training data for validation
                    verbose=1)

Epoch 1/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 541ms/step - accuracy: 0.4979 - loss: 0.6959 - val_accuracy: 0.5026 - val_loss: 0.6932
Epoch 2/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 529ms/step - accuracy: 0.5883 - loss: 0.6710 - val_accuracy: 0.5036 - val_loss: 0.6960
Epoch 3/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 503ms/step - accuracy: 0.6207 - loss: 0.6220 - val_accuracy: 0.5040 - val_loss: 0.7147
Epoch 4/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 565ms/step - accuracy: 0.6539 - loss: 0.5494 - val_accuracy: 0.5014 - val_loss: 0.7573
Epoch 5/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 536ms/step - accuracy: 0.6466 - loss: 0.5094 - val_accuracy: 0.5040 - val_loss: 0.8193


In [None]:
test_loss, test_acc = model.evaluate(test_data, test_labels, verbose=2)
print(f"\nFinal Test Accuracy: {test_acc:.4f}")
print(f"Final Test Loss: {test_loss:.4f}")

782/782 - 14s - 18ms/step - accuracy: 0.5078 - loss: 0.8062

Final Test Accuracy: 0.5078
Final Test Loss: 0.8062
