In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Number of words to consider as features
vocab_size = 10000

# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Maximum review length in words
max_length = 500

# Pad sequences to the same length
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [3]:
model = Sequential([
    # Transform each integer into a dense vector of fixed size
    Embedding(input_dim=vocab_size, output_dim=32, input_length=max_length),
    # Average pooling to down-sample the input representation
    GlobalAveragePooling1D(),
    # Standard dense layer with ReLU activation
    Dense(16, activation='relu'),
    # Output layer for binary classification
    Dense(1, activation='sigmoid')
])


In [4]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [5]:
history = model.fit(x_train, y_train,
                    epochs=30,
                    batch_size=512,
                    validation_data=(x_test, y_test),
                    verbose=2)


Epoch 1/30
49/49 - 4s - loss: 0.6909 - accuracy: 0.5703 - val_loss: 0.6872 - val_accuracy: 0.6182 - 4s/epoch - 84ms/step
Epoch 2/30
49/49 - 3s - loss: 0.6769 - accuracy: 0.6892 - val_loss: 0.6648 - val_accuracy: 0.7021 - 3s/epoch - 57ms/step
Epoch 3/30
49/49 - 3s - loss: 0.6383 - accuracy: 0.7617 - val_loss: 0.6144 - val_accuracy: 0.7786 - 3s/epoch - 64ms/step
Epoch 4/30
49/49 - 3s - loss: 0.5711 - accuracy: 0.8020 - val_loss: 0.5453 - val_accuracy: 0.8052 - 3s/epoch - 68ms/step
Epoch 5/30
49/49 - 3s - loss: 0.4944 - accuracy: 0.8327 - val_loss: 0.4783 - val_accuracy: 0.8262 - 3s/epoch - 69ms/step
Epoch 6/30
49/49 - 3s - loss: 0.4257 - accuracy: 0.8602 - val_loss: 0.4240 - val_accuracy: 0.8485 - 3s/epoch - 69ms/step
Epoch 7/30
49/49 - 4s - loss: 0.3722 - accuracy: 0.8769 - val_loss: 0.3847 - val_accuracy: 0.8599 - 4s/epoch - 73ms/step
Epoch 8/30
49/49 - 4s - loss: 0.3328 - accuracy: 0.8877 - val_loss: 0.3571 - val_accuracy: 0.8665 - 4s/epoch - 75ms/step
Epoch 9/30
49/49 - 3s - loss: 0.

In [6]:
results = model.evaluate(x_test, y_test, verbose=2)
print(f'Test Loss: {results[0]}, Test Accuracy: {results[1]}')


782/782 - 3s - loss: 0.3133 - accuracy: 0.8821 - 3s/epoch - 4ms/step
Test Loss: 0.31330063939094543, Test Accuracy: 0.8820800185203552
