In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Binary sentiment classification of IMDB reviews with a small
# Embedding -> Flatten -> Dense network.

# Step 1: Load the IMDB dataset
# Only the `num_words` most frequent words are kept; rarer words are mapped
# to the out-of-vocabulary index by `imdb.load_data`.
num_words = 10000
# Every review is padded or truncated to exactly `max_len` tokens.
max_len = 500

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

# Step 2: Preprocess the data
# Reviews have different lengths; the dense layers below need a fixed-size
# input, so bring every sequence to `max_len` integer ids.
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

# Step 3: Build the model
# The input shape is declared with an explicit `Input` instead of the
# deprecated `input_length` argument of `Embedding`. This also builds the
# model immediately, so `model.summary()` can report output shapes and
# parameter counts before training starts.
model = models.Sequential([
    layers.Input(shape=(max_len,), dtype='int32'),
    layers.Embedding(input_dim=num_words, output_dim=128),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),  # Sigmoid for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Step 4: Train the model
# The last 20% of the training data is held out for validation.
# NOTE(review): in the recorded run, validation loss is lowest at epoch 3 and
# rises afterwards while training accuracy keeps climbing, i.e. the model
# overfits; consider fewer epochs or early stopping.
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Step 5: Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")

# Step 6: Make predictions
# `predict` returns one probability per review with shape (n_samples, 1);
# flatten it so each label is a plain scalar rather than a 1-element array.
y_pred = model.predict(X_test)
y_pred_labels = (y_pred > 0.5).astype("int32").ravel()

# Display first 10 predictions
for predicted, actual in zip(y_pred_labels[:10], y_test[:10]):
    print(f"Predicted: {'Positive' if predicted == 1 else 'Negative'}, True: {'Positive' if actual == 1 else 'Negative'}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step




Epoch 1/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 58s 173ms/step - accuracy: 0.5139 - loss: 0.7118 - val_accuracy: 0.5974 - val_loss: 0.6418
Epoch 2/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 84s 181ms/step - accuracy: 0.7772 - loss: 0.4577 - val_accuracy: 0.8684 - val_loss: 0.3225
Epoch 3/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 82s 180ms/step - accuracy: 0.9220 - loss: 0.2017 - val_accuracy: 0.8750 - val_loss: 0.3158
Epoch 4/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 78s 169ms/step - accuracy: 0.9617 - loss: 0.1147 - val_accuracy: 0.8756 - val_loss: 0.3396
Epoch 5/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 84s 174ms/step - accuracy: 0.9846 - loss: 0.0559 - val_accuracy: 0.8732 - val_loss: 0.3957
782/782 ━━━━━━━━━━━━━━━━━━━━ 14s 18ms/step - accuracy: 0.8706 - loss: 0.3807
Test accuracy: 0.8708
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[