In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# 1. Load the IMDB dataset
# (num_words=10000) -> only keep the top 10,000 most frequent words
(train_data, train_labels), (test_data, test_labels) = keras.datasets.imdb.load_data(num_words=10000)

# 2. Preprocessing: Pad sequences so they're all the same length
train_data = keras.preprocessing.sequence.pad_sequences(train_data, value=0, padding='post', maxlen=256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, value=0, padding='post', maxlen=256)

# 3. Build the model (Deep Neural Network)
model = keras.Sequential([
    layers.Embedding(input_dim=10000, output_dim=16),   # Turn word index into dense vector
    layers.GlobalAveragePooling1D(),                    # Average over all word embeddings
    layers.Dense(16, activation='relu'),                 # Hidden layer
    layers.Dense(1, activation='sigmoid')                # Output layer: 1 neuron, sigmoid (binary classification)
])

# 4. Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 5. Train the model
history = model.fit(train_data, train_labels, epochs=10, batch_size=512, validation_split=0.2)

# 6. Evaluate the model
results = model.evaluate(test_data, test_labels)
print(f"Test Loss: {results[0]:.4f}")
print(f"Test Accuracy: {results[1]:.4f}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step
Epoch 1/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 26ms/step - accuracy: 0.5272 - loss: 0.6906 - val_accuracy: 0.6886 - val_loss: 0.6777
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.6594 - loss: 0.6727 - val_accuracy: 0.7344 - val_loss: 0.6497
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.7195 - loss: 0.6409 - val_accuracy: 0.7728 - val_loss: 0.6040
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.7818 - loss: 0.5891 - val_accuracy: 0.7960 - val_loss: 0.5453
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.8062 - loss: 0.5257 - val_accuracy: 0.8222 - val_loss: 0.4833
Epoch 6/10
[1m40/4

In [2]:
# 7. Test with a new review (optional)

# Load word index mapping (word -> int)
word_index = keras.datasets.imdb.get_word_index()

# Function to encode a review (text -> integers)
def encode_review(text):
    words = text.lower().split()
    encoded = [word_index.get(word, 2) for word in words]  # 2 = unknown words
    return keras.preprocessing.sequence.pad_sequences([encoded], maxlen=256)

# Example custom review
sample_review = "This movie was absolutely wonderful, great acting and story"
encoded_review = encode_review(sample_review)

# Predict
prediction = model.predict(encoded_review)
print(f"Prediction Score: {prediction[0][0]:.4f}")

if prediction[0][0] > 0.5:
    print("Sentiment: Positive 😀")
else:
    print("Sentiment: Negative 😞")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
Prediction Score: 0.6976
Sentiment: Positive 😀
