# In [None]:
import tensorflow as tf
from tensorflow import keras
 
# 1. IMDB movie reviews with binary (positive / negative) sentiment labels
max_features = 10000  # vocabulary cap: keep only the 10,000 most common words
max_len = 200         # every review is padded/truncated to this many words

(x_train, y_train), (x_test, y_test) = (
    keras.datasets.imdb.load_data(num_words=max_features)
)

# 2. Bring both splits to a uniform length of max_len word indices.
#    pad_sequences is called with its defaults, which pad and truncate at the
#    start of each sequence, so the end of a long review is what gets kept.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=max_len)
    for split in (x_train, x_test)
)

print(f"Training samples: {len(x_train)}")
print(f"Each review padded to length: {x_train.shape[1]}")
 
# 3. Build the LSTM model
model = keras.Sequential([
    # Declare the input explicitly: one row of max_len integer word indices
    # per review. This replaces Embedding's `input_length` argument, which
    # Keras 3 deprecates and ignores, and it builds the model up front so that
    # model.summary() can report output shapes and parameter counts.
    keras.Input(shape=(max_len,), dtype='int32'),

    # Embedding: convert word indices to dense 64-dimensional vectors
    keras.layers.Embedding(max_features, 64),

    # LSTM layer to learn sequential patterns; return_sequences=False keeps
    # only the final output instead of one output per timestep
    keras.layers.LSTM(64, return_sequences=False),

    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(1, activation='sigmoid')  # Binary output
])
 
# 4. Configure training: Adam optimizer, binary cross-entropy loss (matching
#    the single sigmoid output unit), and accuracy as the reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the model
model.summary()
 
# 5. Fit for 5 epochs in mini-batches of 64, holding out 20% of the training
#    data for validation
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

# 6. Measure loss and accuracy on the test split
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f"\nTest Accuracy: {test_acc:.4f}")
 
# 7. Predict on new text (not implemented here; in real use, raw text must
#    first be converted to word indices and padded to max_len, as in steps 1-2)
# Interpreting the model's sigmoid output:
#   output > 0.5 = Positive review
#   output < 0.5 = Negative review
