# In [None]:
# IMDB Text Classification using TensorFlow

# Step 1: Import Libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Step 2: Load the IMDb Dataset
# Vocabulary cap handed to the loader (top 10,000 most frequent words); the
# same value is reused later as the Embedding layer's input_dim.
num_words = 10_000
train_data, test_data = imdb.load_data(num_words=num_words)
x_train, y_train = train_data  # training reviews and their labels
x_test, y_test = test_data     # held-out reviews and their labels

print(f"Training samples: {len(x_train)}, Testing samples: {len(x_test)}")

# Step 3: Preprocess the Data (Padding sequences)
# Every review is brought to the same length so the splits can be fed to the
# network as rectangular arrays.
maxlen = 500  # Max review length
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (x_train, x_test)
)

# Show one processed example as a quick sanity check.
print("Sample (padded) review:", x_train[0])

# Step 4: Build the Model
# The fixed review length is declared with an explicit Input layer instead of
# Embedding's `input_length` argument: Keras 3 deprecates that argument and
# emits a warning when it is passed. The Input layer keeps the model's input
# shape defined up front on both older and newer Keras releases.
model = Sequential([
    tf.keras.Input(shape=(maxlen,), dtype="int32"),  # padded word-index sequences
    Embedding(input_dim=num_words, output_dim=32),   # word index -> 32-dim vector
    LSTM(64),                                        # 64-unit recurrent layer
    Dropout(0.5),                                    # dropout rate of 0.5
    Dense(1, activation='sigmoid')                   # single sigmoid output unit
])

# Step 5: Compile the Model
# Adam optimizer with binary cross-entropy loss; accuracy is tracked as the
# reported metric during training and evaluation.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 6: Train the Model
# `history` keeps the object returned by fit() for later inspection.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,  # fraction of the training data used for validation
)

# Step 7: Evaluate the Model
# evaluate() yields the loss followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(x=x_test, y=y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Step 8: Make Predictions
# Score the first five test reviews and print them next to the true labels.
predictions = model.predict(x_test[:5])
print("Predicted probabilities:", predictions.reshape(-1))  # flattened to 1-D for display
print("Actual labels:", y_test[:5])
