In [1]:
# IMDB Text Classification using TensorFlow

# Step 1: Import Libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Step 2: Load the IMDb Dataset
num_words = 10000  # Only use top 10,000 most frequent words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

print(f"Training samples: {len(x_train)}, Testing samples: {len(x_test)}")

# Step 3: Preprocess the Data (Padding sequences)
maxlen = 500  # Max review length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

print("Sample (padded) review:", x_train[0])

# Step 4: Build the Model
model = Sequential([
    Embedding(input_dim=num_words, output_dim=32, input_length=maxlen),
    LSTM(64),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Step 5: Compile the Model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# Step 6: Train the Model
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2
)

# Step 7: Evaluate the Model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Step 8: Make Predictions
predictions = model.predict(x_test[:5])
print("Predicted probabilities:", predictions.flatten())
print("Actual labels:", y_test[:5])


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training samples: 25000, Testing samples: 25000
Sample (padded) review: [   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0  



Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 540ms/step - accuracy: 0.6457 - loss: 0.5922 - val_accuracy: 0.8412 - val_loss: 0.3700
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 402ms/step - accuracy: 0.8882 - loss: 0.2927 - val_accuracy: 0.8712 - val_loss: 0.3144
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 412ms/step - accuracy: 0.9244 - loss: 0.2074 - val_accuracy: 0.8576 - val_loss: 0.3571
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 410ms/step - accuracy: 0.9283 - loss: 0.1982 - val_accuracy: 0.8668 - val_loss: 0.3365
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 412ms/step - accuracy: 0.9507 - loss: 0.1443 - val_accuracy: 0.8756 - val_loss: 0.3416
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 80ms/step - accuracy: 0.8632 - loss: 0.3769
Test Accuracy: 0.8646
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m