In [20]:
# Import necessary libraries
from tensorflow.keras.datasets import imdb
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [21]:
# Load the IMDB dataset
# Only keep the top 10,000 most frequently occurring words
(train_data, train_label), (test_data, test_label) = imdb.load_data(num_words=10000)

In [22]:
# Function to vectorize sequences (convert list of word indices into binary 10,000-dim vectors)
def vectorize_sequences(sequences, dimensions=10000):
    results = np.zeros((len(sequences), dimensions))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.0  # Set specific word positions to 1
    return results


In [23]:
# Prepare the input data
x_train = vectorize_sequences(train_data)  # Vectorize the training data
x_test = vectorize_sequences(test_data)    # Vectorize the test data

In [24]:
# Prepare the labels (ensure they are float32 for TensorFlow compatibility)
y_train = np.asarray(train_label).astype('float32')
y_test = np.asarray(test_label).astype('float32')

In [25]:
# Build the model
model = Sequential()
model.add(Dense(16, input_shape=(10000,), activation="relu"))  # First hidden layer
model.add(Dense(16, activation="relu"))                        # Second hidden layer
model.add(Dense(1, activation="sigmoid"))                      # Output layer (for binary classification)


In [26]:
# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Correct loss for binary classification
              metrics=['accuracy'])        # Track accuracy during training and evaluation

In [28]:
# Display model architecture
model.summary()

In [29]:
# Train the model
history = model.fit(
    x_train, y_train,
    validation_split=0.3,   # 30% of training data used for validation
    epochs=20,              # Train for 20 epochs
    batch_size=512,         # Use batches of 512 samples
    verbose=1               # Print progress bar
)

Epoch 1/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 37ms/step - accuracy: 0.6963 - loss: 0.6159 - val_accuracy: 0.8671 - val_loss: 0.3840
Epoch 2/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.8973 - loss: 0.3186 - val_accuracy: 0.8867 - val_loss: 0.2919
Epoch 3/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9299 - loss: 0.2163 - val_accuracy: 0.8901 - val_loss: 0.2756
Epoch 4/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9463 - loss: 0.1692 - val_accuracy: 0.8903 - val_loss: 0.2736
Epoch 5/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9560 - loss: 0.1392 - val_accuracy: 0.8867 - val_loss: 0.2851
Epoch 6/20
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.9698 - loss: 0.1065 - val_accuracy: 0.8833 - val_loss: 0.3076
Epoch 7/20
[1m35/35[0m [32m━━━━

In [33]:
# Evaluate the model on test data
results = model.evaluate(x_test, y_test)
print(f"Test Loss: {results[0]:.4f}")
print(f"Test Accuracy: {results[1]*100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8530 - loss: 0.7674
Test Loss: 0.7690
Test Accuracy: 85.20%


In [34]:
# Print example prediction
print(f"First review predicted probability (positive): {predictions[0][0]:.4f}")
print("Predicted Sentiment:", "Positive" if predictions[0][0] > 0.5 else "Negative")

First review predicted probability (positive): 0.1429
Predicted Sentiment: Negative
