In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Dense, Embedding, Flatten, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Dataset configuration: `vocab_size` is passed to the loader as `num_words`
# (vocabulary cap) and `max_len` is the fixed token length of every review.
vocab_size = 10000
max_len = 300

# Fetch the IMDb reviews as integer-encoded sequences with their labels
train_split, test_split = imdb.load_data(num_words=vocab_size)
X_train, y_train = train_split
X_test, y_test = test_split

# Bring both splits to a uniform length of `max_len` tokens
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

# Build the neural network model.
# The input shape is declared with an explicit Input layer instead of the
# `input_length` argument of Embedding, which is deprecated in Keras 3 and
# triggers a warning there. Declaring the input up front also means the model
# is built immediately, so it can be inspected before training.
model = Sequential([
    Input(shape=(max_len,), dtype="int32"),
    # Map each token id to a 32-dimensional vector
    Embedding(input_dim=vocab_size, output_dim=32),
    # Collapse the (max_len, 32) embedding grid into one feature vector
    Flatten(),
    Dense(16, activation='relu'),
    # Single sigmoid unit: probability of the positive class
    Dense(1, activation='sigmoid'),
])

# Compile the model for binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; validation_split=0.2 reserves 20% of the training data
# for validation during fitting.
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_split=0.2)

# Measure loss and accuracy on the test split and report both
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation
for metric_caption, metric_value in (
    ("Test Loss:", test_loss),
    ("Test Accuracy:", test_accuracy),
):
    print(metric_caption, metric_value)

# Turn the predicted probabilities into hard 0/1 labels (cut-off at 0.5)
y_pred_probs = model.predict(X_test)
y_pred_labels = (y_pred_probs.flatten() > 0.5).astype(int)

# Display names for label 0 and label 1, shared by the report and the heatmap
class_names = ['Negative', 'Positive']

# Text report of the per-class metrics
print("\nClassification Report:")
report_text = classification_report(y_test, y_pred_labels, target_names=class_names)
print(report_text)

# Confusion matrix drawn as an annotated heatmap
cm = confusion_matrix(y_test, y_pred_labels)
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
fig.tight_layout()
plt.show()

# Scatter plot: Actual vs Predicted
# Clamp the sample count so the x-axis index and the two slices always have
# the same length, even if fewer than 100 test samples are available.
sample_size = min(100, len(y_test), len(y_pred_labels))
indices = np.arange(sample_size)
actual = y_test[:sample_size]
predicted = y_pred_labels[:sample_size]

plt.figure(figsize=(10, 5))
# Both series only take the values 0/1 at identical x positions, so two
# identical filled markers would hide every correctly classified "Actual"
# point behind its "Predicted" twin. A large hollow circle for the actual
# label and a small cross for the prediction keep both visible: a cross
# inside a circle is a hit, a lone circle or lone cross is a miss.
plt.scatter(indices, actual, s=90, facecolors='none', edgecolors='blue',
            label='Actual', marker='o')
plt.scatter(indices, predicted, s=30, color='red', label='Predicted', marker='x')
plt.title("Actual vs Predicted Values")
plt.xlabel("Samples")
plt.ylabel("Sentiment")
plt.yticks([0, 1], ['Negative', 'Positive'])
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
