In [3]:
#import libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense


In [4]:
# Load IMDb dataset
vocab_size = 10000  # Only keep top 10,000 words
maxlen = 200        # Max review length

In [5]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 3us/step


In [7]:
# Pad sequences to ensure equal length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

In [8]:
print("Shape of x_train:", x_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of x_test:", x_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of x_train: (25000, 200)
Shape of y_train: (25000,)
Shape of x_test: (25000, 200)
Shape of y_test: (25000,)


In [14]:
# Build RNN model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_shape=(maxlen,)),
    SimpleRNN(64, return_sequences=False),  # Output only the last output
    Dense(1, activation='sigmoid')          # Binary classification
])

  super().__init__(**kwargs)


In [15]:
model.summary()

In [16]:
# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [17]:
# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 80ms/step - accuracy: 0.5634 - loss: 0.6695 - val_accuracy: 0.7244 - val_loss: 0.5450
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 76ms/step - accuracy: 0.7869 - loss: 0.4685 - val_accuracy: 0.8206 - val_loss: 0.4201
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 77ms/step - accuracy: 0.8989 - loss: 0.2577 - val_accuracy: 0.8136 - val_loss: 0.4270
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 77ms/step - accuracy: 0.9411 - loss: 0.1662 - val_accuracy: 0.7962 - val_loss: 0.5240
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 82ms/step - accuracy: 0.9760 - loss: 0.0781 - val_accuracy: 0.7778 - val_loss: 0.6602


<keras.src.callbacks.history.History at 0x1f431762a20>

In [20]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.7670 - loss: 0.6735
Test Loss: 0.6743
Test Accuracy: 0.7677


In [19]:
# Evaluate
loss, accuracy = model.evaluate(x_test, y_test)
print(f"\nTest Accuracy: {accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.7670 - loss: 0.6735

Test Accuracy: 0.7677


In [23]:
# Predict on test data
y_pred_prob = model.predict(x_test)

# Convert probabilities to binary class (0 or 1)
y_pred = (y_pred_prob > 0.5).astype("int32")

# Check accuracy manually (optional, since model.evaluate already does it)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("Manual Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 47ms/step
Manual Accuracy: 0.85852

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.88      0.86     12500
           1       0.87      0.84      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86      0.86     25000
weighted avg       0.86      0.86      0.86     25000


Confusion Matrix:
 [[10965  1535]
 [ 2002 10498]]


In [24]:
# Show prediction vs actual for first 10 samples
for i in range(10):
    print(f"Review {i+1}:")
    print(f"Predicted Sentiment: {'Positive' if y_pred[i][0] == 1 else 'Negative'}")
    print(f"True Sentiment:      {'Positive' if y_test[i] == 1 else 'Negative'}\n")


Review 1:
Predicted Sentiment: Negative
True Sentiment:      Negative

Review 2:
Predicted Sentiment: Positive
True Sentiment:      Positive

Review 3:
Predicted Sentiment: Negative
True Sentiment:      Positive

Review 4:
Predicted Sentiment: Negative
True Sentiment:      Negative

Review 5:
Predicted Sentiment: Positive
True Sentiment:      Positive

Review 6:
Predicted Sentiment: Positive
True Sentiment:      Positive

Review 7:
Predicted Sentiment: Positive
True Sentiment:      Positive

Review 8:
Predicted Sentiment: Negative
True Sentiment:      Negative

Review 9:
Predicted Sentiment: Positive
True Sentiment:      Negative

Review 10:
Predicted Sentiment: Positive
True Sentiment:      Positive



In [21]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Parameters
vocab_size = 10000  # top 10,000 words
maxlen = 200        # max length of each review

# Load and preprocess the data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Build LSTM model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_shape=(maxlen,)),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

# Compile model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# Evaluate

loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

Epoch 1/5


  super().__init__(**kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 228ms/step - accuracy: 0.6889 - loss: 0.5583 - val_accuracy: 0.8718 - val_loss: 0.3161
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 200ms/step - accuracy: 0.9074 - loss: 0.2471 - val_accuracy: 0.8602 - val_loss: 0.3199
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 196ms/step - accuracy: 0.9315 - loss: 0.1876 - val_accuracy: 0.8680 - val_loss: 0.3356
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 190ms/step - accuracy: 0.9504 - loss: 0.1457 - val_accuracy: 0.8744 - val_loss: 0.3371
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 208ms/step - accuracy: 0.9556 - loss: 0.1249 - val_accuracy: 0.8710 - val_loss: 0.3947
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 59ms/step - accuracy: 0.8593 - loss: 0.4333
Test Loss: 0.4334
Test Accuracy: 0.8585
