In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

# Reproducibility: seed Python's `random`, NumPy and TensorFlow in one call so
# that weight initialisation and batch shuffling are repeatable across a
# Restart & Run All. Without this, every run reports different metrics.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Dataset configuration
max_features = 10000  # vocabulary size: only the 10,000 most frequent words are kept
max_len = 200         # fixed sequence length used when padding/truncating reviews;
                      # note that pad_sequences' default truncating='pre' drops tokens
                      # from the START of longer reviews, i.e. the last 200 are kept

# Load the IMDB sentiment dataset. Each review is a variable-length list of
# word indices, which is why the arrays below are 1-D (one object per review).
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

print("Training data shape:", x_train.shape)
print("Test data shape:", x_test.shape)


Training data shape: (25000,)
Test data shape: (25000,)


In [2]:
# Bring every review to exactly `max_len` tokens so the data becomes a dense
# 2-D integer array. The Keras defaults are spelled out here for clarity:
#   padding='pre'    -> short reviews get zeros added at the front
#   truncating='pre' -> long reviews lose tokens from the front (the tail is kept)
x_train = sequence.pad_sequences(
    x_train,
    maxlen=max_len,
    padding='pre',
    truncating='pre',
)
x_test = sequence.pad_sequences(
    x_test,
    maxlen=max_len,
    padding='pre',
    truncating='pre',
)

print("Padded x_train shape:", x_train.shape)


Padded x_train shape: (25000, 200)


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Binary sentiment classifier: word indices -> dense embeddings -> a single
# recurrent layer -> one sigmoid unit giving P(review is positive).
model = Sequential([
    # Lookup table mapping each of the `max_features` word ids to a 128-d vector.
    Embedding(input_dim=max_features, output_dim=128),
    # Plain (non-gated) RNN; only the final 64-d hidden state is passed on.
    SimpleRNN(units=64),
    # Single sigmoid output for the two-class problem.
    Dense(units=1, activation='sigmoid'),
])

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 128)         1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 64)                12352     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 1,292,417
Trainable params: 1,292,417
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure training: Adam optimiser, binary cross-entropy to match the single
# sigmoid output, and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 5 epochs in mini-batches of 64. validation_split=0.2 holds out 20%
# of the training arrays for validation (Keras takes that slice from the end of
# the arrays, before any shuffling), leaving 20,000 samples -> 313 steps/epoch.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)


Epoch 1/5
 72/313 [=====>........................] - ETA: 28s - loss: 0.6887 - accuracy: 0.5373  

In [5]:
# Score the test set, then turn the sigmoid probabilities into hard 0/1 labels
# using a 0.5 decision threshold. The result keeps the model's (n_samples, 1)
# output shape, so individual labels are read as y_pred[i][0].
test_probabilities = model.predict(x_test)
y_pred = (test_probabilities > 0.5).astype("int32")




In [6]:
from sklearn.metrics import accuracy_score, classification_report

# Compare the thresholded predictions with the ground-truth test labels:
# overall accuracy plus per-class precision / recall / F1.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Test Accuracy:", test_accuracy)
print("Classification Report:\n", report_text)


Test Accuracy: 0.77968
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.71      0.76     12500
           1       0.75      0.85      0.79     12500

    accuracy                           0.78     25000
   macro avg       0.79      0.78      0.78     25000
weighted avg       0.79      0.78      0.78     25000



In [7]:
# Quick sanity check: list the true label next to the predicted label for the
# first 10 test reviews (y_pred is 2-D, hence the trailing [0]).
preview_lines = [
    f"Sample {i+1}:  Actual = {y_test[i]},  Predicted = {y_pred[i][0]}"
    for i in range(10)
]
print("\n".join(preview_lines))


Sample 1:  Actual = 0,  Predicted = 0
Sample 2:  Actual = 1,  Predicted = 1
Sample 3:  Actual = 1,  Predicted = 1
Sample 4:  Actual = 0,  Predicted = 1
Sample 5:  Actual = 1,  Predicted = 1
Sample 6:  Actual = 1,  Predicted = 0
Sample 7:  Actual = 1,  Predicted = 1
Sample 8:  Actual = 0,  Predicted = 0
Sample 9:  Actual = 0,  Predicted = 1
Sample 10:  Actual = 1,  Predicted = 1
