In [8]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from sklearn.metrics import classification_report

#data preprocessing (Tokenization & Padding)
def load_imdb_data():
    vocab_size = 10000  # Vocab size
    maxlen = 200        # standarize length

    #load and tokenize the IMDB dataset
    (X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data(num_words=vocab_size)

    #pad sequences that ensure uniform length
    X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = keras.preprocessing.sequence.pad_sequences(X_test, maxlen=maxlen)

    return X_train, y_train, X_test, y_test

#load  dataset
X_train, y_train, X_test, y_test = load_imdb_data()

#Model Architecture (Embedding + LSTM)
model = keras.Sequential([
    #Embedding layer
    Embedding(input_dim=10000, output_dim=64),

    #Bidirectional LSTM - architectural improvement*********
    Bidirectional(LSTM(64)),

    #Output layer (binary classification: positive/negative sentiment)
    Dense(1, activation='sigmoid')
])

#compile model
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2  # 20% training data for validation
)

#Model Evaluation - Predict sentiment probabilities
y_pred_probs = model.predict(X_test)

# Convert probabilities to binary predictions (0- negative or 1 - positive)
y_pred = (y_pred_probs > 0.5).astype("int32")

# classification report - precision, recall, F1-score
print("Classification Report:")
print(classification_report(y_test, y_pred))

Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 300ms/step - accuracy: 0.6948 - loss: 0.5500 - val_accuracy: 0.8604 - val_loss: 0.3405
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 328ms/step - accuracy: 0.9001 - loss: 0.2537 - val_accuracy: 0.8600 - val_loss: 0.3186
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 326ms/step - accuracy: 0.9337 - loss: 0.1814 - val_accuracy: 0.8590 - val_loss: 0.3583
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 317ms/step - accuracy: 0.9479 - loss: 0.1486 - val_accuracy: 0.8686 - val_loss: 0.3677
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 316ms/step - accuracy: 0.9619 - loss: 0.1102 - val_accuracy: 0.8686 - val_loss: 0.4518
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 315ms/step - accuracy: 0.9713 - loss: 0.0858 - val_accuracy: 0.8566 - val_loss: 0.3832
Epoch 7