In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

In [2]:
# Hyperparameters and Setup
max_features = 10000  # Vocabulary size
max_len = 500         # Max review length
embedding_dim = 64    # Embedding size
lstm_units = 64       # LSTM units

print("Loading data...")
# Load the dataset
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
print(f'Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}')

# Preprocessing: Padding Sequences
print("\nPadding sequences...")
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)
print(f'X_train shape after padding: {X_train.shape}')

# Model Definition
model = Sequential()

# 1. Embedding Layer
model.add(Embedding(max_features, embedding_dim, input_length=max_len))

# 2. LSTM Layer (Reduced units for regularization)
model.add(LSTM(lstm_units, activation='tanh'))

# 3. Dropout (Increased rate for stronger regularization)
model.add(Dropout(0.6))

# 4. Output Layer (L2 Regularization added to reduce bias)
model.add(Dense(1, activation="sigmoid", kernel_regularizer=regularizers.l2(0.001)))

model.build(input_shape=(None, max_len))
model.summary()

# Compile and Train the Model
opt = Adam(learning_rate=0.0005)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping setup
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)


Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training data shape: (25000,), Testing data shape: (25000,)

Padding sequences...
X_train shape after padding: (25000, 500)




In [3]:
print("\nStarting model training...")
history = model.fit(
    X_train,
    y_train,
    epochs=20, # Relying on early stopping
    batch_size=32,
    validation_split=0.2,
    callbacks=[earlystopping]
)




Starting model training...
Epoch 1/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 20ms/step - accuracy: 0.6753 - loss: 0.5872 - val_accuracy: 0.8452 - val_loss: 0.3732
Epoch 2/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.8947 - loss: 0.2839 - val_accuracy: 0.8788 - val_loss: 0.2942
Epoch 3/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.9301 - loss: 0.2005 - val_accuracy: 0.8790 - val_loss: 0.3003
Epoch 4/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 19ms/step - accuracy: 0.9466 - loss: 0.1632 - val_accuracy: 0.8796 - val_loss: 0.3157
Epoch 5/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.9560 - loss: 0.1361 - val_accuracy: 0.8682 - val_loss: 0.3403
Epoch 6/20
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 19ms/step - accuracy: 0.9523 - loss: 0.1482 - val_accuracy: 0.8688 - val_l

In [4]:
# --- NEW LOGIC FOR ACCURACY ---
# Find the best accuracy and corresponding epoch from the training history
best_val_acc = max(history.history['val_accuracy'])
best_val_epoch = history.history['val_accuracy'].index(best_val_acc) + 1
best_train_acc_at_best_val = history.history['accuracy'][best_val_epoch - 1]

print("\n--- Training Performance Summary ---")
print(f"Best Validation Accuracy: {best_val_acc:.4f} (at epoch {best_val_epoch})")
print(f"Training Accuracy at Best Validation Epoch: {best_train_acc_at_best_val:.4f}")
# -------------------------------

# Evaluate on the Test Data
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\n--- Final Test Evaluation ---")
print(f"Test Accuracy of the improved model: {acc*100:.2f}%")
print(f"Test Loss: {loss:.4f}")


--- Training Performance Summary ---
Best Validation Accuracy: 0.8796 (at epoch 4)
Training Accuracy at Best Validation Epoch: 0.9420

--- Final Test Evaluation ---
Test Accuracy of the improved model: 87.36%
Test Loss: 0.3089


In [5]:
# Save the improved model
model.save('lstmRNN.h5')
print("\nImproved model saved as 'lstmRNN.h5'.")




Improved model saved as 'lstmRNN.h5'.
