In [1]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Fetch the Keras IMDB review dataset; num_words=10000 restricts the
# vocabulary to the 10,000 most frequent words.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=10000
)

# Reviews differ in length, so bring both splits to one fixed length
# (pad_sequences is used with its default padding/truncation settings).
max_len_short = 200  # Fixed length for the short (IMDB) sequences
X_train_padded, X_test_padded = (
    pad_sequences(reviews, maxlen=max_len_short)
    for reviews in (X_train, X_test)
)

print(f"X_train_padded shape: {X_train_padded.shape}, X_test_padded shape: {X_test_padded.shape}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
X_train_padded shape: (25000, 200), X_test_padded shape: (25000, 200)


In [3]:
# Simulate long sequences (e.g., Wikipedia-like data with 1500 tokens per sequence).
# A seeded, local Generator keeps the synthetic data identical across
# "Restart Kernel -> Run All" without touching NumPy's global random state.
long_seq_seed = 42
long_seq_rng = np.random.default_rng(long_seq_seed)
long_sequences = long_seq_rng.integers(10000, size=(1000, 1500))  # 1000 samples, token ids in [0, 10000)
# Labels are drawn independently of the token ids, so there is no learnable
# signal: held-out accuracy on this dataset is expected to hover around 50%.
labels_long = long_seq_rng.integers(2, size=(1000,))  # Binary labels for the simulated long sequences

# Split long sequence dataset into training and testing sets (80% / 20%);
# random_state pins the shuffle so the split is reproducible.
X_train_long, X_test_long, y_train_long, y_test_long = train_test_split(
    long_sequences, labels_long, test_size=0.2, random_state=long_seq_seed
)

# Bring long sequences to a fixed length of 500 tokens. Every simulated
# sequence has 1500 tokens, so in practice this only truncates.
max_len_long = 500  # Truncate/pad sequences to 500 tokens
X_train_long_padded = pad_sequences(X_train_long, maxlen=max_len_long)
X_test_long_padded = pad_sequences(X_test_long, maxlen=max_len_long)

print(f"X_train_long_padded shape: {X_train_long_padded.shape}, X_test_long_padded shape: {X_test_long_padded.shape}")

X_train_long_padded shape: (800, 500), X_test_long_padded shape: (200, 500)


In [4]:
# Function to build a Sequential LSTM model
def build_model_lstm(input_length, units=64, num_layers=1, vocab_size=10000, embedding_dim=128):
    """Build and compile a stacked-LSTM binary classifier.

    The network is Input -> Embedding -> `num_layers` LSTM layers ->
    Dense(1, sigmoid), compiled with the Adam optimizer, binary
    cross-entropy loss and an accuracy metric.

    Args:
        input_length: Number of token ids in each input sequence.
        units: Number of units in every LSTM layer.
        num_layers: Number of stacked LSTM layers; must be at least 1.
        vocab_size: `input_dim` of the embedding layer. Defaults to 10000,
            the value previously hard-coded here.
        embedding_dim: `output_dim` of the embedding layer. Defaults to 128,
            the value previously hard-coded here.

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: If `num_layers` is smaller than 1.
    """
    # Without at least one LSTM the Dense layer would sit directly on the
    # per-timestep embedding output and emit one prediction per token.
    if num_layers < 1:
        raise ValueError(f"num_layers must be >= 1, got {num_layers}")

    model = Sequential()
    # Declare the input shape with an explicit Input layer rather than
    # Embedding's `input_length` argument, which Keras 3 deprecates.
    model.add(tf.keras.Input(shape=(input_length,)))
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
    for layer_index in range(num_layers):
        # Intermediate LSTMs must return the full sequence so the next LSTM
        # receives 3-D input; only the last one collapses to a single vector.
        is_last_layer = layer_index == num_layers - 1
        model.add(LSTM(units, return_sequences=not is_last_layer))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [5]:
# Function to train and evaluate the model
def train_and_evaluate(X_train, X_test, y_train, y_test, units=64, num_layers=1, max_len=200,
                       epochs=5, batch_size=64, validation_split=0.2):
    """Train a fresh LSTM classifier and report its accuracy on the test set.

    A new model is built with `build_model_lstm`, fitted on the training
    data, and used to predict the test data. Predicted probabilities are
    thresholded at 0.5 to obtain binary labels.

    Args:
        X_train: Training sequences passed to `model.fit`.
        X_test: Test sequences passed to `model.predict`.
        y_train: Training labels.
        y_test: Test labels the predictions are scored against.
        units: Number of units per LSTM layer.
        num_layers: Number of stacked LSTM layers.
        max_len: Sequence length the model is built for.
        epochs: Number of training epochs (default 5).
        batch_size: Mini-batch size used during training (default 64).
        validation_split: Fraction of the training data that `model.fit`
            holds out for validation (default 0.2).

    Returns:
        The test-set accuracy computed by `accuracy_score`.
    """
    model = build_model_lstm(input_length=max_len, units=units, num_layers=num_layers)

    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=1,
    )

    probabilities = model.predict(X_test)
    # Threshold at 0.5 to get integer class labels. This yields the same
    # decisions as np.round (which rounds exactly 0.5 down to 0).
    y_pred = (np.asarray(probabilities).ravel() > 0.5).astype(int)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"LSTM - Units: {units}, Layers: {num_layers}, Accuracy: {accuracy:.4f}")

    return accuracy

In [6]:
# Evaluate LSTMs on the IMDB dataset (short sequences)
print("Short Sequence Dataset (IMDB)")

# (units, num_layers) of every architecture in the comparison, in the order
# they are trained; short_seq_results[i] is the accuracy of configuration i.
short_seq_configs = [(64, 1), (64, 2), (128, 1), (128, 2)]

short_seq_results = []
for units, num_layers in short_seq_configs:
    layer_word = "layer" if num_layers == 1 else "layers"
    print(f"Evaluating LSTM ({num_layers} {layer_word}, {units} units)")
    short_seq_results.append(
        train_and_evaluate(
            X_train_padded,
            X_test_padded,
            y_train,
            y_test,
            units=units,
            num_layers=num_layers,
            max_len=max_len_short,
        )
    )

Short Sequence Dataset (IMDB)
Evaluating LSTM (1 layer, 64 units)




Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 315ms/step - accuracy: 0.7068 - loss: 0.5315 - val_accuracy: 0.8454 - val_loss: 0.3528
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 331ms/step - accuracy: 0.9036 - loss: 0.2517 - val_accuracy: 0.8742 - val_loss: 0.3342
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 295ms/step - accuracy: 0.9173 - loss: 0.2079 - val_accuracy: 0.8534 - val_loss: 0.3590
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 320ms/step - accuracy: 0.9574 - loss: 0.1240 - val_accuracy: 0.8616 - val_loss: 0.4416
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 297ms/step - accuracy: 0.9620 - loss: 0.1058 - val_accuracy: 0.7820 - val_loss: 0.5167
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 38ms/step
LSTM - Units: 64, Layers: 1, Accuracy: 0.7901
Evaluating LSTM (2 layers, 64 units)
Epoch 1/5




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 464ms/step - accuracy: 0.6989 - loss: 0.5402 - val_accuracy: 0.8734 - val_loss: 0.3144
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 447ms/step - accuracy: 0.9045 - loss: 0.2507 - val_accuracy: 0.8702 - val_loss: 0.3104
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 452ms/step - accuracy: 0.9445 - loss: 0.1571 - val_accuracy: 0.8550 - val_loss: 0.3512
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 445ms/step - accuracy: 0.9593 - loss: 0.1177 - val_accuracy: 0.8602 - val_loss: 0.3813
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 454ms/step - accuracy: 0.9743 - loss: 0.0778 - val_accuracy: 0.8380 - val_loss: 0.5227
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 70ms/step
LSTM - Units: 64, Layers: 2, Accuracy: 0.8455
Evaluating LSTM (1 layer, 128 units)
Epoch 1/5




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 549ms/step - accuracy: 0.7121 - loss: 0.5368 - val_accuracy: 0.8610 - val_loss: 0.3381
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 545ms/step - accuracy: 0.8976 - loss: 0.2623 - val_accuracy: 0.8652 - val_loss: 0.3141
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m196s[0m 526ms/step - accuracy: 0.9295 - loss: 0.1891 - val_accuracy: 0.8644 - val_loss: 0.3313
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 528ms/step - accuracy: 0.9534 - loss: 0.1384 - val_accuracy: 0.8448 - val_loss: 0.3746
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 527ms/step - accuracy: 0.9671 - loss: 0.0947 - val_accuracy: 0.8642 - val_loss: 0.4308
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 117ms/step
LSTM - Units: 128, Layers: 1, Accuracy: 0.8511
Evaluating LSTM (2 layers, 128 units)
Epoch 1/5




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m329s[0m 1s/step - accuracy: 0.7233 - loss: 0.5221 - val_accuracy: 0.8586 - val_loss: 0.3450
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m398s[0m 1s/step - accuracy: 0.8931 - loss: 0.2780 - val_accuracy: 0.8714 - val_loss: 0.3177
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 1s/step - accuracy: 0.9348 - loss: 0.1770 - val_accuracy: 0.8606 - val_loss: 0.3518
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m396s[0m 1s/step - accuracy: 0.9575 - loss: 0.1184 - val_accuracy: 0.8638 - val_loss: 0.4169
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m367s[0m 1s/step - accuracy: 0.9749 - loss: 0.0745 - val_accuracy: 0.8524 - val_loss: 0.4672
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 228ms/step
LSTM - Units: 128, Layers: 2, Accuracy: 0.8554


In [7]:
# Evaluate LSTMs on the simulated long sequences (e.g., Wikipedia-like)
print("\nLong Sequence Dataset (Simulated Wikipedia)")

# (units, num_layers) of every architecture in the comparison, in the order
# they are trained; long_seq_results[i] is the accuracy of configuration i.
long_seq_configs = [(64, 1), (64, 2), (128, 1), (128, 2)]

long_seq_results = []
for units, num_layers in long_seq_configs:
    layer_word = "layer" if num_layers == 1 else "layers"
    print(f"Evaluating LSTM ({num_layers} {layer_word}, {units} units)")
    long_seq_results.append(
        train_and_evaluate(
            X_train_long_padded,
            X_test_long_padded,
            y_train_long,
            y_test_long,
            units=units,
            num_layers=num_layers,
            max_len=max_len_long,
        )
    )


Long Sequence Dataset (Simulated Wikipedia)
Evaluating LSTM (1 layer, 64 units)
Epoch 1/5




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 782ms/step - accuracy: 0.4790 - loss: 0.6938 - val_accuracy: 0.4875 - val_loss: 0.6940
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 648ms/step - accuracy: 0.8301 - loss: 0.6773 - val_accuracy: 0.4938 - val_loss: 0.6956
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 852ms/step - accuracy: 0.9841 - loss: 0.5962 - val_accuracy: 0.5063 - val_loss: 0.7077
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 616ms/step - accuracy: 1.0000 - loss: 0.3516 - val_accuracy: 0.4938 - val_loss: 0.8424
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 724ms/step - accuracy: 0.9997 - loss: 0.0822 - val_accuracy: 0.4812 - val_loss: 1.0503
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step
LSTM - Units: 64, Layers: 1, Accuracy: 0.4900
Evaluating LSTM (2 layers, 64 units)
Epoch 1/5




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 950ms/step - accuracy: 0.5042 - loss: 0.6932 - val_accuracy: 0.5000 - val_loss: 0.6939
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.5652 - loss: 0.6768 - val_accuracy: 0.4938 - val_loss: 0.6979
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.9827 - loss: 0.4669 - val_accuracy: 0.4938 - val_loss: 0.8542
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 911ms/step - accuracy: 1.0000 - loss: 0.0662 - val_accuracy: 0.5250 - val_loss: 1.5425
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 940ms/step - accuracy: 1.0000 - loss: 0.0098 - val_accuracy: 0.4750 - val_loss: 2.2870




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 175ms/step
LSTM - Units: 64, Layers: 2, Accuracy: 0.5050
Evaluating LSTM (1 layer, 128 units)
Epoch 1/5




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - accuracy: 0.4966 - loss: 0.6934 - val_accuracy: 0.5063 - val_loss: 0.6933
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - accuracy: 0.7466 - loss: 0.6750 - val_accuracy: 0.4688 - val_loss: 0.6997
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - accuracy: 0.9567 - loss: 0.5274 - val_accuracy: 0.5063 - val_loss: 0.7306
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2s/step - accuracy: 0.9803 - loss: 0.1958 - val_accuracy: 0.5312 - val_loss: 0.8746
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - accuracy: 0.9957 - loss: 0.0429 - val_accuracy: 0.5500 - val_loss: 1.0685




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 304ms/step
LSTM - Units: 128, Layers: 1, Accuracy: 0.5000
Evaluating LSTM (2 layers, 128 units)
Epoch 1/5




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 4s/step - accuracy: 0.5368 - loss: 0.6937 - val_accuracy: 0.5437 - val_loss: 0.6932
Epoch 2/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3s/step - accuracy: 0.6239 - loss: 0.6744 - val_accuracy: 0.5000 - val_loss: 0.7499
Epoch 3/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 3s/step - accuracy: 0.9467 - loss: 0.3443 - val_accuracy: 0.4500 - val_loss: 0.8226
Epoch 4/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 3s/step - accuracy: 0.9848 - loss: 0.1122 - val_accuracy: 0.5000 - val_loss: 1.7453
Epoch 5/5
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3s/step - accuracy: 0.9981 - loss: 0.0094 - val_accuracy: 0.5125 - val_loss: 1.9499
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 494ms/step
LSTM - Units: 128, Layers: 2, Accuracy: 0.5000
