In [1]:
import tensorflow as tf
from tensorflow.keras.models  import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

In [2]:
# Load IMDB dataset (only top 5000 words)
max_features = 5000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

print("Training samples:", len(x_train))
print("Test samples:", len(x_test))

Training samples: 25000
Test samples: 25000


In [3]:
# Pad sequences so they are all the same length
max_len = 500
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

print("Training shape:", x_train.shape)
print("Testing shape:", x_test.shape)

Training shape: (25000, 500)
Testing shape: (25000, 500)


In [4]:
model = Sequential()

# 1. Embedding layer
model.add(Embedding(max_features, 128, input_length=max_len))

# 2. LSTM layer
model.add(LSTM(100))  # 100 memory cells

# 3. Dense output layer
model.add(Dense(1, activation='sigmoid'))  # binary classification



In [5]:
model.add(Dense(1, activation='sigmoid'))  # binary classification

# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

print(model.summary())

None


In [6]:
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=3,
                    validation_data=(x_test, y_test))

Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m785s[0m 2s/step - accuracy: 0.5056 - loss: 0.6931 - val_accuracy: 0.5487 - val_loss: 0.6513
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m625s[0m 2s/step - accuracy: 0.6660 - loss: 0.6279 - val_accuracy: 0.7493 - val_loss: 0.5854
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m542s[0m 1s/step - accuracy: 0.7663 - loss: 0.5586 - val_accuracy: 0.7607 - val_loss: 0.5412


In [7]:
import numpy as np

# Pick one review from test set
sample_review = x_test[0].reshape(1, max_len)
prediction = model.predict(sample_review)

print("Prediction (probability of positive):", prediction[0][0])
print("Actual label:", y_test[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 409ms/step
Prediction (probability of positive): 0.37826678
Actual label: 0


In [8]:
# changes 
#Change max_features to 10,000 and observe any change in accuracy.
max_features = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

model_10k = Sequential([
    Embedding(max_features, 128, input_length=max_len),
    LSTM(100),
    Dense(1, activation='sigmoid')
])
model_10k.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_10k.fit(x_train, y_train, epochs=2, batch_size=64, validation_data=(x_test, y_test))


Epoch 1/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m717s[0m 2s/step - accuracy: 0.7374 - loss: 0.5129 - val_accuracy: 0.7308 - val_loss: 0.5321
Epoch 2/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m819s[0m 2s/step - accuracy: 0.8521 - loss: 0.3470 - val_accuracy: 0.8592 - val_loss: 0.3356


<keras.src.callbacks.history.History at 0x1c8cbdef1a0>

In [10]:
#Change the number of LSTM units (e.g., 50, 150) and observe the effect.
for units in [50, 150]:
    print(f"\nTraining LSTM model with {units} units...")
    model_var = Sequential([
        Embedding(max_features, 128, input_length=max_len),
        LSTM(units),
        Dense(1, activation='sigmoid')
    ])
    model_var.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model_var.fit(x_train, y_train, epochs=2, batch_size=64, validation_data=(x_test, y_test))



Training LSTM model with 50 units...
Epoch 1/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m410s[0m 1s/step - accuracy: 0.7769 - loss: 0.4694 - val_accuracy: 0.8225 - val_loss: 0.4000
Epoch 2/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m465s[0m 1s/step - accuracy: 0.8920 - loss: 0.2707 - val_accuracy: 0.8614 - val_loss: 0.3181

Training LSTM model with 150 units...
Epoch 1/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1215s[0m 3s/step - accuracy: 0.7833 - loss: 0.4542 - val_accuracy: 0.8544 - val_loss: 0.3485
Epoch 2/2
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1151s[0m 3s/step - accuracy: 0.8469 - loss: 0.3518 - val_accuracy: 0.8546 - val_loss: 0.3779


In [12]:
#Increase epochs to 5 or more. Compare training and validation accuracy.

model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=max_len),
    LSTM(100),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history_long = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=5,
    validation_data=(x_test, y_test)
)

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m650s[0m 2s/step - accuracy: 0.8085 - loss: 0.4180 - val_accuracy: 0.8172 - val_loss: 0.4046
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m669s[0m 2s/step - accuracy: 0.8759 - loss: 0.2999 - val_accuracy: 0.8733 - val_loss: 0.3248
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m670s[0m 2s/step - accuracy: 0.9268 - loss: 0.1936 - val_accuracy: 0.8742 - val_loss: 0.3070
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m733s[0m 2s/step - accuracy: 0.9399 - loss: 0.1620 - val_accuracy: 0.8641 - val_loss: 0.3355
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m834s[0m 2s/step - accuracy: 0.9222 - loss: 0.1988 - val_accuracy: 0.8585 - val_loss: 0.4324
