In [1]:
# GRU Step 1: Load IMDB dataset limited to top 10,000 words
from tensorflow.keras.datasets import imdb
num_words = 10000
(x_train_imdb, y_train_imdb), (x_test_imdb, y_test_imdb) = imdb.load_data(num_words=num_words)
print("Loaded IMDB with top", num_words, "words.")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Loaded IMDB with top 10000 words.


In [2]:
# GRU Step 2: Show shapes and that it's split into x_train/y_train and x_test/y_test
print("x_train length:", len(x_train_imdb))
print("x_test length:", len(x_test_imdb))
print("Example sequence (first):", x_train_imdb[0][:20], "... (truncated)")
print("Corresponding label:", y_train_imdb[0])


x_train length: 25000
x_test length: 25000
Example sequence (first): [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25] ... (truncated)
Corresponding label: 1


In [3]:
# GRU Step 3: Pad sequences to a maximum length of 200
from tensorflow.keras.preprocessing.sequence import pad_sequences
maxlen = 200
x_train_pad = pad_sequences(x_train_imdb, maxlen=maxlen, padding='post', truncating='post')
x_test_pad  = pad_sequences(x_test_imdb,  maxlen=maxlen, padding='post', truncating='post')
print("Padded sequences to length:", maxlen)


Padded sequences to length: 200


In [4]:
# GRU Step 4-7: Build model: Embedding -> GRU -> Dense(sigmoid)
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

model_gru = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_length=maxlen),  # Step 5
    GRU(128, dropout=0.2, recurrent_dropout=0.2),                     # Step 6
    Dense(1, activation='sigmoid')                                    # Step 7
])
print("GRU model built.")


GRU model built.




In [5]:
# GRU Step 8: Compile the model with binary_crossentropy, adam, and track accuracy
model_gru.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print("GRU model compiled.")


GRU model compiled.


In [6]:
# GRU Step 9-10: Train the model for 3 epochs, batch_size=32, use test set as validation
history_gru = model_gru.fit(x_train_pad, y_train_imdb, epochs=3, batch_size=32, validation_data=(x_test_pad, y_test_imdb))


Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m433s[0m 547ms/step - accuracy: 0.5077 - loss: 0.6933 - val_accuracy: 0.5532 - val_loss: 0.6784
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m443s[0m 549ms/step - accuracy: 0.6772 - loss: 0.5757 - val_accuracy: 0.8586 - val_loss: 0.3277
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 591ms/step - accuracy: 0.9091 - loss: 0.2311 - val_accuracy: 0.8610 - val_loss: 0.3375


In [7]:
# GRU Step 11-12: Evaluate on the test data and print test accuracy
loss_gru, acc_gru = model_gru.evaluate(x_test_pad, y_test_imdb, verbose=0)
print("GRU test loss:", loss_gru)
print("GRU test accuracy:", acc_gru)


GRU test loss: 0.3374769389629364
GRU test accuracy: 0.8610399961471558


In [9]:
# GRU Step 13: Experiment - change GRU units (example: 64)
from tensorflow.keras.layers import GRU
model_gru_64 = Sequential([
    Embedding(10000, 128, input_length=maxlen),
    GRU(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])
model_gru_64.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history_gru_64 = model_gru_64.fit(x_train_pad, y_train_imdb, epochs=3, batch_size=32, validation_data=(x_test_pad, y_test_imdb))
print("Trained GRU with 64 units.")


Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 332ms/step - accuracy: 0.5088 - loss: 0.6931 - val_accuracy: 0.5350 - val_loss: 0.6859
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 329ms/step - accuracy: 0.6859 - loss: 0.5734 - val_accuracy: 0.8614 - val_loss: 0.3232
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 327ms/step - accuracy: 0.9088 - loss: 0.2408 - val_accuracy: 0.8621 - val_loss: 0.3201
Trained GRU with 64 units.


In [10]:
# GRU Step 14: Change embedding dimension (example: 64 instead of 128)
model_embed_64 = Sequential([
    Embedding(10000, 64, input_length=maxlen),   # changed embedding dim
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])
model_embed_64.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history_embed_64 = model_embed_64.fit(x_train_pad, y_train_imdb, epochs=3, batch_size=32, validation_data=(x_test_pad, y_test_imdb))
print("Trained model with embedding dim 64.")


Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m367s[0m 463ms/step - accuracy: 0.5068 - loss: 0.6936 - val_accuracy: 0.5248 - val_loss: 0.6934
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 462ms/step - accuracy: 0.5935 - loss: 0.6586 - val_accuracy: 0.7996 - val_loss: 0.4465
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m420s[0m 510ms/step - accuracy: 0.8626 - loss: 0.3347 - val_accuracy: 0.8648 - val_loss: 0.3180
Trained model with embedding dim 64.


In [None]:
# GRU Step 15: Try using LSTM instead of GRU and compare (example)
from tensorflow.keras.layers import LSTM
model_lstm = Sequential([
    Embedding(10000, 128, input_length=maxlen),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])
model_lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history_lstm = model_lstm.fit(x_train_pad, y_train_imdb, epochs=3, batch_size=32, validation_data=(x_test_pad, y_test_imdb))
print("Trained LSTM-based model.")
