In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb
from sklearn.model_selection import train_test_split
import numpy as np


In [None]:
# Step (a): preprocessing configuration shared by the cells below.
maxlen = 100           # sequence length handed to pad_sequences
max_features = 10_000  # vocabulary cap used for imdb.load_data and the Embedding layers

In [None]:
# Fetch the IMDB review dataset, restricted to the `max_features` vocabulary,
# and unpack its ready-made train / test partitions.
imdb_train, imdb_test = imdb.load_data(num_words=max_features)
(x_train, y_train), (x_test, y_test) = imdb_train, imdb_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Give every review in both partitions the same length (`maxlen`) so they can
# be batched as equal-length inputs.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

In [None]:
# Step (c): GRU classifier.
# Layer stack: Embedding -> GRU (128 units, dropout and recurrent dropout of
# 0.2) -> one sigmoid unit for the binary sentiment label.
gru_model = Sequential([
    Embedding(max_features, 128),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training for the GRU: Adam optimizer, binary cross-entropy loss,
# and accuracy reported alongside the loss.
gru_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Step (d): fit the GRU for 5 epochs in mini-batches of 32.
# x_test / y_test are passed as validation data, so their loss and accuracy
# are reported after every epoch.
gru_model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m162s[0m 203ms/step - accuracy: 0.6326 - loss: 13215.3789 - val_accuracy: 0.6827 - val_loss: 0.5890
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 201ms/step - accuracy: 0.7542 - loss: 12.9658 - val_accuracy: 0.6845 - val_loss: 0.5837
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 212ms/step - accuracy: 0.7759 - loss: 0.4761 - val_accuracy: 0.6912 - val_loss: 0.5870
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 208ms/step - accuracy: 0.8050 - loss: 0.4261 - val_accuracy: 0.6952 - val_loss: 0.5946
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 207ms/step - accuracy: 0.8285 - loss: 0.7048 - val_accuracy: 0.6880 - val_loss: 0.6117


<keras.src.callbacks.history.History at 0x7c872229f790>

In [None]:
from tensorflow.keras.datasets import imdb
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Rebuild the dataset with a custom 80/20 split.
#
# NOTE(review): this cell rebinds x_train / x_test / y_train / y_test. Any
# model that was fitted before this cell ran was trained on the previous
# split, so the new x_test is NOT a held-out set for such a model.

# Vocabulary cap (most frequent words) and the target sequence length.
max_features = 10000
maxlen = 100

# Step 1: fetch the stock IMDB train / test partitions.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pool both partitions so the split ratio can be chosen freely.
data = np.concatenate([x_train, x_test])
labels = np.concatenate([y_train, y_test])

# Step 2: hold out 20% of the pooled data; the fixed random_state keeps the
# partition repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

# Step 3: bring every sequence in both partitions to length `maxlen`.
x_train, x_test = [
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
]

# Sanity check: report the resulting array shapes.
print(f"Training data shape: {x_train.shape}, Training labels shape: {y_train.shape}")
print(f"Test data shape: {x_test.shape}, Test labels shape: {y_test.shape}")


Training data shape: (40000, 100), Training labels shape: (40000,)
Test data shape: (10000, 100), Test labels shape: (10000,)


In [None]:
# Step (f): Evaluate GRU Model's accuracy
#
# The GRU was fitted on the stock IMDB training partition, but by the time
# this cell runs x_test / y_test have been rebound to a slice of the pooled
# (train + test) data, so they include reviews the GRU was trained on.
# Scoring against them overstates the model's accuracy.
#
# Re-load the stock test partition instead: the GRU only ever saw it as
# validation data, never for weight updates. Separate names are used so the
# x_test / y_test that later cells rely on stay untouched.
gru_x_test, gru_y_test = imdb.load_data(num_words=max_features)[1]
gru_x_test = pad_sequences(gru_x_test, maxlen=maxlen)

# evaluate() returns the loss first, followed by the compiled metric
# (accuracy), hence the two-way unpacking.
gru_score, gru_acc = gru_model.evaluate(gru_x_test, gru_y_test)
print(f"GRU Test Accuracy: {gru_acc}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - accuracy: 0.7507 - loss: 0.5138
GRU Test Accuracy: 0.7520999908447266


In [None]:
# Step (c - e) for the LSTM model.
# Layer stack: Embedding -> LSTM (128 units, dropout and recurrent dropout of
# 0.2) -> one sigmoid unit for the binary sentiment label.
lstm_model = Sequential([
    Embedding(max_features, 128),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training for the LSTM: Adam optimizer, binary cross-entropy loss,
# and accuracy reported alongside the loss.
lstm_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Step (d): fit the LSTM for 5 epochs in mini-batches of 32.
# x_test / y_test are passed as validation data, so their loss and accuracy
# are reported after every epoch.
lstm_model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(x_test, y_test),
)

Epoch 1/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 211ms/step - accuracy: 0.7422 - loss: 0.5051 - val_accuracy: 0.8540 - val_loss: 0.3469
Epoch 2/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m316s[0m 206ms/step - accuracy: 0.8673 - loss: 0.3204 - val_accuracy: 0.8550 - val_loss: 0.3303
Epoch 3/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 204ms/step - accuracy: 0.9026 - loss: 0.2441 - val_accuracy: 0.8633 - val_loss: 0.3468
Epoch 4/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 202ms/step - accuracy: 0.9295 - loss: 0.1867 - val_accuracy: 0.8576 - val_loss: 0.3408
Epoch 5/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 202ms/step - accuracy: 0.9445 - loss: 0.1466 - val_accuracy: 0.8552 - val_loss: 0.4078


<keras.src.callbacks.history.History at 0x7c87202f5ea0>

In [None]:
# Step (f): score the LSTM on the current x_test / y_test.
# evaluate() yields the loss followed by the compiled metric (accuracy).
lstm_metrics = lstm_model.evaluate(x_test, y_test)
lstm_score, lstm_acc = lstm_metrics
print(f"LSTM Test Accuracy: {lstm_acc}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 64ms/step - accuracy: 0.8593 - loss: 0.3973
LSTM Test Accuracy: 0.8551999926567078
