## IMDB sentiment analysis

# Importing Libraries

In [7]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb # built in dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences # to pad text to same size
from tensorflow.keras.models import Sequential # Allows us to build a model layer by layer
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, GRU, Dense # Layers we'll use

# Load Dataset

In [2]:
(X_train,y_train), (X_test, y_test)= imdb.load_data(num_words=10000)  #num_words=10000 means only keep the top 10,000 most common words

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


# Pad the sequences

In [3]:
X_train = pad_sequences(X_train, maxlen=300) # All reviews are now 300 words long
X_test = pad_sequences(X_test, maxlen=300)

## Build the model

In [17]:
model= Sequential() # Basic model where we stack layers one by one
model.add(Embedding(input_dim=10000, output_dim=32, input_length=300))
model.add(LSTM(units=32))  # LSTM layer with 32 memory units(neurons)
model.add(Dense(1, activation='sigmoid')) # Final output layer" 1 neuron, gives us probabilty

## Compile the model

In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model

In [21]:
model.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)

Epoch 1/3


Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x1795e50be50>

# Evaluate 

In [22]:
test_loss, test_acc = model.evaluate(X_test,y_test)
print("Test Accuracy:", test_acc)

Test Accuracy: 0.8688399791717529


# Try GRU

In [23]:
model2 = Sequential()
model2.add(Embedding(10000, 32, input_length=300))
model2.add(GRU(32))  # Switchting between LSTM, GRU, or SimpleRNN here
model2.add(Dense(1, activation='sigmoid'))
model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model2.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x17965d29050>

In [24]:
test_loss, test_acc = model2.evaluate(X_test,y_test)
print("Test Accuracy:", test_acc)

Test Accuracy: 0.8712000250816345


# Trying LSTM

In [25]:
model3 = Sequential()
model3.add(Embedding(10000, 32, input_length=300))
model3.add(LSTM(32))  # Switchting between LSTM, GRU, or SimpleRNN here
model3.add(Dense(1, activation='sigmoid'))
model3.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model3.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x1796344d590>

In [26]:
test_loss, test_acc = model3.evaluate(X_test,y_test)
print("Test Accuracy:", test_acc)

Test Accuracy: 0.8677999973297119


I experimented with RNN, GRU, and LSTM for sentiment analysis on the IMDB dataset.
I compared accuracy, training time, and generalization.
GRU performed best with 87.12% accuracy and lowest training time.
Based on this, I chose GRU for deployment.