9. Implement an LSTM for sentiment analysis on the UMICH SI650 dataset or a similar one (this notebook uses the IMDB movie-review dataset).

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# 1. Load the IMDB reviews, keeping only the most frequent words
vocab_size = 10000  # size of the vocabulary kept by the loader
maxlen = 200        # every review is brought to exactly this many tokens

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# 2. Give all reviews the same length.
#    The pad_sequences defaults are spelled out here: short reviews are padded
#    at the front, long reviews lose tokens from the front.
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen, padding='pre', truncating='pre')
    for split in (x_train, x_test)
)

# 3. Build the LSTM model
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling start from the same state on
# every fresh run. NOTE(review): some GPU kernels may still be
# non-deterministic; confirm if bit-exact results are required.
seed = 42
tf.keras.utils.set_random_seed(seed)

model = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# the sequence length is taken from the padded input at fit time.
model.add(Embedding(input_dim=vocab_size, output_dim=128))
model.add(LSTM(64, return_sequences=False))  # only the final state feeds the classifier
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))    # single probability for the binary label

# 4. Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 5. Fit on the training reviews, holding out 20% of them for validation
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=3,
    validation_split=0.2,
)

# 6. Score the trained model on the test reviews
loss, accuracy = model.evaluate(x=x_test, y=y_test)
print(f"\n✅ Test Accuracy: {accuracy:.4f}")

# 7. Build a lookup for turning encoded reviews back into text
word_index = imdb.get_word_index()
# Swap keys and values so a word can be found from its integer index.
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

# Decode an example
def decode_review(encoded, index_to_word=None):
    """Turn a sequence of encoded token ids back into a space-separated string.

    Args:
        encoded: Iterable of integer token ids (e.g. one row of the padded data).
        index_to_word: Optional mapping from word index to word. When omitted,
            the module-level ``reverse_word_index`` is used.

    Returns:
        The decoded text. Padding tokens (id 0) are dropped; any other id whose
        shifted index is missing from the mapping is rendered as '?'.
    """
    if index_to_word is None:
        index_to_word = reverse_word_index
    # Ids are shifted by 3 relative to the word index (the low ids are reserved
    # by the Keras IMDB encoding), hence `i - 3`. Id 0 is the padding value
    # inserted by pad_sequences, so it is skipped instead of printed as '?'.
    return ' '.join(index_to_word.get(i - 3, '?') for i in encoded if i != 0)

# Show the first training review as text, followed by its label
sample_sentiment = "Positive" if y_train[0] == 1 else "Negative"

print("\nSample decoded review:")
sample_text = decode_review(x_train[0])
print(sample_text)
print("\nSentiment:", sample_sentiment)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step




Epoch 1/3
157/157 ━━━━━━━━━━━━━━━━━━━━ 8s 16ms/step - accuracy: 0.6209 - loss: 0.6287 - val_accuracy: 0.8436 - val_loss: 0.3640
Epoch 2/3
157/157 ━━━━━━━━━━━━━━━━━━━━ 6s 14ms/step - accuracy: 0.8862 - loss: 0.2902 - val_accuracy: 0.8620 - val_loss: 0.3449
Epoch 3/3
157/157 ━━━━━━━━━━━━━━━━━━━━ 2s 14ms/step - accuracy: 0.9322 - loss: 0.1926 - val_accuracy: 0.8676 - val_loss: 0.3534
782/782 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - accuracy: 0.8638 - loss: 0.3731

✅ Test Accuracy: 0.8625
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
1641221/1641221 ━━━━━━━━━━━━━━━━━━━━ 1s 1us/step

Sample decoded review:
and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the f