In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.metrics import accuracy_score

# Load the dataset
from tensorflow.keras.datasets import imdb

# Set vocabulary size and maximum sequence length
vocab_size = 10000
maxlen = 200

# Load the IMDB dataset
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure uniform input length
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

# Build the LSTM model
model = Sequential()

# Add Embedding layer
model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=maxlen))

# Add LSTM layer
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))

# Add a Dense layer with sigmoid activation for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Train the model
batch_size = 64
epochs = 5
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test), verbose=2)

# Predict the sentiment on the test set
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype("int32")

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {accuracy:.2f}')

# Example review for prediction
example_review = "This movie was absolutely wonderful, a masterpiece!"

# Tokenize and pad the example review
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts([example_review])
sequence = tokenizer.texts_to_sequences([example_review])
padded_sequence = pad_sequences(sequence, maxlen=maxlen)

# Predict the sentiment
sentiment = model.predict(padded_sequence)
sentiment_label = "Positive" if sentiment < 0.5 else "Negative"
print(f'Sentiment: {sentiment_label}')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step




Epoch 1/5
391/391 - 275s - 702ms/step - accuracy: 0.7688 - loss: 0.4806 - val_accuracy: 0.8371 - val_loss: 0.3756
Epoch 2/5
391/391 - 310s - 794ms/step - accuracy: 0.8644 - loss: 0.3306 - val_accuracy: 0.8553 - val_loss: 0.3601
Epoch 3/5
391/391 - 259s - 663ms/step - accuracy: 0.8846 - loss: 0.2892 - val_accuracy: 0.8582 - val_loss: 0.3440
Epoch 4/5
391/391 - 259s - 663ms/step - accuracy: 0.9058 - loss: 0.2416 - val_accuracy: 0.8337 - val_loss: 0.4034
Epoch 5/5
391/391 - 278s - 710ms/step - accuracy: 0.9208 - loss: 0.2090 - val_accuracy: 0.8115 - val_loss: 0.4349
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 115ms/step
Test Accuracy: 0.81
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Sentiment: Negative
