In [4]:
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense

# Load IMDb dataset
vocabulary_size = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocabulary_size)

# Padding sequences
max_words = 500
X_train = pad_sequences(X_train, maxlen=max_words)
X_test = pad_sequences(X_test, maxlen=max_words)

# Create a word index dictionary
word_index = imdb.get_word_index()
word2id = {word: index + 3 for word, index in word_index.items()}
word2id["<PAD>"] = 0
word2id["<START>"] = 1
word2id["<OOV>"] = 2

# Reverse the word index for decoding predictions
id2word = {index: word for word, index in word2id.items()}

# Define the model
embedding_size = 32
model = Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training the model (assuming y_train2 is the binary labels)
batch_size = 64
num_epochs = 3
X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:]
model.fit(X_train2, y_train2, validation_data=(X_valid, y_valid), batch_size=batch_size, epochs=num_epochs)

# Predict sentiment
def predict_sentiment(review_text):
    review_sequence = [word2id.get(word, 0) for word in review_text.split()]
    padded_sequence = pad_sequences([review_sequence], maxlen=max_words)
    prediction = model.predict(padded_sequence)
    return prediction[0][0]

# Test predictions
positive_review = "I loved the movie! It was fantastic."
negative_review = "The film was terrible, and I hated it."
positive_sentiment = predict_sentiment(positive_review)
negative_sentiment = predict_sentiment(negative_review)

print("Predicted sentiment for the positive review:", positive_sentiment)
print("Predicted sentiment for the negative review:", negative_sentiment)

# Evaluate the model on the test set
scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
Epoch 1/3
Epoch 2/3
Epoch 3/3
Predicted sentiment for the positive review: 0.9390678
Predicted sentiment for the negative review: 0.72343266
Test accuracy: 0.8744400143623352
