In [24]:
import pandas as pd
from keras.datasets import imdb
from keras.layers import Dense, Dropout, Embedding, Input, LSTM
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences

In [25]:
# Vocabulary is capped at the 10,000 most frequent words;
# every review is brought to a fixed length of 200 tokens.
max_features = 10000
max_len = 200

train_split, test_split = imdb.load_data(num_words=max_features)
X_train, y_train = train_split
X_test, y_test = test_split

# Pad (or truncate) each sequence so all rows share the same length
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

In [26]:
# Sanity check: shapes of the padded inputs and their label vectors
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((25000, 200), (25000,), (25000, 200), (25000,))

In [27]:
# Architecture: token ids -> 128-d embeddings -> 64-unit LSTM -> sigmoid probability.
# An explicit Input layer replaces Embedding's deprecated `input_length` argument and
# builds the model up front, so summary() can report output shapes and parameter counts.
# The model is defined once here; the identical copy of this cell has been removed.
model = Sequential([
    Input(shape=(max_len,)),
    Embedding(input_dim=max_features, output_dim=128),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

In [29]:
# Training configuration: 20% of the training data is held out for validation,
# and verbose=2 prints one summary line per epoch.
fit_options = dict(
    batch_size=128,
    epochs=5,
    validation_split=0.2,
    verbose=2,
)
history = model.fit(X_train, y_train, **fit_options)


Epoch 1/5
157/157 - 73s - 467ms/step - accuracy: 0.7469 - loss: 0.5108 - val_accuracy: 0.8280 - val_loss: 0.4001
Epoch 2/5
157/157 - 65s - 412ms/step - accuracy: 0.8551 - loss: 0.3495 - val_accuracy: 0.8506 - val_loss: 0.3734
Epoch 3/5
157/157 - 120s - 764ms/step - accuracy: 0.8788 - loss: 0.3007 - val_accuracy: 0.8264 - val_loss: 0.3906
Epoch 4/5
157/157 - 130s - 829ms/step - accuracy: 0.8947 - loss: 0.2645 - val_accuracy: 0.8038 - val_loss: 0.4298
Epoch 5/5
157/157 - 110s - 703ms/step - accuracy: 0.9148 - loss: 0.2230 - val_accuracy: 0.8404 - val_loss: 0.3895


In [30]:
# evaluate() yields the loss followed by the compiled metric (accuracy);
# `score` therefore holds the test loss.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
score, acc = test_metrics
print(f"Test Loss: {score:.4f}")
print(f"Test Accuracy: {acc:.4f}")

Test Loss: 0.3973
Test Accuracy: 0.8381


In [31]:
# word_index maps word -> integer id; invert it so ids can be looked up as words.
word_index = imdb.get_word_index()
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

def decode_review(encoded_review, index_from=3, lookup=None):
    """Turn a sequence of encoded token ids back into a space-separated string.

    Args:
        encoded_review: Iterable of integer token ids (padding included).
        index_from: Offset between token ids and the ids stored in the word
            index. Ids below this value are skipped. Defaults to 3, the
            offset this function previously hard-coded.
        lookup: Mapping from word-index id to word. Defaults to the
            notebook-level ``reverse_word_index``.

    Returns:
        The decoded words joined by single spaces. Ids that are not present
        in ``lookup`` after removing the offset are rendered as '?'.
    """
    if lookup is None:
        # Fall back to the notebook-level mapping so existing calls keep working.
        lookup = reverse_word_index
    return ' '.join(
        lookup.get(token - index_from, '?')
        for token in encoded_review
        if token >= index_from
    )

# Score the first five (already padded) test reviews.
samples = X_test[:5]
# `predictions` has to be computed here: the list comprehension below reads it,
# and leaving this call commented out raises NameError on a fresh kernel.
predictions = model.predict(samples, verbose=0)

decoded_reviews = [decode_review(sample) for sample in samples]
# Each prediction row holds the single sigmoid output; above 0.5 is labelled Positive.
predicted_sentiments = ["Positive" if pred[0] > 0.5 else "Negative" for pred in predictions]

df_results = pd.DataFrame({
    "Review": decoded_reviews,
    "Predicted Sentiment": predicted_sentiments
})

df_results



Unnamed: 0,Review,Predicted Sentiment
0,please give this one a miss br br and the rest...,Negative
1,psychological it's very interesting that rober...,Positive
2,everyone's horror the promptly eats the mayor ...,Positive
3,i generally love this type of movie however th...,Positive
4,like some other people wrote i'm a die hard ma...,Positive
