In [3]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

In [4]:
# Setting the features for IMDB dataset
max_features = 10000  # Number of words to consider as features
maxlen = 100  # Cut texts after this number of words (among top max_features most common words)
batch_size = 32

In [5]:
# IMDB data loaded as training and testing sets

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [6]:
#pad_sequences is used to ensure that the training and testing set are of the same length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding


In [8]:
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(LSTM(64, return_sequences=True))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))



In [9]:
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [10]:
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=3,
          validation_data=(x_test, y_test))

Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 68ms/step - accuracy: 0.7486 - loss: 0.4871 - val_accuracy: 0.8474 - val_loss: 0.3453
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 65ms/step - accuracy: 0.9030 - loss: 0.2517 - val_accuracy: 0.8466 - val_loss: 0.3688
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 67ms/step - accuracy: 0.9301 - loss: 0.1817 - val_accuracy: 0.8374 - val_loss: 0.4313


<keras.src.callbacks.history.History at 0x12d216980>

In [11]:
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.8354 - loss: 0.4343
Test score: 0.4313129782676697
Test accuracy: 0.8373600244522095


In [12]:
import numpy as np

x_sample = x_test[10:21]
y_sample_actual = y_test[10:21]

# Use the trained model to predict the sentiment
y_sample_predicted = model.predict(x_sample)

# Convert predictions to binary values (0 or 1)
y_sample_predicted = (y_sample_predicted > 0.5).astype(int)

# Compare predictions with actual values
print("Actual Sentiments:", y_sample_actual)
print("Predicted Sentiments:", y_sample_predicted.flatten())

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
Actual Sentiments: [1 0 0 0 1 0 1 0 0 0 1]
Predicted Sentiments: [1 0 0 0 1 0 1 0 0 0 1]
