In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Activation

# Hyperparameters
max_features = 10000  # Number of most frequent words to consider
maxlen = 200  # Maximum review length

# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences to ensure consistent length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Build the RNN model
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))  # Embedding layer
model.add(SimpleRNN(64))  # SimpleRNN layer with 64 units
model.add(Dense(1))  # Output layer with 1 unit for binary classification
model.add(Activation('sigmoid'))  # Sigmoid activation for probability output

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=5, batch_size=32, validation_data=(x_test, y_test))

# Evaluate the model on test data
loss, accuracy = model.evaluate(x_test, y_test)
print("Test Loss:", loss, "Test Accuracy:", accuracy)




Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.7755042314529419 Test Accuracy: 0.7744399905204773


In [5]:
x_train

array([[   5,   25,  100, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

In [9]:
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer  # Import Tokenizer


In [23]:
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(new_review)  # Fit the tokenizer on training data

# Preprocess the new review
new_review = "This movie was absolutely fantastic!"  # Example review

new_review = [new_review]  # Wrap the review in a list
new_review = tokenizer.texts_to_sequences(new_review)  # Convert text to sequences

# Pad sequences
new_review = pad_sequences(new_review, maxlen=maxlen)


In [24]:
new_review

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3,
        4, 5]], dtype=int32)

In [25]:
prediction = model.predict(new_review)[0][0]

if prediction > 0.5:
  print("Sentiment: Positive")
else:
  print("Sentiment: Negative")

Sentiment: Negative
