In [1]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Dropout




In [2]:
# Vocabulary cap handed to imdb.load_data; the same value is reused later as
# the Embedding layer's input_dim.
num_words = 60000
# Load the Keras IMDB dataset, already split into train and test
# (features, labels) pairs.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = num_words)

In [3]:
# Word -> index mapping that ships with the Keras IMDB dataset.
word_index = imdb.get_word_index()

In [4]:
# Invert the vocabulary so that an index can be looked up to recover its word.
reverse_word_index = {index: word for word, index in word_index.items()}

In [5]:
def decode_review(review):
    """Turn a sequence of encoded word indices back into readable text.

    Each index is shifted down by 3 before it is looked up in
    ``reverse_word_index``; indices with no entry are rendered as ``'?'``.

    Args:
        review: Iterable of integer word indices.

    Returns:
        The recovered words joined by single spaces.
    """
    words = []
    for encoded in review:
        words.append(reverse_word_index.get(encoded - 3, '?'))
    return ' '.join(words)

In [6]:
# Fixed sequence length for every review; also used as the Embedding layer's
# input_length when the model is built.
max_length = 300
# Bring every review to exactly max_length entries. The displayed output
# shows the zero padding is placed at the front of each row.
X_train = pad_sequences(X_train, maxlen = max_length)
X_test = pad_sequences(X_test, maxlen = max_length)
# Bare last expression: display the padded training array.
X_train

array([[    0,     0,     0, ...,    19,   178,    32],
       [    0,     0,     0, ...,    16,   145,    95],
       [    0,     0,     0, ...,     7,   129,   113],
       ...,
       [    0,     0,     0, ...,     4,  3586, 22459],
       [    0,     0,     0, ...,    12,     9,    23],
       [    0,     0,     0, ...,   204,   131,     9]])

In [7]:
# Width of each learned word vector.
embedding_dim = 100

# Feed-forward sentiment classifier: embed every token, flatten the embedded
# sequence, then a ReLU hidden layer with dropout and one sigmoid output unit.
model = Sequential([
    Embedding(
        input_dim=num_words,
        output_dim=embedding_dim,
        input_length=max_length,
    ),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.5),
    Dense(units=1, activation='sigmoid'),
])




In [8]:
# Binary-classification training setup: Adam optimizer, binary cross-entropy
# loss, and accuracy reported as the tracked metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [9]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 300, 100)          6000000   
                                                                 
 flatten (Flatten)           (None, 30000)             0         
                                                                 
 dense (Dense)               (None, 128)               3840128   
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 9840257 (37.54 MB)
Trainable params: 9840257 (37.54 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
# Train for 10 epochs in mini-batches of 32, holding out 10% of the training
# data for validation. The returned History object is the cell's displayed value.
# NOTE(review): the recorded run scores ~0.99 on the training set but ~0.87 on
# the test set, which suggests overfitting; consider fewer epochs or an
# EarlyStopping callback on val_loss — confirm against the per-epoch logs.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x22e1f177850>

In [11]:
# Score the trained model on the held-out test split; evaluate returns the
# loss followed by the accuracy metric configured in compile().
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy", test_accuracy)

Test Accuracy 0.8668000102043152


In [12]:
# Score the model on the data it was trained on, as a reference point for
# judging how well it generalizes to unseen reviews.
train_loss, train_accuracy = model.evaluate(X_train, y_train)
print("Train Accuracy", train_accuracy)

Train Accuracy 0.9879599809646606


In [13]:
# Build a Keras Tokenizer and fit it on the IMDB vocabulary words themselves
# (every word is passed in as its own "text").
# NOTE(review): fit_on_texts assigns indices from whatever it is fitted on;
# these are presumably unrelated to the indices imdb.load_data produced for
# X_train, which the model was trained on. Confirm before relying on
# tokenizer.word_index to encode new reviews for this model.
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(reverse_word_index.values())

In [14]:
def preprocess_input(review):
    """Encode a raw review string the same way the IMDB training data is encoded.

    The model is trained on sequences produced by ``imdb.load_data`` with its
    default settings: a start marker ``1`` at the front, every word stored as
    its ``word_index`` value plus 3, and any word that is unknown or falls
    outside the ``num_words`` vocabulary replaced by the out-of-vocabulary
    marker ``2``. New text has to follow the same scheme, otherwise the
    embedding rows looked up at prediction time belong to different words than
    the ones seen during training (or lie outside the embedding table).

    Args:
        review: Free-text review entered by the user.

    Returns:
        Integer array of shape ``(1, max_length)`` suitable for
        ``model.predict``.
    """
    # Defaults of imdb.load_data: start_char=1, oov_char=2, index_from=3.
    start_token, oov_token, index_offset = 1, 2, 3

    encoded = [start_token]
    for word in review.lower().split():
        # Drop punctuation glued to the word so "again!" can match "again".
        word = word.strip('.,!?;:"()[]{}')
        if not word:
            continue
        rank = word_index.get(word)
        token = oov_token if rank is None else rank + index_offset
        # Indices at or beyond num_words were mapped to OOV at training time
        # and would be out of range for the Embedding layer.
        encoded.append(token if token < num_words else oov_token)

    return pad_sequences([encoded], maxlen=max_length)

In [15]:
# Ask for a free-text review interactively.
# NOTE(review): input() blocks until something is typed, so a
# Restart & Run All stops here and the result depends on what was entered.
user_review = input("Enter your movie review: ")

Enter your movie review: Would watch again


In [16]:
# Encode the typed review into a padded index sequence for the model.
# NOTE(review): this rebinds user_review from the raw string to the encoded
# array, so running this cell a second time passes an array to
# preprocess_input, which calls .lower() on its argument and will fail.
user_review = preprocess_input(user_review)

In [17]:
# The batch holds one review and the model has one sigmoid output unit, so
# [0][0] extracts that single score.
predicted_sentiment = model.predict(user_review)[0][0]



In [18]:
# Scores of 0.5 and above are reported as positive, anything lower as negative.
print(
    "Predicted Sentiment: Positive"
    if predicted_sentiment >= 0.5
    else "Predicted Sentiment: Negative"
)

Predicted Sentiment: Positive
