In [1]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences




In [2]:
# Load the IMDb dataset
vocab_size = 10000
max_len = 300
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)
# Pad sequences to ensure uniform length
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)


In [3]:

# Build the neural network model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=32, input_length=max_len),
    Flatten(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])


In [4]:

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_split=0.2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [5]:

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)


Test Loss: 0.38720273971557617
Test Accuracy: 0.8705199956893921


In [6]:
predictions = model.predict(X_test)



In [7]:
predictions

array([[0.07445277],
       [0.9999948 ],
       [0.38799348],
       ...,
       [0.00643004],
       [0.09357999],
       [0.6269142 ]], dtype=float32)

In [8]:
def preprocess_input(text):
    # Load IMDb word index
    word_index = imdb.get_word_index()
    # Shift indices by 3 since IMDb dataset reserves first indices for special tokens
    word_index = {word: index + 3 for word, index in word_index.items()}
    
    # Convert input text to lowercase and split into words
    words = text.lower().split()
    
    # Convert words to indices using IMDb word index
    indices = [word_index[word] if word in word_index and word_index[word] < vocab_size else 2 for word in words]
    
    # Pad sequences to ensure uniform length
    padded_sequence = pad_sequences([indices], maxlen=max_len)
    
    return padded_sequence


In [9]:

# User input
user_input = input("Enter a review: ")

# Preprocess user input
processed_input = preprocess_input(user_input)

# Predict sentiment
prediction = model.predict(processed_input)

# Print prediction
print("Predicted sentiment (0 = negative, 1 = positive):", prediction[0][0])


Enter a review: The movie was really awful, the characters and their dialogues were not at all in sync. The story itself was a fake one.
Predicted sentiment (0 = negative, 1 = positive): 0.14440401
