In [5]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load IMDB dataset with top 10,000 words
max_words = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)

# Pad sequences to a fixed length (e.g., 100 words)
max_len = 100
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# Define the DNN Model
model = Sequential()
model.add(Embedding(max_words, 64))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the Model
batch_size = 32
epochs = 10  # Increase the number of epochs
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test))

# Evaluate the Model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

# Function to classify user input as positive or negative review
def classify_review(input_text):
    # Tokenize and pad user input
    word_index = imdb.get_word_index()
    input_sequence = [word_index[word] if word in word_index and word_index[word] < max_words else 0 for word in input_text.split()]
    input_sequence = pad_sequences([input_sequence], maxlen=max_len)
    
    # Predict sentiment
    prediction = model.predict(input_sequence)
    if prediction[0][0] >= 0.5:
        return "Positive review"
    else:
        return "Negative review"

# Test the classification function with user input
user_input = input("Enter a movie review: ")
classification = classify_review(user_input)
print("Classification:", classification)
'''Importing Libraries:
numpy is a library for numerical operations in Python.
tensorflow.keras is part of TensorFlow, a popular deep learning framework. Keras is a high-level API that allows for easy construction, training, and evaluation of neural networks.
Loading IMDb Dataset:
The IMDb dataset is a collection of movie reviews labeled as positive or negative sentiments.
num_words=max_words restricts the dataset to the top 10,000 most frequent words, reducing complexity while retaining important information.
Preprocessing Data:
pad_sequences is used to ensure all sequences (movie reviews) have the same length.
This is crucial for training neural networks because they typically expect fixed-size inputs.
Defining the Neural Network Model:
Sequential() creates a linear stack of layers for the model.
Embedding layer converts word indices into dense vectors of fixed size (64 in this case), which helps in capturing semantic relationships between words.
Flatten layer reshapes the 2D output of the embedding layer into a 1D vector for further processing.
Dense layers are fully connected layers with a specified number of units and activation functions.
The final Dense layer with sigmoid activation performs binary classification, predicting whether a review is positive or negative.
Compiling and Training the Model:
model.compile configures the learning process with an optimizer (adam), loss function (binary_crossentropy for binary classification), and metrics to monitor (accuracy in this case).
model.fit trains the model using the training data (x_train and y_train) for a specified number of epochs (10 in this case) and a batch size of 32. Validation data (x_test and y_test) are used to evaluate the model's performance during training.
Evaluating the Model:
model.evaluate computes the loss and metrics (accuracy in this case) of the model on the test data (x_test and y_test).
The results are printed to assess how well the model generalizes to unseen data.
Classification Function:
classify_review is a function that takes user input (a movie review) and predicts its sentiment (positive or negative).
It tokenizes the input text using the IMDb dataset's word index, converts it into sequences, pads it to a fixed length, and then uses the trained model to predict the sentiment.
Testing the Classification Function:
The code prompts the user to enter a movie review.
It calls the classify_review function to classify the user's input as a positive or negative review based on the trained model.
The result is printed, indicating whether the input review is classified as positive or negative.
In summary, this code demonstrates the entire pipeline of building a sentiment analysis model using a neural network, training it on IMDb movie review data, and using it to classify user-provided text into positive or negative sentiments. It showcases fundamental concepts in natural language processing (NLP) and deep learning for text classification tasks.'''


Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 18ms/step - accuracy: 0.7009 - loss: 0.5275 - val_accuracy: 0.8479 - val_loss: 0.3414
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step - accuracy: 0.9563 - loss: 0.1225 - val_accuracy: 0.8143 - val_loss: 0.5136
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 16ms/step - accuracy: 0.9964 - loss: 0.0163 - val_accuracy: 0.8224 - val_loss: 0.6768
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.9994 - loss: 0.0026 - val_accuracy: 0.8278 - val_loss: 0.7534
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 16ms/step - accuracy: 1.0000 - loss: 3.9798e-04 - val_accuracy: 0.8285 - val_loss: 0.7901
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 1.0000 - loss: 1.0764e-04 - val_accuracy: 0.8295 - val_loss: 0.8195
Epoch 7/

In [37]:
# Test the classification function with user input
user_input = input("Enter a movie review: ")
classification = classify_review(user_input)
print("Classification:", classification)


Enter a movie review: The soundtrack perfectly complements the mood of each scene, enhancing the overall impact.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
Classification: Positive review
