In [12]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Parameters
vocab_size = 10000     # Top 10,000 most frequent words
max_length = 100       # Max length of each review
embedding_dim = 32     # Dimension of the embedding layer
batch_size = 32
epochs = 5

# Load and preprocess the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure equal length (max_length)
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)

# Build the LSTM-based model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification (positive/negative)
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Summary of the model
print(model.summary())

# Train the model
history = model.fit(
    x_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1
)

# Evaluate the model on test data
loss, accuracy = model.evaluate(x_test, y_test, verbose=1)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


None
Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 71ms/step - accuracy: 0.7140 - loss: 0.5210 - val_accuracy: 0.8329 - val_loss: 0.3854
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 65ms/step - accuracy: 0.8949 - loss: 0.2622 - val_accuracy: 0.8468 - val_loss: 0.3533
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 60ms/step - accuracy: 0.9274 - loss: 0.1926 - val_accuracy: 0.8404 - val_loss: 0.4162
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 59ms/step - accuracy: 0.9485 - loss: 0.1400 - val_accuracy: 0.8337 - val_loss: 0.4631
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 60ms/step - accuracy: 0.9621 - loss: 0.1087 - val_accuracy: 0.8326 - val_loss: 0.4891
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.8309 - loss: 0.4951
Test Accuracy: 83.26%


In [13]:
model.save('sentiment.h5')



In [14]:
# Prediction function for new text input
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np

def predict_sentiment(text):
    tokenizer = Tokenizer(num_words=vocab_size)
    tokenizer.fit_on_texts([text])  # Tokenize the new input text
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)
    
    prediction = model.predict(padded_sequence)
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    return sentiment

# Example usage of prediction function
sample_text = "This movie was amazing! I really loved it."
print("Predicted Sentiment:", predict_sentiment(sample_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 278ms/step
Predicted Sentiment: Negative


In [15]:
# Example usage of prediction function
sample_text = "the movie is worst"
print("Predicted Sentiment:", predict_sentiment(sample_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Predicted Sentiment: Negative
