In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Parameters
vocab_size = 10000     # Top 10,000 most frequent words
max_length = 100       # Max length of each review
embedding_dim = 32     # Dimension of the embedding layer
batch_size = 32
epochs = 5

# Load and preprocess the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to ensure equal length (max_length)
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)

# Build the LSTM-based model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification (positive/negative)
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Summary of the model
print(model.summary())

# Train the model
history = model.fit(
    x_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1
)

# Evaluate the model on test data
loss, accuracy = model.evaluate(x_test, y_test, verbose=1)
print(f"Test Accuracy: {accuracy * 100:.2f}%")




None
Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 65ms/step - accuracy: 0.7351 - loss: 0.5021 - val_accuracy: 0.8359 - val_loss: 0.3601
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 57ms/step - accuracy: 0.9004 - loss: 0.2529 - val_accuracy: 0.8422 - val_loss: 0.3492
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 58ms/step - accuracy: 0.9279 - loss: 0.1932 - val_accuracy: 0.8425 - val_loss: 0.4118
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 62ms/step - accuracy: 0.9493 - loss: 0.1394 - val_accuracy: 0.8270 - val_loss: 0.4754
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 62ms/step - accuracy: 0.9613 - loss: 0.1079 - val_accuracy: 0.8323 - val_loss: 0.4893
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step - accuracy: 0.8325 - loss: 0.4951
Test Accuracy: 83.23%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [2]:
model.save('sentiment.h5')



In [None]:
# Prediction function for new text input
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np

def predict_sentiment(text):
    tokenizer = Tokenizer(num_words=vocab_size)
    tokenizer.fit_on_texts([text])  # Tokenize the new input text
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)
    
    prediction = model.predict(padded_sequence)
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    return sentiment

# Example usage of prediction function
sample_text = "This movie was amazing! I really loved it."
print("Predicted Sentiment:", predict_sentiment(sample_text))