In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the IMDb dataset from CSV, keeping only the first 1200 reviews so the
# example trains in a reasonable time.
df = pd.read_csv("IMDB Dataset.csv").head(1200)

# Convert sentiment labels to binary (1 for positive, 0 for negative).
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# Series.map() turns every label missing from the mapping into NaN, which
# would silently propagate into the loss; fail loudly instead.
if df['sentiment'].isna().any():
    raise ValueError("Unexpected sentiment label: expected only 'positive' or 'negative'.")

# Tokenization and padding
max_features = 10000  # Number of words to consider
maxlen = 500  # Maximum sequence length (reviews are padded/truncated to it)

y_data = df['sentiment'].values

# Pick the train/test rows BEFORE fitting the tokenizer so that the vocabulary
# is learned from training reviews only (no test-set text leaks into it).
# Splitting row positions with the same test_size/random_state selects rows
# by position only, independent of the array contents.
row_ids = np.arange(len(df))
train_idx, test_idx = train_test_split(row_ids, test_size=200/1200, random_state=42)

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(df['review'].iloc[train_idx])
sequences = tokenizer.texts_to_sequences(df['review'])
x_data = pad_sequences(sequences, maxlen=maxlen)

# Training / testing portions (1000 / 200 samples when 1200 rows are loaded).
x_train, x_test = x_data[train_idx], x_data[test_idx]
y_train, y_test = y_data[train_idx], y_data[test_idx]

# Build LSTM model
model = keras.Sequential([
    # Embedding layer. `input_length` is intentionally omitted: it is
    # deprecated in Keras 3 and the sequence length is inferred from the data.
    keras.layers.Embedding(input_dim=max_features, output_dim=128),
    keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2),  # LSTM layer
    keras.layers.Dense(1, activation='sigmoid'),  # Sigmoid output in [0, 1]
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
batch_size = 32
epochs = 5  # You can increase epochs for better accuracy

# NOTE: the test split doubles as validation data here, purely for per-epoch
# monitoring. Do not tune hyper-parameters against these numbers, or the test
# accuracy reported below stops being an unbiased estimate.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test))

# Evaluate the model: evaluate() returns [loss, accuracy] given the metrics
# configured in compile().
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print(f"Test accuracy: {acc}")

# Function to predict sentiment of new reviews
def predict_review(review: str) -> None:
    """Print the predicted sentiment of a single review.

    Uses the module-level ``tokenizer``, ``maxlen`` and ``model``, so those
    must already be defined (and the model trained) before calling.

    Args:
        review: Raw review text to classify.

    Prints the review followed by the predicted label ("Positive" when the
    model output is >= 0.5, otherwise "Negative") and the raw probability.
    """
    # The tokenizer and padding helpers operate on batches, so wrap the single
    # review in a list and read the one result back out below.
    sequence = tokenizer.texts_to_sequences([review])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    # verbose=0 suppresses the per-call progress bar, which is just noise for
    # a one-sample prediction.
    prediction = model.predict(padded_sequence, verbose=0)[0][0]
    sentiment = "Positive" if prediction >= 0.5 else "Negative"
    print(f"Review: {review}")
    print(f"Predicted Sentiment: {sentiment} (Probability: {prediction:.4f})")

# Example predictions: run one clearly positive and one clearly negative
# review through the trained model, in that order.
for sample_review in (
    "This movie was absolutely fantastic! I loved it.",
    "The movie was terrible and a complete waste of time.",
):
    predict_review(sample_review)


Epoch 1/5




[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 1s/step - accuracy: 0.4939 - loss: 0.6930 - val_accuracy: 0.6500 - val_loss: 0.6766
Epoch 2/5
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 1s/step - accuracy: 0.7792 - loss: 0.6160 - val_accuracy: 0.6400 - val_loss: 0.6243
Epoch 3/5
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.8682 - loss: 0.3810 - val_accuracy: 0.7100 - val_loss: 0.5522
Epoch 4/5
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 1s/step - accuracy: 0.9507 - loss: 0.1951 - val_accuracy: 0.7050 - val_loss: 0.6120
Epoch 5/5
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 1s/step - accuracy: 0.9787 - loss: 0.0798 - val_accuracy: 0.7250 - val_loss: 0.7080
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 184ms/step - accuracy: 0.7590 - loss: 0.6555
Test accuracy: 0.7250000238418579
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 549ms/step
R