In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the review dataset; the 'review' and 'sentiment' columns are used below.
DATASET_PATH = r"C:\Users\Lenovo\Downloads\IMDB Dataset.csv.zip"
df = pd.read_csv(DATASET_PATH)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['review'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Fit the vocabulary on the training reviews only, so no test text influences
# the word index. num_words caps which word indices are emitted.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Map each review to a list of word indices, then pad/truncate every list to a
# fixed length so the reviews can be batched as one 2-D array.
max_length = 200
X_train_seq, X_test_seq = [
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
]
X_train_pad, X_test_pad = [
    pad_sequences(seqs, maxlen=max_length) for seqs in (X_train_seq, X_test_seq)
]

# Turn the sentiment labels into integers. The encoder is fitted on the
# training labels and reused, unchanged, for the test labels.
# NOTE(review): LabelEncoder orders classes by sorted value, so presumably
# 'negative' -> 0 and 'positive' -> 1 -- confirm against the dataset's labels.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

# Define CNN-LSTM model: embedding -> 1-D convolution -> max pooling ->
# two stacked LSTMs -> single sigmoid unit for binary sentiment.
#
# The embedding's vocabulary size is derived from the tokenizer instead of
# repeating the literal 5000, so the embedding table always covers every word
# index the tokenizer can emit. When the tokenizer has no num_words cap, the
# indices run from 1 to len(word_index), hence the +1.
vocab_size = tokenizer.num_words or len(tokenizer.word_index) + 1

model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=100),  # Embedding Layer
    Conv1D(filters=128, kernel_size=5, activation='relu'),  # CNN Feature Extraction
    MaxPooling1D(pool_size=2),  # Downsampling
    # return_sequences=True hands the full sequence to the second LSTM.
    LSTM(100, dropout=0.2, return_sequences=True),  # LSTM Layer
    LSTM(100, dropout=0.2),  # Another LSTM Layer
    Dense(1, activation='sigmoid'),  # Output Layer
])

# Compile model: binary cross-entropy matches the single sigmoid output.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for 5 epochs in mini-batches of 32 and keep the per-epoch metrics.
# The held-out test split is also passed as validation data, so the val_*
# metrics logged each epoch are computed on the same data evaluated below.
validation_pair = (X_test_pad, y_test_enc)
history = model.fit(
    X_train_pad,
    y_train_enc,
    epochs=5,
    batch_size=32,
    validation_data=validation_pair,
)

# Score the trained model on the test split. evaluate() returns the loss
# followed by the compiled metric (accuracy).
test_scores = model.evaluate(X_test_pad, y_test_enc)
loss, accuracy = test_scores
print("Accuracy:", accuracy)

# Get user input for sentiment analysis and preprocess it exactly like the
# training data: same tokenizer, same padded length.
example_text = input("Enter a movie review: ")
example_seq = tokenizer.texts_to_sequences([example_text])
example_pad = pad_sequences(example_seq, maxlen=max_length)

# Make prediction. One review goes in and the model ends in a single sigmoid
# unit, so predict() returns a 1x1 array. Pull the scalar out explicitly
# rather than relying on the implicit truth value of a one-element array.
prediction = model.predict(example_pad)
positive_score = float(prediction[0][0])

# NOTE(review): treating scores above 0.5 as "Positive" assumes the label
# encoder mapped the positive class to 1 -- confirm against the encoder.
print("Prediction:", "Positive" if positive_score > 0.5 else "Negative")

# Plot training history: per-epoch accuracy on the training data and on the
# validation data, drawn on a single set of axes.
fig, ax = plt.subplots(figsize=(6, 4))
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for metric_key, curve_label in accuracy_curves:
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_title('Model Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()


Epoch 1/5
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 352s 275ms/step - accuracy: 0.7712 - loss: 0.4459 - val_accuracy: 0.8982 - val_loss: 0.2602
Epoch 2/5
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 355s 284ms/step - accuracy: 0.9059 - loss: 0.2378 - val_accuracy: 0.8856 - val_loss: 0.2775
Epoch 3/5
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 322s 257ms/step - accuracy: 0.9279 - loss: 0.1888 - val_accuracy: 0.8967 - val_loss: 0.2992
Epoch 4/5
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 355s 284ms/step - accuracy: 0.9444 - loss: 0.1499 - val_accuracy: 0.8982 - val_loss: 0.2607
Epoch 5/5
1250/1250 ━━━━━━━━━━━━━━━━━━━━ 339s 271ms/step - accuracy: 0.9650 - loss: 0.1052 - val_accuracy: 0.8950 - val_loss: 0.2865
  9/313 ━━━━━━━━━━━━━━━━━━━━ 25s 83ms/step - accuracy: 0.8908 - loss: 0.2592