In [None]:
!pip install pandas
!pip install tensorflow
!pip install scikit-learn
!pip install matplotlib

In [None]:
# Importing necessary libraries
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

In [None]:
# Step 1: Read the reviews CSV into a DataFrame
dataset_path = '/content/IMDB Dataset.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Step 2: Replace the 'sentiment' column with integer class ids
# LabelEncoder numbers the distinct labels 0..n-1 in sorted order.
# NOTE(review): presumably 'negative' -> 0 and 'positive' -> 1; confirm with
# label_encoder.classes_ after running this cell.
label_encoder = LabelEncoder()
label_encoder.fit(df['sentiment'])
df['sentiment'] = label_encoder.transform(df['sentiment'])

In [None]:
# Step 3: Pull the model inputs (review text) and targets (encoded sentiment)
# out of the DataFrame as plain arrays
X, y = df['review'].values, df['sentiment'].values

In [None]:
# Step 4: Tokenization and padding
vocab_size = 10000
max_length = 200

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(X_seq, maxlen=max_length, padding='post', truncating='post')

In [None]:
# Step 5: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

In [None]:
# Step 6: Optimized Model (CNN + Dense)
# Layer roles, in order:
#   Input               - fixed-length rows of max_length integer token ids
#   Embedding           - looks up a 64-dimensional vector for each token id
#   Conv1D              - 128 filters, each spanning 5 consecutive tokens
#   GlobalMaxPooling1D  - keeps each filter's maximum over the whole sequence
#   Dense(64) + Dense(1, sigmoid) - classifier head emitting one probability
#
# The sequence length is declared with an explicit Input layer instead of
# Embedding's `input_length` argument, which current Keras releases deprecate.
# Declaring the input shape also builds the model immediately, so
# model.summary() works before training.
model = models.Sequential([
    layers.Input(shape=(max_length,), dtype='int32'),
    layers.Embedding(input_dim=vocab_size, output_dim=64),
    layers.Conv1D(128, 5, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

In [None]:
# Step 7: Configure training - Adam optimizer, binary cross-entropy loss,
# and accuracy reported as the tracked metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Step 8: Train the model
# Validation uses a 10% slice carved out of the *training* data, so the test
# set is not touched until the final evaluation and the reported test
# accuracy remains a genuine held-out figure.
# Early stopping ends training once validation loss has not improved for two
# consecutive epochs and restores the best weights seen, instead of always
# keeping whatever the last of the 10 epochs produced.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    epochs=10,  # upper bound; early stopping may finish sooner
    batch_size=64,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
# Step 9: Score the trained model on the test split and report accuracy
# evaluate() yields the loss followed by the compiled metric (accuracy)
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

In [None]:
# Step 10: Plot per-epoch accuracy for the training and validation curves
fig, ax = plt.subplots()
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for history_key, curve_label in accuracy_curves:
    ax.plot(history.history[history_key], label=curve_label)
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Validation Accuracy')
ax.legend()
plt.show()

In [None]:
# Step 11: Predict sentiment from user review
def predict_review(review_text):
    """Classify a single review with the trained model and print the verdict.

    The text is converted with the fitted ``tokenizer`` and padded/truncated
    at the end to ``max_length`` tokens, then scored by ``model``. A score of
    0.5 or higher is reported as "Positive", anything lower as "Negative".

    Args:
        review_text: The raw review as a string.

    Returns:
        None. The result is printed rather than returned.
    """
    # A blank review would turn into an all-padding sequence; the model would
    # still produce a score, but it would not reflect any actual text.
    if not review_text or not review_text.strip():
        print("Please enter a non-empty review.")
        return

    sequence = tokenizer.texts_to_sequences([review_text])
    # Same situation when the text yields no tokens at all (for example, only
    # characters the tokenizer filters out).
    if not sequence[0]:
        print("Please enter a non-empty review.")
        return

    padded = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
    # predict() returns one row per input with a single sigmoid output;
    # verbose=0 keeps the progress bar out of the cell output.
    prediction = model.predict(padded, verbose=0)[0][0]
    sentiment = "Positive" if prediction >= 0.5 else "Negative"
    print(f"Predicted Sentiment: {sentiment}")

In [None]:
# Step 12: Prompt for a review on stdin and print its predicted sentiment
user_review = input("Enter a movie review: ")
predict_review(review_text=user_review)