# In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

# Toy corpus of (review, label) pairs -- swap in the IMDB dataset for real use.
# Label convention: 1 = positive, 0 = negative.
_labelled_reviews = [
    ("This movie was fantastic and thrilling!", 1),
    ("Terrible acting and boring plot.", 0),
    ("I loved the cinematography and story.", 1),
    ("Waste of time, really disappointing.", 0),
    ("An amazing experience, highly recommend!", 1),
]
_texts, _labels = zip(*_labelled_reviews)

# Column-oriented view of the same pairs, then the DataFrame built from it.
data = {'review': list(_texts), 'sentiment': list(_labels)}
df = pd.DataFrame(data)

# Text preprocessing helpers
_PUNCTUATION_RE = re.compile(r'[^\w\s]')
_EDGE_PUNCTUATION_RE = re.compile(r'^[^\w\s]+|[^\w\s]+$')
_stop_words_cache = None


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, loading them once.

    The corpus is read lazily on first use and then memoised, so cleaning a
    whole column of reviews does not re-read the word list for every row.
    """
    global _stop_words_cache
    if _stop_words_cache is None:
        _stop_words_cache = frozenset(stopwords.words('english'))
    return _stop_words_cache


def preprocess_text(text):
    """Lower-case a review, drop English stop words and strip punctuation.

    Args:
        text: Raw review text.

    Returns:
        The surviving words, punctuation removed, joined by single spaces.
    """
    stop_words = _english_stop_words()
    kept = []
    for token in text.lower().split():
        # Stop-word entries such as "don't" keep their apostrophe, so test the
        # token with its inner punctuation intact (only surrounding punctuation
        # trimmed, typographic apostrophes normalised). Stripping first would
        # turn "don't" into "dont", which is not in the list and would survive.
        trimmed = _EDGE_PUNCTUATION_RE.sub('', token).replace('\u2019', "'")
        if trimmed in stop_words:
            continue
        word = _PUNCTUATION_RE.sub('', token)
        # Tokens made only of punctuation collapse to '' and are skipped.
        if word and word not in stop_words:
            kept.append(word)
    return ' '.join(kept)

# Apply preprocessing
df['cleaned_review'] = df['review'].apply(preprocess_text)

# Split the dataset: cleaned text is the feature, the 0/1 label is the target.
X = df['cleaned_review']
y = df['sentiment']
# Stratify on the label so both classes keep their proportions in the train
# and test splits. An unstratified shuffle of a small or imbalanced dataset
# can leave a class out of one split, which breaks model fitting or the
# per-class evaluation downstream.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# TF-IDF Vectorization
# Cap the vocabulary at 5000 terms.
tfidf = TfidfVectorizer(max_features=5000)
# Learn the vocabulary and IDF weights from the training split only.
X_train_tfidf = tfidf.fit_transform(X_train)
# Re-use the already-fitted vectorizer for the test split (no re-fitting),
# so the test features are expressed in the training vocabulary.
X_test_tfidf = tfidf.transform(X_test)

# Fit a logistic-regression classifier on the TF-IDF training features.
# fit() returns the estimator itself, so construction and training chain.
lr_model = LogisticRegression(random_state=42).fit(X_train_tfidf, y_train)

# Predict on the held-out split and report accuracy plus per-class metrics.
y_pred = lr_model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
# Pin the label order so it always lines up with target_names. Without
# `labels`, the report infers the classes from y_test/y_pred and raises a
# ValueError when only one class shows up (two names for a single label).
# zero_division=0 reports 0.0 instead of warning when a class has no
# predicted or true samples.
print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1],
    target_names=['Negative', 'Positive'],
    zero_division=0,
))

# Classify one unseen review: it goes through the same cleaning function and
# the already-fitted vectorizer before reaching the model.
sample_review = ["This was an awesome movie!"]
sample_cleaned = list(map(preprocess_text, sample_review))
sample_tfidf = tfidf.transform(sample_cleaned)
prediction = lr_model.predict(sample_tfidf)
# Label 1 is positive; anything else is reported as negative.
sentiment_label = 'Positive' if prediction[0] == 1 else 'Negative'
print(f"\nSample Review Sentiment: {sentiment_label}")