In [2]:
# 1. Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import re
import string

# 2. Load the dataset (use your own CSV or a sample file).
# Columns used further down in this script:
#   "review"    - the raw review text that gets cleaned and vectorised
#   "sentiment" - the target label; 1 for Positive, 0 for Negative
df = pd.read_csv("customer_reviews.csv")  # Path is relative to the working directory; change to your file path

# 3. Text Preprocessing function
# Translation table that deletes every ASCII punctuation character; built once
# here instead of on every call because it never changes.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalise one raw review for vectorisation.

    Steps, in order: lowercase, drop digit runs, drop ASCII punctuation,
    collapse whitespace runs to a single space and trim both ends.

    Args:
        text: The raw review. Normally a ``str``; missing values (``None`` or
            float NaN, which is what an empty CSV cell is read as) are
            accepted, and any other non-string value is converted with
            ``str()`` first.

    Returns:
        The cleaned string. Missing values yield ``''``.
    """
    # An empty CSV cell reaches us as float NaN (NaN is the only float that
    # is not equal to itself). Without this guard ``.lower()`` raises
    # AttributeError and aborts the whole ``.apply`` over the column.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()                           # Lowercase
    text = re.sub(r'\d+', '', text)               # Remove numbers
    text = text.translate(_PUNCTUATION_TABLE)     # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()      # Remove extra spaces
    return text

# Run every raw review through clean_text and keep the result in a new column.
df['clean_review'] = df['review'].apply(clean_text)

# 4. Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_review'],
    df['sentiment'],
    random_state=42,
    test_size=0.2,
)

# 5. TF-IDF features: the vocabulary is learned from the training split only
#    and then re-used unchanged on the held-out split.
tfidf = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# 6. Fit a logistic-regression classifier with default settings
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

# 7. Predict labels for the held-out reviews
y_pred = model.predict(X_test_tfidf)

# 8. Report accuracy, per-class metrics and the confusion matrix
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)
print(
    "\nConfusion Matrix:\n",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:
 [[1 0]
 [0 1]]
