In [1]:

# SENTIMENT ANALYSIS WITH NLP (TF-IDF + LOGISTIC REGRESSION)


# Step 1: Import libraries
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report



In [4]:
# Step 2: Create or load dataset
# You can replace this with your own CSV (for example: pd.read_csv('reviews.csv'))
data = {
    'review': [
        "I love this product! Itâ€™s amazing.",
        "Worst experience ever. Totally disappointed.",
        "Very good quality and fast delivery.",
        "I hate this. Waste of money!",
        "Product was okay, not too great.",
        "Excellent! Iâ€™ll buy again.",
        "Terrible customer service.",
        "Absolutely fantastic! Highly recommend.",
        "Bad quality, not worth the price.",
        "This is my favorite item ever!"
    ],
    'sentiment': [
        'positive', 'negative', 'positive', 'negative',
        'neutral', 'positive', 'negative', 'positive',
        'negative', 'positive'
    ]
}

df = pd.DataFrame(data)
print("Sample Dataset:\n")
print(df.head())



Sample Dataset:

                                         review sentiment
0            I love this product! Itâ€™s amazing.  positive
1  Worst experience ever. Totally disappointed.  negative
2          Very good quality and fast delivery.  positive
3                  I hate this. Waste of money!  negative
4              Product was okay, not too great.   neutral


In [5]:
# Step 3: Clean text (remove special characters, numbers)
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    return text

df['cleaned_review'] = df['review'].apply(clean_text)
df['label'] = df['sentiment'].map({'positive': 1, 'negative': 0, 'neutral': 2})



In [6]:
# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    df['cleaned_review'], df['label'], test_size=0.2, random_state=42
)


In [7]:
# Step 5: TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=1000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [8]:
# Step 6: Train Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)



In [9]:
# Step 7: Evaluate model
y_pred = model.predict(X_test_tfidf)

print("\nðŸ“Š Model Evaluation:")
print("Accuracy:", round(accuracy_score(y_test, y_pred)*100, 2), "%")
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))



ðŸ“Š Model Evaluation:
Accuracy: 0.0 %

Confusion Matrix:
 [[0 2]
 [0 0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       2.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
# Step 8: Try custom reviews
sample_reviews = [
    "The product is fantastic and works perfectly!",
    "It stopped working after one day. Horrible experience!",
    "Average performance, okay for the price."
]

sample_cleaned = [clean_text(r) for r in sample_reviews]
sample_tfidf = vectorizer.transform(sample_cleaned)
sample_pred = model.predict(sample_tfidf)

label_map = {1: 'Positive', 0: 'Negative', 2: 'Neutral'}

print("\nðŸ§  Sample Predictions:")
for review, pred in zip(sample_reviews, sample_pred):
    print(f"Review: {review}\nâ†’ Sentiment: {label_map[pred]}\n")


ðŸ§  Sample Predictions:
Review: The product is fantastic and works perfectly!
â†’ Sentiment: Positive

Review: It stopped working after one day. Horrible experience!
â†’ Sentiment: Positive

Review: Average performance, okay for the price.
â†’ Sentiment: Positive

