In [1]:
import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
data = {
    "review": [
        "The product is amazing and works perfectly",
        "Very bad quality, totally disappointed",
        "Excellent service and fast delivery",
        "Worst experience ever",
        "I am very happy with this purchase",
        "The product stopped working in two days"
    ],
    "sentiment": [1, 0, 1, 0, 1, 0]  # 1 = Positive, 0 = Negative
}

df = pd.DataFrame(data)
df


Unnamed: 0,review,sentiment
0,The product is amazing and works perfectly,1
1,"Very bad quality, totally disappointed",0
2,Excellent service and fast delivery,1
3,Worst experience ever,0
4,I am very happy with this purchase,1
5,The product stopped working in two days,0


In [3]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    return text

df["cleaned_review"] = df["review"].apply(clean_text)
df


Unnamed: 0,review,sentiment,cleaned_review
0,The product is amazing and works perfectly,1,the product is amazing and works perfectly
1,"Very bad quality, totally disappointed",0,very bad quality totally disappointed
2,Excellent service and fast delivery,1,excellent service and fast delivery
3,Worst experience ever,0,worst experience ever
4,I am very happy with this purchase,1,i am very happy with this purchase
5,The product stopped working in two days,0,the product stopped working in two days


In [4]:
X = df["cleaned_review"]
y = df["sentiment"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [5]:
vectorizer = TfidfVectorizer(stop_words="english")

X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [6]:
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)


In [7]:
y_pred = model.predict(X_test_tfidf)


In [8]:
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.5


In [9]:
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
print(confusion_matrix(y_test, y_pred))


[[1 0]
 [1 0]]


In [11]:
new_review = ["The product quality is very good"]
new_review_clean = [clean_text(new_review[0])]
new_review_tfidf = vectorizer.transform(new_review_clean)

prediction = model.predict(new_review_tfidf)

print("Sentiment:", "Positive" if prediction[0] == 1 else "Negative")


Sentiment: Negative
