In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB,GaussianNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
data = {
    'review': [
        "I absolutely love this product! Highly recommend.",  # Positive
        "The battery life is very poor, extremely disappointed.", # Negative
        "It arrived on time, but the size is just okay.",     # Neutral
        "Best purchase ever, excellent quality and price.",   # Positive
        "Not what I expected. The item broke after one use.", # Negative
        "It's decent, nothing spectacular but nothing bad either.", # Neutral
        "Worth every penny, my favorite gadget!",            # Positive
        "terrible waste of money. Do not buy.",              # Negative
        "The instructions were confusing, I'm still not sure.", # Neutral
        "Perfect fit and finish. Five stars!",               # Positive
    ],
    # 2 -> Positive, 1 -> Neutral, 0 -> Negative
    'sentiment': [2, 0, 1, 2, 0, 1, 2, 0, 1, 2]
}

df = pd.DataFrame(data)
df

Unnamed: 0,review,sentiment
0,I absolutely love this product! Highly recommend.,2
1,"The battery life is very poor, extremely disap...",0
2,"It arrived on time, but the size is just okay.",1
3,"Best purchase ever, excellent quality and price.",2
4,Not what I expected. The item broke after one ...,0
5,"It's decent, nothing spectacular but nothing b...",1
6,"Worth every penny, my favorite gadget!",2
7,terrible waste of money. Do not buy.,0
8,"The instructions were confusing, I'm still not...",1
9,Perfect fit and finish. Five stars!,2


In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(df['review'], df['sentiment'], test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer(
    stop_words='english',
    lowercase=True,
    ngram_range=(1,2),
    max_features=500
)

X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

print("A few of the learned features (N-grams):")
print(vectorizer.get_feature_names_out()[:10])

NB = MultinomialNB(alpha=1.0)

NB.fit(X_train_vec, Y_train)
y_pred = NB.predict(X_test_vec)

target_names = ['Negative (0)', 'Neutral (1)', 'Positive (2)']

print("Classification Report:")
print(classification_report(Y_test, y_pred, target_names=target_names))

print("Confusion Matrix:")
print(confusion_matrix(Y_test, y_pred))

A few of the learned features (N-grams):
['absolutely' 'absolutely love' 'arrived' 'arrived time' 'bad' 'best'
 'best purchase' 'broke' 'broke use' 'buy']
Classification Report:
              precision    recall  f1-score   support

Negative (0)       0.00      0.00      0.00       1.0
 Neutral (1)       0.00      0.00      0.00       1.0
Positive (2)       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Confusion Matrix:
[[0 0 1]
 [0 0 1]
 [0 0 0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
