In [20]:
from pathlib import Path
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB, ComplementNB
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

In [2]:
# Placeholder path to the folder containing the 3 CSV files from Kaggle.
# Point this at your own copy of the dataset when the data lives elsewhere.
DATASET_DIR = Path("/kaggle/input/consumer-reviews-of-amazon-products")

def load_consumer_reviews(dataset_dir: Path) -> pd.DataFrame:
    """Read every CSV in ``dataset_dir`` and stack them into one DataFrame.

    Each row is tagged with the name of the file it came from in a
    ``__source_file`` column. Files are read in sorted path order so the row
    order of the result (and any seeded split made from it) does not depend
    on the order in which the filesystem happens to list them.

    Parameters
    ----------
    dataset_dir : Path or str
        Folder holding the ``*.csv`` files (searched non-recursively).

    Returns
    -------
    pd.DataFrame
        The rows of all files concatenated, with a fresh ``0..n-1`` index.

    Raises
    ------
    ValueError
        If ``dataset_dir`` contains no ``*.csv`` files.
    """
    dataset_dir = Path(dataset_dir)
    files = sorted(dataset_dir.glob("*.csv"))
    if not files:
        # pd.concat([]) would raise a ValueError anyway; fail early with a
        # message that points at the actual problem.
        raise ValueError(f"No CSV files found in {dataset_dir}")

    frames = []
    for path in files:
        # low_memory=False makes pandas infer each column's dtype from the
        # whole file instead of chunk by chunk, which avoids mixed-type columns.
        df = pd.read_csv(path, low_memory=False)
        df["__source_file"] = path.name
        frames.append(df)

    data = pd.concat(frames, ignore_index=True)
    print(f"Loaded {len(data):,} rows from {len(files)} files.")

    return data

def rating_to_label(rating: float):
    """Map a numeric star rating to a sentiment label.

    ``rating >= 4`` gives ``"positive"``, ``rating == 3`` gives ``"neutral"``
    and ``rating <= 2`` gives ``"negative"``. Any other value (NaN, or a
    fractional rating strictly between 2 and 4 other than 3) gives ``np.nan``.
    """
    # Start from the "no label" result and overwrite it when a rule matches.
    label = np.nan
    if rating >= 4:
        label = "positive"
    elif rating == 3:
        label = "neutral"
    elif rating <= 2:
        label = "negative"
    return label

def preprocess_reviews(df: pd.DataFrame) -> pd.DataFrame:
    """Reduce the raw reviews frame to cleaned text and a sentiment label.

    Parameters
    ----------
    df : pd.DataFrame
        Raw reviews; must contain the ``reviews.text`` and ``reviews.rating``
        columns.

    Returns
    -------
    pd.DataFrame
        Two columns, ``review_text`` (stripped, non-empty string) and
        ``label`` (as produced by ``rating_to_label``). Rows whose rating is
        not numeric, whose text is empty, or whose rating maps to no label
        are dropped. The original row index is preserved.
    """
    # Cast to str before using the .str accessor: on an object column the
    # .str methods return NaN for non-string cells, and NaN != "" is True,
    # so such rows would slip past the empty-text filter below.
    text = df["reviews.text"].fillna("").astype(str).str.strip()
    data = pd.DataFrame({
        "review_text": text,
        # Unparseable ratings become NaN and are dropped on the next line.
        "rating": pd.to_numeric(df["reviews.rating"], errors="coerce"),
    })
    data = data.dropna(subset=["rating"])
    data = data[data["review_text"] != ""].copy()
    data["label"] = data["rating"].apply(rating_to_label)
    # rating_to_label returns NaN for ratings it cannot classify.
    data = data.dropna(subset=["label"])
    return data[["review_text", "label"]]

In [3]:
# Load every CSV under DATASET_DIR, then reduce it to (review_text, label) rows.
raw_df = load_consumer_reviews(DATASET_DIR)
dataset = preprocess_reviews(raw_df)
# Class balance check. The recorded output shows "positive" making up
# 62,546 of 67,958 rows, so accuracy alone is a weak metric here; the model
# cells below also print macro F1.
dataset["label"].value_counts()

  df = pd.read_csv(path)


Loaded 67,992 rows from 3 files.


label
positive    62546
neutral      2902
negative     2510
Name: count, dtype: int64

## Train/Validation Split

In [5]:
# Hold out 10% of the rows for validation. Stratifying on the label keeps the
# class proportions the same in both splits, and the fixed random_state makes
# the split repeatable for the same input rows.
X_train, X_val, y_train, y_val = train_test_split(
    dataset["review_text"],
    dataset["label"],
    test_size=0.1,
    random_state=42,
    stratify=dataset["label"],
)


## Model Training

### Multinomial Naive Bayes

In [21]:
# Baseline: TF-IDF features (lower-cased unigrams and bigrams, English stop
# words removed, terms in fewer than 2 documents dropped) fed into a
# Multinomial Naive Bayes classifier.
tfidf_nb = Pipeline(
    steps=[
        (
            "tfidf",
            TfidfVectorizer(
                lowercase=True,
                stop_words="english",
                ngram_range=(1, 2),
                min_df=2,
            ),
        ),
        ("nb", MultinomialNB()),
    ]
)

# fit() returns the pipeline itself, so training and validation prediction
# can be chained.
val_pred = tfidf_nb.fit(X_train, y_train).predict(X_val)

# Macro F1 is printed next to accuracy because it weights the three classes
# equally; in the recorded run this model has recall of only 0.04 / 0.00 on
# negative / neutral despite 0.92 accuracy.
print("Accuracy:", accuracy_score(y_val, val_pred))
print("Macro F1:", f1_score(y_val, val_pred, average="macro"))
print(classification_report(y_val, val_pred))

Accuracy: 0.9218658034137728
Macro F1: 0.3451286283648453
              precision    recall  f1-score   support

    negative       1.00      0.04      0.07       251
     neutral       1.00      0.00      0.01       290
    positive       0.92      1.00      0.96      6255

    accuracy                           0.92      6796
   macro avg       0.97      0.35      0.35      6796
weighted avg       0.93      0.92      0.89      6796



### Complement Naive Bayes

In [19]:
# Same TF-IDF features as the Multinomial NB cell, but with Complement Naive
# Bayes as the classifier.
tfidf_nb1 = Pipeline(
    steps=[
        (
            "tfidf",
            TfidfVectorizer(
                lowercase=True,
                stop_words="english",
                ngram_range=(1, 2),
                min_df=2,
            ),
        ),
        ("nb", ComplementNB()),
    ]
)

# fit() returns the pipeline itself, so training and validation prediction
# can be chained.
val_pred = tfidf_nb1.fit(X_train, y_train).predict(X_val)

# Report overall accuracy, class-balanced macro F1, and the per-class table.
print("Accuracy:", accuracy_score(y_val, val_pred))
print("Macro F1:", f1_score(y_val, val_pred, average="macro"))
print(classification_report(y_val, val_pred))

Accuracy: 0.9393761035903473
Macro F1: 0.6306337678457085
              precision    recall  f1-score   support

    negative       0.64      0.46      0.54       251
     neutral       0.81      0.25      0.38       290
    positive       0.95      0.99      0.97      6255

    accuracy                           0.94      6796
   macro avg       0.80      0.57      0.63      6796
weighted avg       0.93      0.94      0.93      6796



### Logistic Regression

In [8]:
# Same TF-IDF features, now with a logistic regression classifier.
# NOTE: despite the "nb" in its name, this pipeline contains no Naive Bayes
# step; the name is kept so existing references keep working.
tfidf_nb2 = Pipeline(
    steps=[
        (
            "tfidf",
            TfidfVectorizer(
                lowercase=True,
                stop_words="english",
                ngram_range=(1, 2),
                min_df=2,
            ),
        ),
        (
            "logreg",
            # class_weight='balanced' reweights classes inversely to their
            # frequency in y_train; max_iter is raised to 1000.
            LogisticRegression(class_weight='balanced', max_iter=1000),
        ),
    ]
)

# fit() returns the pipeline itself, so training and validation prediction
# can be chained.
val_pred = tfidf_nb2.fit(X_train, y_train).predict(X_val)

# Report overall accuracy, class-balanced macro F1, and the per-class table.
print("Accuracy:", accuracy_score(y_val, val_pred))
print("Macro F1:", f1_score(y_val, val_pred, average="macro"))
print(classification_report(y_val, val_pred))

Accuracy: 0.9192171865803413
Macro F1: 0.6861651618752745
              precision    recall  f1-score   support

    negative       0.53      0.77      0.63       251
     neutral       0.39      0.59      0.47       290
    positive       0.98      0.94      0.96      6255

    accuracy                           0.92      6796
   macro avg       0.63      0.77      0.69      6796
weighted avg       0.94      0.92      0.93      6796



### Logistic Regression + Custom Class Weights

In [18]:
# Manually specified per-class weights for the logistic regression loss.
# They up-weight the two minority classes, but by less than the roughly
# 21x / 25x ratios that the class counts printed earlier would imply.
custom_weights = dict(
    positive=1.0,
    neutral=10.0,
    negative=13.0,
)

# Same TF-IDF features as the other model cells, with logistic regression
# using the weights above. NOTE: despite the "nb" in its name, this pipeline
# contains no Naive Bayes step; classify_sentiment() refers to it by this name.
tfidf_nb3 = Pipeline(
    steps=[
        (
            "tfidf",
            TfidfVectorizer(
                lowercase=True,
                stop_words="english",
                ngram_range=(1, 2),
                min_df=2,
            ),
        ),
        (
            "logreg",
            LogisticRegression(class_weight=custom_weights, max_iter=1000),
        ),
    ]
)

# fit() returns the pipeline itself, so training and validation prediction
# can be chained.
val_pred = tfidf_nb3.fit(X_train, y_train).predict(X_val)

# Report overall accuracy, class-balanced macro F1, and the per-class table.
print("Accuracy:", accuracy_score(y_val, val_pred))
print("Macro F1:", f1_score(y_val, val_pred, average="macro"))
print(classification_report(y_val, val_pred))

Accuracy: 0.9471748087110065
Macro F1: 0.7412019674352468
              precision    recall  f1-score   support

    negative       0.66      0.75      0.70       251
     neutral       0.56      0.54      0.55       290
    positive       0.98      0.97      0.98      6255

    accuracy                           0.95      6796
   macro avg       0.73      0.75      0.74      6796
weighted avg       0.95      0.95      0.95      6796



## Inference

In [23]:
def classify_sentiment(texts, model=None):
    """Predict a sentiment label for one review or a collection of reviews.

    Parameters
    ----------
    texts : str or iterable of str
        A single review, or any iterable of reviews. A bare string is treated
        as one review, not as a sequence of characters.
    model : object with a ``predict`` method, optional
        Fitted classifier to use. Defaults to the notebook-level ``tfidf_nb3``
        pipeline, so existing calls behave as before.

    Returns
    -------
    list
        One predicted label per input review, in input order. An empty input
        returns an empty list without calling the model.
    """
    if isinstance(texts, str):
        texts = [texts]
    # Materialise once so generators work and emptiness can be checked.
    texts = list(texts)
    if not texts:
        # Avoid calling predict() with zero samples.
        return []
    if model is None:
        # Looked up at call time so the default follows the current notebook
        # state of tfidf_nb3.
        model = tfidf_nb3
    return model.predict(texts).tolist()

# Smoke test on three hand-written reviews; the recorded output labels them
# positive, negative and neutral respectively.
sample_texts = [
    "Battery life is amazing and the screen is bright.",
    "It stopped working after a week.",
    "It's okay, nothing special.",
]
classify_sentiment(sample_texts)

['positive', 'negative', 'neutral']