In [None]:
import re
import nltk
import torch
import pandas as pd
from datasets import load_dataset, Dataset
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, f1_score
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    Trainer, TrainingArguments, pipeline
)

# Fetch the NLTK "stopwords" corpus at import time; SentimentAnalyzer.__init__
# reads the English list from it via nltk.corpus.stopwords.
nltk.download("stopwords")


class SentimentAnalyzer:
    """Three-way sentiment classifier with two back ends.

    A TF-IDF + logistic-regression baseline and a fine-tuned DistilBERT
    sequence classifier are trained on the same Yelp review sample and can
    then be compared side by side on arbitrary text.

    Attributes:
        device: ``0`` when CUDA is available, ``-1`` otherwise (the convention
            passed to ``transformers.pipeline``).
        stopwords: NLTK English stopwords minus the negation/contrast words
            ``not``, ``no``, ``nor`` and ``but``.
        tfidf: Vectorizer for the baseline model (unigrams and bigrams).
        lr_model: Logistic-regression baseline classifier.
        tokenizer: Tokenizer matching the transformer checkpoint.
        transformer_model: Sequence-classification model with three labels.
        transformer_pipe: Inference pipeline; ``None`` until
            ``train_transformer_model`` has run.
    """

    MODEL_NAME = "distilbert-base-uncased"

    # Index in this tuple == integer class id used by both models.
    LABEL_NAMES = ("negative", "neutral", "positive")

    # Contractions whose stem changes when "n't" is expanded.
    _IRREGULAR_NEGATIONS = {"can": "can not", "won": "will not", "shan": "shall not"}
    _IRREGULAR_NEGATION_RE = re.compile(r"\b(can|won|shan)['’]t\b")
    _NEGATION_RE = re.compile(r"n['’]t\b")
    # URLs, @mentions, punctuation and digit runs are all replaced by a space.
    _NOISE_RE = re.compile(r"http\S+|@\w+|[^\w\s]|\d+")

    DEFAULT_EXAMPLES = (
        # NEGATIVE
        "The product was terrible and completely useless.",
        "I had a bad experience with the service.",
        "Not worth the money at all.",
        "The item arrived broken and support was unhelpful.",
        "Extremely disappointed by the quality.",

        # NEUTRAL
        "It was okay, nothing remarkable.",
        "The service was average, could be better.",
        "Just fine, neither good nor bad.",
        "I’m indifferent about the product.",
        "Nothing to complain or praise about.",

        # POSITIVE
        "Absolutely loved it, will buy again!",
        "Fantastic experience from start to end.",
        "Very satisfied with the quality and service.",
        "Exceeded my expectations in every way.",
        "A wonderful product, highly recommended!",
    )

    def __init__(self, model_name=None):
        """Build the untrained baseline and load the pretrained transformer.

        Args:
            model_name: Hugging Face checkpoint to fine-tune. Defaults to
                ``MODEL_NAME`` (``distilbert-base-uncased``).
        """
        model_name = model_name or self.MODEL_NAME

        self.device = 0 if torch.cuda.is_available() else -1
        print(f"Device set to use cuda:{self.device}" if self.device == 0 else "Using CPU.")

        # Negation/contrast words carry sentiment, so they are kept.
        self.stopwords = set(nltk.corpus.stopwords.words("english")) - {'not', 'no', 'nor', 'but'}
        self.tfidf = TfidfVectorizer(max_features=10000, ngram_range=(1, 2), stop_words=list(self.stopwords))
        self.lr_model = LogisticRegression(max_iter=1000, solver="saga", class_weight="balanced")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.transformer_model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=len(self.LABEL_NAMES),
            id2label=dict(enumerate(self.LABEL_NAMES)),
            label2id={name: idx for idx, name in enumerate(self.LABEL_NAMES)},
        )
        # Created by train_transformer_model(); analyze() checks for it.
        self.transformer_pipe = None

    def clean_text(self, text):
        """Normalize raw text for the TF-IDF baseline.

        Lower-cases the text, expands ``n't`` contractions to ``not`` so that
        negation survives punctuation stripping, removes URLs, @mentions,
        punctuation and digits, and drops stopwords.

        Args:
            text: Raw input string. ``None`` is treated as an empty string.

        Returns:
            The remaining tokens joined by single spaces (possibly ``""``).
        """
        if text is None:
            return ""
        text = text.lower()
        # Without this step "don't" becomes "don t" after punctuation removal
        # and both pieces are stopwords, silently erasing the negation.
        text = self._IRREGULAR_NEGATION_RE.sub(
            lambda m: self._IRREGULAR_NEGATIONS[m.group(1)], text
        )
        text = self._NEGATION_RE.sub(" not", text)
        # str.split() already collapses any run of whitespace.
        tokens = self._NOISE_RE.sub(" ", text).split()
        return " ".join(tok for tok in tokens if tok not in self.stopwords)

    @staticmethod
    def _star_to_sentiment(label):
        """Map a 0-4 star label to 0 (negative), 1 (neutral) or 2 (positive)."""
        if label <= 1:
            return 0
        return 1 if label == 2 else 2

    def prepare_data(self, samples_per_class=5000, seed=42):
        """Load Yelp reviews and return a class-balanced train/test split.

        Takes 30000 shuffled rows from the ``yelp_review_full`` train split,
        collapses the five star labels to three sentiment classes, samples the
        same number of rows per class and adds a ``cleaned_text`` column.

        Args:
            samples_per_class: Rows to draw per sentiment class. Capped at the
                number actually available for a class.
            seed: Seed used for the shuffle, the per-class sampling and the
                train/test split, so repeated runs see the same data.

        Returns:
            ``(train_df, test_df)`` DataFrames (80/20 split, stratified by
            label) with ``text``, ``label`` and ``cleaned_text`` columns.
        """
        print("🛠️ Preparing training data for traditional model...")
        dataset = load_dataset("yelp_review_full")
        df = dataset["train"].shuffle(seed=seed).select(range(30000)).to_pandas()
        df["label"] = df["label"].apply(self._star_to_sentiment)

        # Balance the dataset
        per_class = []
        for label_id, name in enumerate(self.LABEL_NAMES):
            subset = df[df["label"] == label_id]
            take = min(samples_per_class, len(subset))
            if take < samples_per_class:
                print(f"⚠️ Only {take} '{name}' rows available (requested {samples_per_class}).")
            per_class.append(subset.sample(take, random_state=seed))
        balanced_df = pd.concat(per_class)
        balanced_df["cleaned_text"] = balanced_df["text"].apply(self.clean_text)

        # Stratify so the balance built above carries over to both splits.
        train_df, test_df = train_test_split(
            balanced_df, test_size=0.2, random_state=seed, stratify=balanced_df["label"]
        )
        return train_df, test_df

    def train_traditional_model(self, train_df, test_df):
        """Fit the TF-IDF + logistic-regression baseline and print its report.

        Args:
            train_df: Training frame with ``cleaned_text`` and ``label``.
            test_df: Held-out frame with the same columns.
        """
        print("🤖 Training traditional TF-IDF + LR model...")
        # The vocabulary is learned from the training split only.
        X_train = self.tfidf.fit_transform(train_df["cleaned_text"])
        X_test = self.tfidf.transform(test_df["cleaned_text"])
        self.lr_model.fit(X_train, train_df["label"])
        preds = self.lr_model.predict(X_test)
        print("\n📈 Traditional Model Report:\n", classification_report(test_df["label"], preds))

    @staticmethod
    def _eval_strategy_kwarg():
        """Return the TrainingArguments keyword for the evaluation schedule.

        Newer transformers releases name it ``eval_strategy`` and deprecate
        ``evaluation_strategy``; use whichever the installed version accepts.
        """
        import inspect

        accepted = inspect.signature(TrainingArguments.__init__).parameters
        return "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"

    def _compute_metrics(self, p):
        """Compute accuracy and per-class F1 from a Trainer eval prediction."""
        preds = p.predictions.argmax(-1)
        # Pin the label order so index i is always class i, even when a class
        # is absent from both the references and the predictions.
        f1s = f1_score(
            p.label_ids,
            preds,
            labels=list(range(len(self.LABEL_NAMES))),
            average=None,
            zero_division=0,
        )
        metrics = {"accuracy": float((preds == p.label_ids).mean())}
        for name, score in zip(self.LABEL_NAMES, f1s):
            metrics[f"f1_{name}"] = float(score)
        return metrics

    def train_transformer_model(self, train_df, test_df):
        """Fine-tune the transformer and build the inference pipeline.

        Trains on the raw ``text`` column (not ``cleaned_text``) and stores
        the resulting pipeline in ``self.transformer_pipe``.

        Args:
            train_df: Training frame with ``text`` and ``label``.
            test_df: Evaluation frame with the same columns.
        """
        print("🧠 Fine-tuning Transformer model...")

        def tokenize(batch):
            return self.tokenizer(batch["text"], padding="max_length", truncation=True, max_length=128)

        def to_torch_dataset(frame):
            # preserve_index=False keeps the shuffled pandas index from
            # turning into an extra "__index_level_0__" column.
            ds = Dataset.from_pandas(frame[["text", "label"]], preserve_index=False)
            ds = ds.map(tokenize, batched=True)
            ds.set_format("torch", columns=["input_ids", "attention_mask", "label"])
            return ds

        train_ds = to_torch_dataset(train_df)
        test_ds = to_torch_dataset(test_df)

        # NOTE(review): test_df doubles as the checkpoint-selection set
        # (load_best_model_at_end), so the reported eval scores are optimistic.
        training_args = TrainingArguments(
            output_dir="./results",
            save_strategy="epoch",
            per_device_train_batch_size=32,
            per_device_eval_batch_size=32,
            num_train_epochs=4,
            learning_rate=2e-5,
            weight_decay=0.01,
            logging_steps=100,
            load_best_model_at_end=True,
            report_to="none",
            fp16=torch.cuda.is_available(),
            **{self._eval_strategy_kwarg(): "epoch"},
        )
        trainer = Trainer(
            model=self.transformer_model,
            args=training_args,
            train_dataset=train_ds,
            eval_dataset=test_ds,
            compute_metrics=self._compute_metrics,
        )
        trainer.train()

        self.transformer_pipe = pipeline(
            "text-classification",
            model=self.transformer_model,
            tokenizer=self.tokenizer,
            device=self.device,
            top_k=len(self.LABEL_NAMES),
        )

    def get_transformer_label(self, scores):
        """Pick the top label from a list of pipeline score dicts.

        Args:
            scores: Iterable of ``{"label": str, "score": float}`` dicts.

        Returns:
            ``(label, confidences)`` where ``label`` is the lower-cased label
            with the highest score and ``confidences`` maps every lower-cased
            label to its score formatted with two decimals.
        """
        scores = {s["label"].lower(): s["score"] for s in scores}
        return max(scores, key=scores.get), {k: f"{v:.2f}" for k, v in scores.items()}

    def analyze(self, text):
        """Classify one text with both models.

        Args:
            text: Raw input string.

        Returns:
            Dict with the original ``text``, the upper-cased ``traditional``
            and ``transformer`` labels, and the transformer ``confidence``
            mapping.

        Raises:
            RuntimeError: If ``train_transformer_model`` has not been run yet.
        """
        pipe = getattr(self, "transformer_pipe", None)
        if pipe is None:
            raise RuntimeError(
                "Transformer pipeline is not available; "
                "call train_transformer_model() first."
            )

        # The baseline sees cleaned text; the transformer sees the raw text.
        cleaned = self.clean_text(text)
        lr_pred = self.lr_model.predict(self.tfidf.transform([cleaned]))[0]
        # For a single string the pipeline result is wrapped in an outer list.
        tf_scores = pipe(text)[0]
        tf_label, tf_conf = self.get_transformer_label(tf_scores)

        return {
            "text": text,
            "traditional": self.LABEL_NAMES[int(lr_pred)].upper(),
            "transformer": tf_label.upper(),
            "confidence": tf_conf
        }

    def test_cases(self, examples=None):
        """Run ``analyze`` over sample sentences and print the results.

        Args:
            examples: Texts to classify. Defaults to ``DEFAULT_EXAMPLES``
                (five negative, five neutral, five positive sentences).
        """
        if examples is None:
            examples = self.DEFAULT_EXAMPLES
        print(f"\n🔍 Running on {len(examples)} diverse test cases:\n")
        for text in examples:
            res = self.analyze(text)
            print(f"📝 {res['text']}")
            print(f"   Traditional: {res['traditional']}")
            print(f"   Transformer: {res['transformer']}")
            print(f"   Confidence: {res['confidence']}\n")


# Script entry point: build the analyzer, train both models on the same
# split, then print predictions for the built-in example sentences.
if __name__ == "__main__":
    analyzer = SentimentAnalyzer()
    # The same train/test frames feed both models so their reports are comparable.
    train_df, test_df = analyzer.prepare_data()
    analyzer.train_traditional_model(train_df, test_df)
    # Must run before test_cases(): it creates the pipeline that analyze() uses.
    analyzer.train_transformer_model(train_df, test_df)
    analyzer.test_cases()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Device set to use cuda:0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🛠️ Preparing training data for traditional model...
🤖 Training traditional TF-IDF + LR model...

📈 Traditional Model Report:
               precision    recall  f1-score   support

           0       0.81      0.76      0.78      1037
           1       0.60      0.64      0.62       970
           2       0.75      0.74      0.75       993

    accuracy                           0.72      3000
   macro avg       0.72      0.71      0.72      3000
weighted avg       0.72      0.72      0.72      3000

🧠 Fine-tuning Transformer model...


Map:   0%|          | 0/12000 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy,F1 Negative,F1 Neutral,F1 Positive
1,0.6967,0.642362,0.721,0.788209,0.608558,0.754995
2,0.5794,0.634824,0.724,0.791978,0.613086,0.760313
3,0.4605,0.681172,0.707667,0.768453,0.595335,0.757073
4,0.3732,0.720636,0.714,0.780488,0.605542,0.754198


Device set to use cuda:0



🔍 Running on 15 diverse test cases:

📝 The product was terrible and completely useless.
   Traditional: NEGATIVE
   Transformer: NEGATIVE
   Confidence: {'negative': '0.96', 'neutral': '0.04', 'positive': '0.01'}

📝 I had a bad experience with the service.
   Traditional: NEGATIVE
   Transformer: NEGATIVE
   Confidence: {'negative': '0.89', 'neutral': '0.10', 'positive': '0.01'}

📝 Not worth the money at all.
   Traditional: NEGATIVE
   Transformer: NEGATIVE
   Confidence: {'negative': '0.95', 'neutral': '0.04', 'positive': '0.01'}

📝 The item arrived broken and support was unhelpful.
   Traditional: NEGATIVE
   Transformer: NEGATIVE
   Confidence: {'negative': '0.95', 'neutral': '0.04', 'positive': '0.01'}

📝 Extremely disappointed by the quality.
   Traditional: NEGATIVE
   Transformer: NEGATIVE
   Confidence: {'negative': '0.92', 'neutral': '0.07', 'positive': '0.01'}

📝 It was okay, nothing remarkable.
   Traditional: NEUTRAL
   Transformer: NEUTRAL
   Confidence: {'neutral': '0.6