# Sentiment Analysis with RoBERTa

In [4]:
# Environment check: print the torch version installed in this kernel's environment.
# importlib.metadata is used instead of `! pip freeze | findstr torch` because
# `findstr` exists only on Windows and `!pip` may resolve to a different
# interpreter than the one running this notebook.
from importlib.metadata import version

torch_version = version("torch")  # raises PackageNotFoundError if torch is not installed
print(f"torch=={torch_version}")

#Check this and make sure you have torch 2.6 or greater

torch==2.7.1




In [5]:
# Optional: uncomment the next line to install/upgrade torch from inside the notebook.
# Keep the quotes around the requirement -- unquoted, the shell parses `>` as output
# redirection. `%pip` is used instead of `!pip` so the install targets this kernel's environment.
# %pip install "torch>=2.7.1"

In [6]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Step 1: Fetch the pretrained checkpoint and its matching tokenizer.
# The checkpoint is a 3-class sentiment classifier (positive, neutral, negative).
model_name = "cardiffnlp/twitter-roberta-base-sentiment"  # 3-class sentiment

model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [8]:
# Step 2: Map each class index (0, 1, 2) to its sentiment name.
id2label = dict(enumerate(("negative", "neutral", "positive")))

In [9]:
# Step 3: Build a small hand-labelled evaluation set.
# Each entry pairs a sentence with its ground-truth class id
# (0 = negative, 1 = neutral, 2 = positive), so a text and its label cannot drift apart.
_labelled_examples = [
    ("I absolutely love this!", 2),
    ("It's fine, nothing special.", 1),
    ("This is terrible, I'm disappointed.", 0),
    ("Best decision ever.", 2),
    ("Not bad, could be better.", 1),
    ("I hate it here.", 0),
    ("Meh, it's okay.", 1),
    ("Fantastic work! Keep it up.", 2),
    ("I'm not sure how I feel.", 1),
    ("Worst experience ever.", 0),
]
data = {
    "text": [sentence for sentence, _ in _labelled_examples],
    "label": [class_id for _, class_id in _labelled_examples],  # Ground truth labels
}
df = pd.DataFrame(data)

In [10]:
# Step 4: Wrap the model and tokenizer in an inference pipeline, then classify every sentence.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)

# Each prediction is a dict holding a 'label' such as 'LABEL_0' and a 'score'.
preds = sentiment_pipeline(df["text"].to_list())

Device set to use cpu


In [14]:
# Inspect the raw pipeline output: one {'label', 'score'} dict per input sentence.
preds

[{'label': 'LABEL_2', 'score': 0.9896766543388367},
 {'label': 'LABEL_2', 'score': 0.6005365252494812},
 {'label': 'LABEL_0', 'score': 0.9773134589195251},
 {'label': 'LABEL_2', 'score': 0.9259532690048218},
 {'label': 'LABEL_2', 'score': 0.6574769616127014},
 {'label': 'LABEL_0', 'score': 0.9738256335258484},
 {'label': 'LABEL_2', 'score': 0.6583998799324036},
 {'label': 'LABEL_2', 'score': 0.9814394116401672},
 {'label': 'LABEL_0', 'score': 0.6617536544799805},
 {'label': 'LABEL_0', 'score': 0.9699680805206299}]

In [11]:
# Step 5: Convert predictions to numeric labels
# The pipeline reports classes as 'LABEL_<id>', so the numeric label is simply the
# integer after the last underscore; no id -> name -> id lookup is needed.
label_str_to_id = {name: idx for idx, name in id2label.items()}  # inverse of id2label
pred_labels = [int(p["label"].rsplit("_", 1)[-1]) for p in preds]

# Fail fast if the model emits a class id that id2label does not know about.
unknown_ids = set(pred_labels) - set(id2label)
assert not unknown_ids, f"unexpected class ids in predictions: {sorted(unknown_ids)}"

In [12]:
# Step 6: Add predictions to the DataFrame
# The readable name is looked up in id2label so the id -> name mapping is defined
# in exactly one place instead of being repeated as a literal here.
df["predicted_label"] = pred_labels
df["predicted_sentiment"] = df["predicted_label"].map(id2label)


In [13]:
# Step 7: Print the classification report
# `labels` pins the report rows to the known class ids so they always line up with
# `target_names`, even if a class is missing from both columns.
# `zero_division=0` reports 0.0 precision for a class that is never predicted
# without emitting an UndefinedMetricWarning for each affected metric.
class_ids = sorted(id2label)
print("📋 Classification Report:\n")
print(
    classification_report(
        df["label"],
        df["predicted_label"],
        labels=class_ids,
        target_names=[id2label[class_id] for class_id in class_ids],
        zero_division=0,
    )
)

📋 Classification Report:

              precision    recall  f1-score   support

    negative       0.75      1.00      0.86         3
     neutral       0.00      0.00      0.00         4
    positive       0.50      1.00      0.67         3

    accuracy                           0.60        10
   macro avg       0.42      0.67      0.51        10
weighted avg       0.38      0.60      0.46        10



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Observation

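- On this 10-example test set the model is biased towards the negative and positive classes and ignores the neutral class completely: all four neutral sentences are predicted as either positive (three) or negative (one).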

- Recall is perfect (1.00) for both negative and positive, but precision suffers (0.75 for negative, 0.50 for positive) because the misclassified neutral sentences count as false positives for those two classes.

- The neutral class needs attention — consider:

    - Adding more training examples,

    - Adjusting class weights or the loss function,

    - Reviewing data labeling consistency.