# Sentiment Analysis with Zero-Shot Text Classification

In [1]:
import pandas as pd
from sklearn.metrics import classification_report

In [2]:
# Hand-labelled evaluation set: each entry pairs a sentence with its
# ground-truth sentiment so the two stay visibly aligned.
samples = [
    ("I absolutely love this!", "positive"),
    ("It's fine, nothing special.", "neutral"),
    ("This is terrible, I'm disappointed.", "negative"),
    ("Best decision ever.", "positive"),
    ("Not bad, could be better.", "neutral"),
    ("I hate it here.", "negative"),
    ("Meh, it's okay.", "neutral"),
    ("Fantastic work! Keep it up.", "positive"),
    ("I'm not sure how I feel.", "neutral"),
    ("Worst experience ever.", "negative"),
]

# Split the pairs back into the column-oriented mapping the DataFrame is built from.
data = {
    "text": [sentence for sentence, _ in samples],
    "label": [sentiment for _, sentiment in samples],  # Ground truth labels
}
df = pd.DataFrame(data)

In [3]:
from transformers import pipeline

# Zero-shot classifier: scores each input against the candidate labels
# without any task-specific fine-tuning.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate sentiment classes the model chooses between.
labels = ["positive", "neutral", "negative"]

# Classify every sentence of the evaluation set in one call; `result` is a
# list with one dict per input ('sequence', 'labels', 'scores').
result = classifier(df["text"].tolist(), labels)
print(result)

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


[{'sequence': 'I absolutely love this!', 'labels': ['positive', 'neutral', 'negative'], 'scores': [0.9621205925941467, 0.03216494619846344, 0.005714462138712406]}, {'sequence': "It's fine, nothing special.", 'labels': ['neutral', 'positive', 'negative'], 'scores': [0.9627881050109863, 0.029497094452381134, 0.007714795880019665]}, {'sequence': "This is terrible, I'm disappointed.", 'labels': ['negative', 'neutral', 'positive'], 'scores': [0.9974903464317322, 0.0015800370601937175, 0.0009296110947616398]}, {'sequence': 'Best decision ever.', 'labels': ['positive', 'neutral', 'negative'], 'scores': [0.9935014843940735, 0.0040102615021169186, 0.0024883218575268984]}, {'sequence': 'Not bad, could be better.', 'labels': ['neutral', 'positive', 'negative'], 'scores': [0.6571953892707825, 0.28543177247047424, 0.057372868061065674]}, {'sequence': 'I hate it here.', 'labels': ['negative', 'neutral', 'positive'], 'scores': [0.9935814142227173, 0.004364476073533297, 0.0020541155245155096]}, {'sequ

In [4]:
# Reduce every classifier output to its input sentence plus the single
# highest-scoring label.
result_list = []
for prediction in result:
    sequence = prediction['sequence']
    scores = prediction['scores']
    # Position of the largest score (first occurrence wins on ties).
    best_idx = max(range(len(scores)), key=scores.__getitem__)
    result_list.append({'sequence': sequence, 'label': prediction['labels'][best_idx]})
result_list

[{'sequence': 'I absolutely love this!', 'label': 'positive'},
 {'sequence': "It's fine, nothing special.", 'label': 'neutral'},
 {'sequence': "This is terrible, I'm disappointed.", 'label': 'negative'},
 {'sequence': 'Best decision ever.', 'label': 'positive'},
 {'sequence': 'Not bad, could be better.', 'label': 'neutral'},
 {'sequence': 'I hate it here.', 'label': 'negative'},
 {'sequence': "Meh, it's okay.", 'label': 'neutral'},
 {'sequence': 'Fantastic work! Keep it up.', 'label': 'positive'},
 {'sequence': "I'm not sure how I feel.", 'label': 'neutral'},
 {'sequence': 'Worst experience ever.', 'label': 'negative'}]

In [5]:
# Store the predicted label next to the ground truth; result_list is built
# in the same order as the rows of df, so a positional assignment lines up.
predicted_labels = [entry['label'] for entry in result_list]
df["pred_labels"] = predicted_labels

In [6]:
# Per-class precision / recall / F1 of the predictions against the ground truth.
print("📋 Classification Report:\n")
report_text = classification_report(y_true=df["label"], y_pred=df["pred_labels"])
print(report_text)

📋 Classification Report:

              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         3
     neutral       1.00      1.00      1.00         4
    positive       1.00      1.00      1.00         3

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10

