DistilBERT is a Transformer model derived from BERT through knowledge distillation: it is 40% smaller and 60% faster while retaining about 95% of BERT's accuracy.

This particular checkpoint is trained on the GoEmotions dataset, which has 27 emotion categories plus a neutral label (28 classes in total)!

In [3]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import time

class EnhancedEmotionClassifier:
    """Emotion classifier built on a Hugging Face sequence-classification checkpoint.

    Attributes:
        tokenizer: Tokenizer loaded with ``AutoTokenizer.from_pretrained``.
        model: Model loaded with
            ``AutoModelForSequenceClassification.from_pretrained``, switched to
            eval mode.
        labels: Label names ordered by class index, taken from the model config.
    """

    def __init__(self, model_name="joeddav/distilbert-base-uncased-go-emotions-student"):
        """Load the tokenizer and model for ``model_name`` and read its label names.

        Args:
            model_name: Hub identifier or local path passed to ``from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.eval()

        # Take the label names from the checkpoint itself instead of a
        # hand-written list: a hard-coded list silently mislabels every score
        # whenever its order differs from the order the model was trained with.
        id2label = self.model.config.id2label
        self.labels = [id2label[i] for i in range(self.model.config.num_labels)]

    def classify(self, text: str, top_k: int = 3, neutral_threshold: float = 0.3) -> list:
        """Return the highest-scoring emotions for ``text``.

        Args:
            text: The sentence to classify. Only the first sequence of the
                tokenized input is scored, so pass a single string.
            top_k: Number of top emotions to return. Values larger than the
                number of labels are clamped to the number of labels.
            neutral_threshold: If "neutral" is not among the top ``top_k``
                emotions, it is appended when its score is strictly greater
                than this value.

        Returns:
            A list of ``{"emotion": str, "score": float}`` dicts sorted by
            descending score, optionally followed by a "neutral" entry.
        """
        # Tokenize and prepare input
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)

        # Perform inference (no autograd bookkeeping needed)
        with torch.inference_mode():
            outputs = self.model(**inputs)

        # Sigmoid scores every label independently, so scores do not sum to 1.
        # NOTE(review): confirm the checkpoint was trained multi-label; for a
        # single-label (softmax) head, softmax would be the matching activation.
        scores = torch.sigmoid(outputs.logits)[0]

        # torch.topk raises if k exceeds the number of elements, so clamp it.
        k = min(top_k, scores.numel())
        top_scores, top_indices = torch.topk(scores, k)

        # Convert to plain Python numbers before indexing / returning.
        top_emotions = [
            {"emotion": self.labels[idx], "score": score}
            for score, idx in zip(top_scores.tolist(), top_indices.tolist())
        ]

        # Also report neutral when it scores above the threshold but missed the
        # top-k. Skipped entirely for checkpoints that have no "neutral" label.
        already_listed = any(e["emotion"] == "neutral" for e in top_emotions)
        if "neutral" in self.labels and not already_listed:
            neutral_score = scores[self.labels.index("neutral")].item()
            if neutral_score > neutral_threshold:
                top_emotions.append({"emotion": "neutral", "score": neutral_score})

        return top_emotions

# Test the classifier
# Instantiating the class loads the tokenizer and model via from_pretrained.
classifier = EnhancedEmotionClassifier()

# Example sentences
test_sentences = [
    "I'm so happy about this new opportunity!",
    "I'm really disappointed with the service we received.",
    "That movie was so terrifying, I couldn't sleep.",
    "I just finished reading the report. It contains the information we expected.",  # Neutral
    "The weather is cloudy today.",  # More neutral
    "He have been working on this project for three year."  # Grammatically incorrect input
]

# Test performance
# Only the classify() calls and prints are timed; model loading happened above.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals, whereas time.time() follows the wall clock and can jump.
start_time = time.perf_counter()
for sentence in test_sentences:
    result = classifier.classify(sentence)
    print(f"Sentence: '{sentence}'")
    print(f"Top emotions: {result}")
    print()

print(f"Total time for {len(test_sentences)} sentences: {time.perf_counter() - start_time:.4f} seconds")

Sentence: 'I'm so happy about this new opportunity!'
Top emotions: [{'emotion': 'excitement', 'score': 0.9490593075752258}, {'emotion': 'neutral', 'score': 0.9358248114585876}, {'emotion': 'joy', 'score': 0.9042986631393433}]

Sentence: 'I'm really disappointed with the service we received.'
Top emotions: [{'emotion': 'disappointment', 'score': 0.9518792033195496}, {'emotion': 'disapproval', 'score': 0.9202450513839722}, {'emotion': 'remorse', 'score': 0.8954749703407288}]

Sentence: 'That movie was so terrifying, I couldn't sleep.'
Top emotions: [{'emotion': 'fear', 'score': 0.934294581413269}, {'emotion': 'nervousness', 'score': 0.8598796725273132}, {'emotion': 'disgust', 'score': 0.8301916122436523}]

Sentence: 'I just finished reading the report. It contains the information we expected.'
Top emotions: [{'emotion': 'pride', 'score': 0.8560605049133301}, {'emotion': 'caring', 'score': 0.7650259137153625}, {'emotion': 'approval', 'score': 0.6689854860305786}, {'emotion': 'neutral', 's