In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load pre-trained FinBERT
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

class FinBERTAnalyzer:
    def __init__(self, model_name="ProsusAI/finbert"):
        """Load pre-trained FinBERT."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.label_map = {0: 'negative', 1: 'neutral', 2: 'positive'}
    
    def analyze_sentiment_zero_shot(self, text):
        """
        Get sentiment prediction without fine-tuning.
        Returns: {predicted_label, confidence, probabilities}
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        
        probabilities = torch.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(probabilities).item()
        confidence = probabilities[0][predicted_class].item()
        
        return {
            'text': text,
            'predicted_label': self.label_map[predicted_class],
            'predicted_class': predicted_class,
            'confidence': confidence,
            'probabilities': {
                'negative': probabilities[0][0].item(),
                'neutral': probabilities[0][1].item(),
                'positive': probabilities[0][2].item()
            }
        }
    
    def analyze_batch(self, texts):
        """Efficiently analyze multiple texts."""
        results = []
        for text in texts:
            results.append(self.analyze_sentiment_zero_shot(text))
        return results

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [3]:
analyzer = FinBERTAnalyzer()

sample_texts = [
    "Strong earnings growth beat expectations",
    "Revenue declined significantly in the last quarter",
    "Results were in line with guidance"
]

results = analyzer.analyze_batch(sample_texts)

for i, res in enumerate(results, 1):
    print(f"\nSample {i}: \"{res['text']}\"")
    print(f"  Predicted: {res['predicted_label']} (confidence: {res['confidence']:.2f})")
    print(
        f"  Probabilities: "
        f"negative={res['probabilities']['negative']:.2f}, "
        f"neutral={res['probabilities']['neutral']:.2f}, "
        f"positive={res['probabilities']['positive']:.2f}"
    )


Sample 1: "Strong earnings growth beat expectations"
  Predicted: negative (confidence: 0.95)
  Probabilities: negative=0.95, neutral=0.02, positive=0.03

Sample 2: "Revenue declined significantly in the last quarter"
  Predicted: neutral (confidence: 0.97)
  Probabilities: negative=0.01, neutral=0.97, positive=0.02

Sample 3: "Results were in line with guidance"
  Predicted: negative (confidence: 0.62)
  Probabilities: negative=0.62, neutral=0.04, positive=0.33
