In [2]:
import pandas as pd
import time
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

class DistilBERTSubjectivityClassifier:
    """Heuristic fact/opinion classifier built on a DistilBERT sentiment model.

    The default checkpoint is fine-tuned on SST-2, where label 0 is negative
    and label 1 is positive -- it predicts *sentiment*, not subjectivity.
    This class repurposes it by interpreting:

    - positive sentiment (label 1) as more likely to be an opinion
    - negative sentiment (label 0) as more likely to be a fact

    This is a simplification; fine-tuning on actual subjectivity data would
    be better, and results should be treated as a rough proxy only.
    """

    def __init__(self, model_name="distilbert-base-uncased-finetuned-sst-2-english", device=None):
        """
        Initialize the DistilBERT-based subjectivity classifier.

        Args:
            model_name (str): The name of the pre-trained model to use
            device (str, optional): Device to run the model on (e.g. "cpu" or
                "cuda"). When omitted, "cuda" is used if available, else "cpu".
        """
        self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        self.model = DistilBertForSequenceClassification.from_pretrained(model_name)
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        self.model.to(self.device)
        # Inference only: switch the model to evaluation mode.
        self.model.eval()

    def classify(self, text, threshold=0.5):
        """
        Classify the input text as fact or opinion.

        Args:
            text (str): Input text to classify
            threshold (float): Minimum opinion score (inclusive) for the text
                to be labelled "OPINION". Must lie in [0, 1]. Defaults to 0.5.

        Returns:
            dict: Dictionary with the keys
                - "text": the input text, unchanged
                - "opinion_score": softmax probability of class index 1
                  (the positive class, interpreted as opinion)
                - "classification": "OPINION" if opinion_score >= threshold,
                  otherwise "FACT"

        Raises:
            ValueError: If threshold is not within [0, 1].
        """
        # The chained comparison is also False for NaN, so NaN is rejected.
        if not 0.0 <= threshold <= 1.0:
            raise ValueError(f"threshold must be between 0 and 1, got {threshold!r}")

        inputs = self.tokenizer(
            text, return_tensors="pt", truncation=True, padding=True
        ).to(self.device)

        # No gradients are needed for inference.
        with torch.no_grad():
            logits = self.model(**inputs).logits

        # Normalise the logits over the class dimension; a single text is
        # passed in, so row 0 is the only row read.
        probs = torch.softmax(logits, dim=-1)
        opinion_score = probs[0][1].item()

        classification = "OPINION" if opinion_score >= threshold else "FACT"

        return {
            "text": text,
            "opinion_score": opinion_score,
            "classification": classification,
        }

# Example usage: classify a handful of sentences, time the run, and print a
# tabular summary.
if __name__ == "__main__":
    # Initialize classifier (loads the tokenizer and model for the default
    # checkpoint)
    classifier = DistilBERTSubjectivityClassifier()

    # Test sentences: a mix of factual statements and opinions
    test_sentences = [
        "The Earth revolves around the Sun.",
        "I think this movie is absolutely terrible.",
        "Water boils at 100 degrees Celsius at sea level.",
        "In my opinion, the government should increase funding for education.",
        "Paris is the capital of France.",
        "This is probably the best restaurant in town.",
    ]

    # Process each sentence
    results = []
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    # Note the timed region also includes the per-sentence print calls.
    start_time = time.perf_counter()
    for sentence in test_sentences:
        result = classifier.classify(sentence)
        results.append(result)
        print(f"Text: {result['text']}")
        print(f"Opinion Score: {result['opinion_score']:.2f}")
        print(f"Classification: {result['classification']}")
        print("-" * 50)
    elapsed = time.perf_counter() - start_time

    print(f"Total time for {len(test_sentences)} sentences: {elapsed:.4f} seconds")

    # Create a DataFrame for better visualization
    df = pd.DataFrame(results)
    print("\nSummary:")
    print(df[["text", "opinion_score", "classification"]])

Text: The Earth revolves around the Sun.
Opinion Score: 1.00
Classification: OPINION
--------------------------------------------------
Text: I think this movie is absolutely terrible.
Opinion Score: 0.00
Classification: FACT
--------------------------------------------------
Text: Water boils at 100 degrees Celsius at sea level.
Opinion Score: 0.05
Classification: FACT
--------------------------------------------------
Text: In my opinion, the government should increase funding for education.
Opinion Score: 0.79
Classification: OPINION
--------------------------------------------------
Text: Paris is the capital of France.
Opinion Score: 1.00
Classification: OPINION
--------------------------------------------------
Text: This is probably the best restaurant in town.
Opinion Score: 1.00
Classification: OPINION
--------------------------------------------------
Total time for 6 sentences: 0.3370 seconds

Summary:
                                                text  opinion_score  \
0 