In [8]:
import pandas as pd
import torch
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

class SubjectivityClassifier:
    def __init__(self, model_name, device=None):
        """
        Initialize a subjectivity classifier with a specified pre-trained model.
        
        Args:
            model_name (str): Name of the pre-trained model to use
            device (str, optional): Device to run the model on ('cuda' or 'cpu')
        """
        self.model_name = model_name
        self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu")
        
        print(f"Loading model: {model_name} on {self.device}")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()
    
    def classify(self, text, return_time=False):
        """
        Classify text as subjective (opinion) or objective (fact).
        
        Args:
            text (str): Text to classify
            return_time (bool): Whether to return inference time
            
        Returns:
            dict: Classification results including scores and inference time if requested
        """
        start_time = time.time()
        
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
        
        with torch.no_grad():
            outputs = self.model(**inputs)
        
        # Get probabilities
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        
        # Check if the model has 2 or more output classes
        if probs.shape[1] >= 2:
            # Assuming: 0 = objective/fact, 1 = subjective/opinion
            # This is the common convention but may need adjustment for specific models
            obj_score = probs[0][0].item()
            subj_score = probs[0][1].item()
            classification = "FACT" if obj_score > subj_score else "OPINION"
        else:
            # For single score models (rare)
            subj_score = probs[0][0].item()
            obj_score = 1 - subj_score
            classification = "OPINION" if subj_score >= 0.5 else "FACT"
        
        end_time = time.time()
        inference_time = end_time - start_time
        
        result = {
            "text": text,
            "objective_score": obj_score,
            "subjective_score": subj_score,
            "classification": classification
        }
        
        if return_time:
            result["inference_time"] = inference_time
            
        return result

def benchmark_models(models, test_sentences, runs=3):
    """
    Benchmark multiple subjectivity classification models.
    
    Args:
        models (list): List of model names to benchmark
        test_sentences (list): List of sentences to test
        runs (int): Number of runs for more reliable timing
        
    Returns:
        pandas.DataFrame: Benchmark results
    """
    all_results = []
    
    for model_name in models:
        print(f"\nBenchmarking model: {model_name}")
        classifier = SubjectivityClassifier(model_name)
        
        # Warmup run
        for sentence in test_sentences:
            classifier.classify(sentence)
        
        # Benchmark runs
        model_times = []
        results = []
        
        for _ in range(runs):
            batch_start = time.time()
            
            for sentence in test_sentences:
                result = classifier.classify(sentence, return_time=True)
                if _ == 0:  # Only save classification results from first run
                    results.append(result)
                model_times.append(result["inference_time"])
            
            batch_end = time.time()
            batch_time = batch_end - batch_start
            print(f"Run completed in {batch_time:.4f}s (avg {batch_time/len(test_sentences):.4f}s per sentence)")
        
        # Calculate average inference time
        avg_time = np.mean(model_times)
        
        # Add model name and timing information to results
        for result in results:
            result["model"] = model_name
            result["avg_inference_time"] = avg_time
            all_results.append(result)
    
    # Create and return DataFrame
    df = pd.DataFrame(all_results)
    return df

# Main execution
if __name__ == "__main__":
    # Models to benchmark (selected specifically for subjectivity classification)
    models = [
        #"prithivida/parrot_subjectivity_classifier",  # Popular subjectivity classifier
        #"textattack/distilbert-base-uncased-MPQA",    # DistilBERT fine-tuned on MPQA
        #"yuriykatko/subjectivity-classifier"          # Custom subjectivity classifier

        "lighteternal/fact-or-opinion-xlmr-el",
        "GroNLP/mdebertav3-subjectivity-multilingual"
    ]
    
    # Test sentences (mix of clear facts and opinions)
    test_sentences = [
        "The Earth revolves around the Sun.",
        "I think this movie is absolutely terrible.",
        "Water boils at 100 degrees Celsius at sea level.",
        "In my opinion, the government should increase funding for education.",
        "Paris is the capital of France.",
        "This is probably the best restaurant in town."
    ]
    
    # Run benchmark
    results_df = benchmark_models(models, test_sentences)
    
    # Display results by model
    for model in models:
        print(f"\nResults for {model}:")
        model_df = results_df[results_df["model"] == model]
        print(model_df[["text", "objective_score", "subjective_score", "classification", "inference_time"]])
    
    # Display performance summary
    print("\nPerformance Summary:")
    performance_df = results_df.groupby("model")["avg_inference_time"].mean().reset_index()
    performance_df = performance_df.sort_values("avg_inference_time")
    print(performance_df)
    
    # Save results to CSV
    results_df.to_csv("subjectivity_benchmark_results.csv", index=False)
    print("\nDetailed results saved to 'subjectivity_benchmark_results.csv'")


Benchmarking model: lighteternal/fact-or-opinion-xlmr-el
Loading model: lighteternal/fact-or-opinion-xlmr-el on cpu
Run completed in 0.7401s (avg 0.1233s per sentence)
Run completed in 0.5590s (avg 0.0932s per sentence)
Run completed in 0.5560s (avg 0.0927s per sentence)

Benchmarking model: GroNLP/mdebertav3-subjectivity-multilingual
Loading model: GroNLP/mdebertav3-subjectivity-multilingual on cpu


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Run completed in 2.3878s (avg 0.3980s per sentence)
Run completed in 2.4740s (avg 0.4123s per sentence)
Run completed in 2.4978s (avg 0.4163s per sentence)

Results for lighteternal/fact-or-opinion-xlmr-el:
                                                text  objective_score  \
0                 The Earth revolves around the Sun.         0.001928   
1         I think this movie is absolutely terrible.         0.997180   
2   Water boils at 100 degrees Celsius at sea level.         0.003098   
3  In my opinion, the government should increase ...         0.994117   
4                    Paris is the capital of France.         0.001870   
5      This is probably the best restaurant in town.         0.013378   

   subjective_score classification  inference_time  
0          0.998072        OPINION        0.199014  
1          0.002820           FACT        0.125007  
2          0.996902        OPINION        0.122011  
3          0.005883           FACT        0.114010  
4          0.998

In [9]:
results_df.tail()

Unnamed: 0,text,objective_score,subjective_score,classification,inference_time,model,avg_inference_time
7,I think this movie is absolutely terrible.,0.023,0.977,OPINION,0.37208,GroNLP/mdebertav3-subjectivity-multilingual,0.408807
8,Water boils at 100 degrees Celsius at sea level.,0.989563,0.010437,FACT,0.380607,GroNLP/mdebertav3-subjectivity-multilingual,0.408807
9,"In my opinion, the government should increase ...",0.074654,0.925346,OPINION,0.459381,GroNLP/mdebertav3-subjectivity-multilingual,0.408807
10,Paris is the capital of France.,0.981899,0.018101,FACT,0.470325,GroNLP/mdebertav3-subjectivity-multilingual,0.408807
11,This is probably the best restaurant in town.,0.02186,0.97814,OPINION,0.354459,GroNLP/mdebertav3-subjectivity-multilingual,0.408807
