In [2]:
from transformers import pipeline
import spacy
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from collections import defaultdict

In [5]:
class AspectBasedSentimentAnalyzer:
    def __init__(self):
        """Initialize the aspect-based sentiment analyzer."""
        # Load pre-trained sentiment analyzer
        self.sentiment_analyzer = pipeline("sentiment-analysis")
        
        # Load SpaCy for aspect extraction
        self.nlp = spacy.load("en_core_web_sm")
        
        # Define aspect categories and their keywords
        self.aspect_keywords = {
            'performance': ['speed', 'fast', 'slow', 'performance', 'lag'],
            'battery': ['battery', 'charge', 'power', 'duration'],
            'display': ['screen', 'display', 'resolution', 'bright'],
            'price': ['price', 'cost', 'expensive', 'cheap', 'worth'],
            'design': ['design', 'look', 'build', 'quality', 'material']
        }
        
    def extract_aspects(self, text):
        """
        Extract aspects from text using dependency parsing and keyword matching.
        
        Args:
            text (str): Input text
            
        Returns:
            list: List of extracted aspects with their associated text
        """
        doc = self.nlp(text)
        aspects = []
        
        # Extract noun phrases and their associated words
        for chunk in doc.noun_chunks:
            aspect_found = False
            associated_text = []
            
            # Find the aspect category
            for aspect, keywords in self.aspect_keywords.items():
                if any(keyword in chunk.text.lower() for keyword in keywords):
                    # Get the surrounding context
                    start_idx = max(0, chunk.start - 2)
                    end_idx = min(len(doc), chunk.end + 3)
                    context = doc[start_idx:end_idx].text
                    
                    aspects.append({
                        'aspect': aspect,
                        'term': chunk.text,
                        'context': context
                    })
                    aspect_found = True
                    break
                    
        return aspects
    
    def analyze_sentiment(self, text):
        """Analyze sentiment of given text."""
        result = self.sentiment_analyzer(text)
        return {
            'label': result[0]['label'],
            'score': result[0]['score']
        }
    
    def analyze_aspects(self, review):
        """
        Perform aspect-based sentiment analysis on a review.
        
        Args:
            review (str): Input review text
            
        Returns:
            dict: Sentiment analysis results for each aspect
        """
        aspects = self.extract_aspects(review)
        results = []
        
        for aspect_info in aspects:
            sentiment = self.analyze_sentiment(aspect_info['context'])
            results.append({
                'aspect': aspect_info['aspect'],
                'term': aspect_info['term'],
                'context': aspect_info['context'],
                'sentiment': sentiment['label'],
                'confidence': sentiment['score']
            })
            
        return results
    
    def evaluate_on_dataset(self, test_data):
        """
        Evaluate the system on a labeled dataset.
        
        Args:
            test_data: List of (review, labeled_aspects) tuples
            
        Returns:
            dict: Evaluation metrics for aspect extraction and sentiment analysis
        """
        aspect_predictions = []
        aspect_true = []
        sentiment_predictions = []
        sentiment_true = []
        
        for review, labeled in test_data:
            predicted_aspects = self.analyze_aspects(review)
            
            for labeled_aspect in labeled:
                aspect_true.append(labeled_aspect['aspect'])
                sentiment_true.append(labeled_aspect['sentiment'])
                
                # Find matching predicted aspect
                matched = False
                for pred in predicted_aspects:
                    if pred['aspect'] == labeled_aspect['aspect']:
                        aspect_predictions.append(pred['aspect'])
                        sentiment_predictions.append(pred['sentiment'])
                        matched = True
                        break
                
                if not matched:
                    aspect_predictions.append('NONE')
                    sentiment_predictions.append('NONE')
        
        return {
            'aspect_classification': classification_report(
                aspect_true, aspect_predictions, digits=4
            ),
            'sentiment_classification': classification_report(
                sentiment_true, sentiment_predictions, digits=4
            )
        }

In [7]:
if __name__ == "__main__":
    # Install SpaCy model
    !python -m spacy download en_core_web_sm
    
    # Initialize analyzer
    analyzer = AspectBasedSentimentAnalyzer()
    
    # Example reviews
    reviews = [
        "The battery life is exceptional, lasting over 12 hours. However, the screen resolution is disappointing for this price.",
        "Amazing build quality and design, but the performance is a bit sluggish when running multiple apps.",
        "The price is reasonable for the features offered. The battery life is good, but the display could be better.",
        "I love the design and screen resolution. The battery life is decent, but the performance is lacking.",
        "The price is too high for the quality. The battery life is short and the performance is slow."
    ]

    # Perform aspect-based sentiment analysis
    for review in reviews:
        print(f"Review: {review}")
        results = analyzer.analyze_aspects(review)
        for aspect in results:
            print(f"- Aspect: {aspect['aspect']} ({aspect['term']})")
            print(f"  Context: {aspect['context']}")
            print(f"  Sentiment: {aspect['sentiment']} ({aspect['confidence']:.2f})")
        print()

    # Evaluate on a labeled dataset
    test_data = [
        ("The battery life is great, but the display is poor.", [
            {'aspect': 'battery', 'term': 'battery life', 'sentiment': 'positive'},
            {'aspect': 'display', 'term': 'display', 'sentiment': 'negative'}
        ]),
        ("The performance is fast and the battery lasts a long time.", [
            {'aspect': 'performance', 'term': 'performance', 'sentiment': 'positive'},
            {'aspect': 'battery', 'term': 'battery', 'sentiment': 'positive'}
        ]),
        ("The design is sleek and the price is reasonable.", [
            {'aspect': 'design', 'term': 'design', 'sentiment': 'positive'},
            {'aspect': 'price', 'term': 'price', 'sentiment': 'positive'}
        ]),
        ("The battery life is short, but the price is low.", [
            {'aspect': 'battery', 'term': 'battery life', 'sentiment': 'negative'},
            {'aspect': 'price', 'term': 'price', 'sentiment': 'positive'}
        ])
    ]

    results = analyzer.evaluate_on_dataset(test_data)
    print("Aspect Classification Report:")
    print(results['aspect_classification'])
    print("Sentiment Classification Report:")
    print(results['sentiment_classification'])


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     --- ------------------------------------ 1.0/12.8 MB 7.1 MB/s eta 0:00:02
     ------- -------------------------------- 2.4/12.8 MB 7.4 MB/s eta 0:00:02
     ---------- ----------------------------- 3.4/12.8 MB 6.3 MB/s eta 0:00:02
     ------------- -------------------------- 4.2/12.8 MB 5.5 MB/s eta 0:00:02
     ------------- -------------------------- 4.5/12.8 MB 5.1 MB/s eta 0:00:02
     --------------- ------------------------ 5.0/12.8 MB 4.4 MB/s eta 0:00:02
     ----------------- ---------------------- 5.5/12.8 MB 4.0 MB/s eta 0:00:02
     ------------------ --------------------- 6.0/12.8 MB 3.7 MB/s eta 0:00:02
     ------------------- -------------------- 6.3/12.8 MB 3.6 MB/s eta 0:00:02
     --------------------- --------------

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
