In [1]:
import json
from collections import defaultdict

In [2]:
def analyze_ambiguity(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)

    total_samples = len(data)
    ambiguous_aspects = 0
    ambiguous_opinions = 0
    ambiguous_samples = []

    for i, sample in enumerate(data):
        words = sample['words']
        raw_words = sample.get('raw_words', ' '.join(words))
        word_counts = defaultdict(int)
        for word in words:
            word_counts[word] += 1

        sample_ambiguous = False
        ambiguous_aspects_sample = []
        ambiguous_opinions_sample = []

        # Check for ambiguous aspects
        for aspect in sample['aspects']:
            aspect_term = ' '.join(aspect['term'])
            if word_counts[aspect_term] > 1:
                ambiguous_aspects += 1
                ambiguous_aspects_sample.append(aspect)
                sample_ambiguous = True

        # Check for ambiguous opinions
        for opinion in sample['opinions']:
            opinion_term = ' '.join(opinion['term'])
            if word_counts[opinion_term] > 1:
                ambiguous_opinions += 1
                ambiguous_opinions_sample.append(opinion)
                sample_ambiguous = True

        if sample_ambiguous:
            ambiguous_samples.append({
                'index': i,
                'raw_words': raw_words,
                'ambiguous_aspects': ambiguous_aspects_sample,
                'ambiguous_opinions': ambiguous_opinions_sample
            })

    print(f"Total samples: {total_samples}")
    print(f"Samples with ambiguous aspects: {ambiguous_aspects} ({ambiguous_aspects/total_samples:.2%})")
    print(f"Samples with ambiguous opinions: {ambiguous_opinions} ({ambiguous_opinions/total_samples:.2%})")
    
    print("\nDetailed Ambiguous Samples:")
    for sample in ambiguous_samples:
        print(f"\nSample {sample['index']}:")
        print(f"Text: {sample['raw_words']}")
        
        if sample['ambiguous_aspects']:
            print("Ambiguous Aspects:")
            for aspect in sample['ambiguous_aspects']:
                print(f"  - Term: {' '.join(aspect['term'])}")
                print(f"    Span: {aspect['from']} to {aspect['to']}")
                print(f"    Polarity: {aspect['polarity']}")
        
        if sample['ambiguous_opinions']:
            print("Ambiguous Opinions:")
            for opinion in sample['ambiguous_opinions']:
                print(f"  - Term: {' '.join(opinion['term'])}")
                print(f"    Span: {opinion['from']} to {opinion['to']}")

In [4]:
file_path = "/Users/tomvolker/localProjects/ba/absa/data/absa/json/pengb/14lap/test.json"
analyze_ambiguity(file_path)

Total samples: 328
Samples with ambiguous aspects: 6 (1.83%)
Samples with ambiguous opinions: 12 (3.66%)

Detailed Ambiguous Samples:

Sample 25:
Text: This thing is awesome , everything always works , everything is always easy to set up , everything is compatible , its literally everything I could ask for .
Ambiguous Opinions:
  - Term: always
    Span: 6 to 7

Sample 69:
Text: : ) Great product , great price , great delivery , and great service .
Ambiguous Opinions:
  - Term: great
    Span: 5 to 6
  - Term: great
    Span: 8 to 9
  - Term: great
    Span: 12 to 13

Sample 218:
Text: This mac does come with an extender cable and I 'm using mine right now hoping the cable will stay nice for the many years I plan on using this mac .
Ambiguous Aspects:
  - Term: cable
    Span: 17 to 18
    Polarity: POS

Sample 236:
Text: The SD card reader is slightly recessed and upside down ( the nail slot on the card can not be accessed ) , if this was a self ejecting slot this would not be an issu