In [26]:
import json

def _read_jsonl(path):
    """Return the parsed JSON value of every valid, non-empty line in *path*."""
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank (or whitespace-only) lines carry no record.
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                # Best-effort loading: a malformed line is dropped rather
                # than aborting the whole file.
                continue
    return records


def load_data(
    test_file_path="../datasets/laptop_quad_test.tsv.jsonl",
    results_file_path="../datasets/clean_full_results.jsonl",
):
    """
    Load the test data and the results data from two JSONL files.

    Each file is read as UTF-8, one JSON document per line. Empty lines and
    lines that are not valid JSON are skipped silently.

    NOTE: because unparsable lines are dropped, the two returned lists are
    not guaranteed to have the same length or to stay aligned line-for-line
    with the input files.

    Args:
        test_file_path: Path of the JSONL file holding the test records.
        results_file_path: Path of the JSONL file holding the result records.

    Returns:
        A ``(test_data, results_data)`` tuple of lists containing the parsed
        JSON values in file order.

    Raises:
        OSError: If either file cannot be opened.
    """
    test_data = _read_jsonl(test_file_path)
    results_data = _read_jsonl(results_file_path)
    return test_data, results_data

def _label_pairs(labels):
    """Return the set of (aspect, polarity) tuples for the dict entries of *labels*."""
    return {
        (label.get("aspect"), label.get("polarity"))
        for label in labels
        if isinstance(label, dict)
    }


def evaluate_aspect_and_sentiment(test_data, results_data):
    """
    Compute micro-averaged precision, recall and F1 over (aspect, polarity) pairs.

    The two sequences are paired by position; pairing stops at the end of the
    shorter one. A pair is skipped entirely (contributing to no counter) when
    either item is not a dict or when either item's 'labels' value is not a
    list. A missing 'labels' key is treated as an empty list. Inside a labels
    list, entries that are not dicts are ignored, and duplicate
    (aspect, polarity) pairs within one item are counted once.

    Args:
        test_data: Sequence of reference items, each expected to be a dict
            with a 'labels' list of dicts carrying 'aspect' and 'polarity'.
        results_data: Sequence of predicted items in the same format.

    Returns:
        ``{"aspect_sentiment": {"precision": ..., "recall": ..., "f1": ...}}``
        where each value is a float; a metric whose denominator is zero is
        reported as 0.0.
    """
    total_test_labels = 0
    total_result_labels = 0
    total_correct = 0

    for test_item, result_item in zip(test_data, results_data):
        if not (isinstance(test_item, dict) and isinstance(result_item, dict)):
            continue

        test_labels = test_item.get("labels", [])
        result_labels = result_item.get("labels", [])
        if not (isinstance(test_labels, list) and isinstance(result_labels, list)):
            continue

        test_label_set = _label_pairs(test_labels)
        result_label_set = _label_pairs(result_labels)

        total_test_labels += len(test_label_set)
        total_result_labels += len(result_label_set)
        # A prediction is correct only if both aspect and polarity match.
        total_correct += len(test_label_set & result_label_set)

    # Guard every division so empty inputs yield 0.0 instead of raising.
    precision = total_correct / total_result_labels if total_result_labels else 0.0
    recall = total_correct / total_test_labels if total_test_labels else 0.0
    if precision + recall > 0:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    return {
        "aspect_sentiment": {
            "precision": precision,
            "recall": recall,
            "f1": f1,
        }
    }

def run_evaluation():
    """Load both datasets, score them, and print the metrics as indented JSON."""
    loaded = load_data()
    scores = evaluate_aspect_and_sentiment(loaded[0], loaded[1])

    # Header line first, then the pretty-printed metrics dictionary.
    report = json.dumps(scores, indent=4)
    print("\nEvaluation Metrics:")
    print(report)

# Run the evaluation only when this module is the program entry point,
# so that importing it does not trigger file loading or printing.
if __name__ == "__main__":
    run_evaluation()



Evaluation Metrics:
{
    "aspect_sentiment": {
        "precision": 0.551829268292683,
        "recall": 0.5397614314115308,
        "f1": 0.5457286432160804
    }
}
