# Calculates Precision and Recall based on exact string matching

In [3]:
## Calculate precision and recall for a given set of predictions and ground truth.
## The calculation is based on the number of true positives (TP), false positives (FP), and false negatives (FN).

def calculate_precision_recall(human_labeled_set, predicted_set):
    """
    Score predictions against human labels using case-insensitive exact matching.

    Both inputs are collections of strings. Each is lowercased and
    de-duplicated before comparison. Returns a ``(precision, recall)`` tuple;
    ``(0, 0)`` is returned when either input is empty or nothing matches.
    """
    # Nothing to score if one side is missing entirely.
    if not (human_labeled_set and predicted_set):
        return 0, 0

    # Normalise case so "ART. 28 ZGB" and "Art. 28 ZGB" count as the same label.
    gold = set(label.lower() for label in human_labeled_set)
    guessed = set(label.lower() for label in predicted_set)

    hit_count = len(gold & guessed)
    if hit_count == 0:
        return 0, 0

    # TP + FP is every distinct prediction; TP + FN is every distinct human label.
    return hit_count / len(guessed), hit_count / len(gold)

# Demo: exact-match scoring of predicted citations against the human labels
human_labeled = [
    "Art. 28 ZGB",
    "Art. 19c ZGB",
    "Art. 27 Abs. 2 ZGB",
]
predicted = [
    "ART. 10 URG",
    "ART. 11 URG",
    "ART. 27a OR",
    "ART. 28 OR",
    "ART. 28 ZGB",
]

# Only "ART. 28 ZGB" agrees with a human label once case is ignored.
precision, recall = calculate_precision_recall(human_labeled, predicted)
print("Precision:", precision)
print("Recall:", recall)


Precision: 0.2
Recall: 0.3333333333333333


# Calculates Precision & Recall based on Articles and Sub-Articles (without considering paragraphs)

In [5]:
import re

# Compiled once. IGNORECASE lets "ART. 28 ZGB" and "Art. 27A OR" be parsed the
# same way as "Art. 28 ZGB"; the leading \b keeps words that merely end in
# "art." (e.g. "Part. 3") from being treated as article references.
_ARTICLE_REFERENCE_RE = re.compile(r'\bArt\.\s*(\d+[a-z]?)', re.IGNORECASE)


def extract_article_reference(article_string):
    """
    Extracts the article number and minor letter (if any) from a legal article
    reference string, ignoring case.

    For example, 'Art. 34a Abs. 2 OR' -> '34a' and 'ART. 28 ZGB' -> '28'.

    Args:
        article_string: The reference text to parse.

    Returns:
        The lowercased article identifier (digits plus an optional single
        letter), or None if no 'Art. <number>' pattern is found.
    """
    match = _ARTICLE_REFERENCE_RE.search(article_string)
    # Lowercase so an upper-case minor letter ('27A') compares equal to '27a'.
    return match.group(1).lower() if match else None

def is_subset_article(human_article, predicted_article):
    """
    Checks if the predicted article is the same as, or a sub-article of, the
    human-labeled article.

    For example, predicted '34a' falls under human-labeled '34', and '34'
    matches '34'. Predicted '34' does NOT match human-labeled '34a', and
    predicted '34' does NOT match human-labeled '3' (different article number).

    Args:
        human_article: Article identifier from the human labels, e.g. '34'.
        predicted_article: Article identifier from the predictions, e.g. '34a'.

    Returns:
        True if the predicted identifier equals the human one or extends it
        with a non-digit suffix; False otherwise.
    """
    if not predicted_article.startswith(human_article):
        return False
    # A bare prefix test would let '3' match '34'. Whatever follows the shared
    # prefix must not continue the number.
    remainder = predicted_article[len(human_article):]
    return not remainder[:1].isdigit()

def calculate_precision_recall_legal_articles(human_labeled_set, predicted_set):
    """
    Calculates precision and recall at article / sub-article level, ignoring
    paragraph ('Abs.') information.

    Each reference string is reduced to its article identifier via
    extract_article_reference; strings with no identifier are dropped. A
    predicted article counts as correct when is_subset_article reports that it
    falls under at least one human-labeled article.

    Args:
        human_labeled_set: Iterable of human-labeled reference strings.
        predicted_set: Iterable of predicted reference strings.

    Returns:
        A (precision, recall) tuple. (0, 0) is returned when either side has
        no extractable article.
    """
    # Extract and normalize article references
    human_articles = {extract_article_reference(s) for s in human_labeled_set}
    predicted_articles = {extract_article_reference(s) for s in predicted_set}

    # Remove None values that might occur if extraction fails
    human_articles.discard(None)
    predicted_articles.discard(None)

    # Handling edge case where either set is empty
    if not human_articles or not predicted_articles:
        return 0, 0

    # Predictions that fall under at least one human-labeled article.
    matched_predictions = {
        pred
        for pred in predicted_articles
        if any(is_subset_article(hum, pred) for hum in human_articles)
    }
    # Human-labeled articles covered by at least one prediction.
    matched_humans = {
        hum
        for hum in human_articles
        if any(is_subset_article(hum, pred) for pred in predicted_articles)
    }

    precision = len(matched_predictions) / len(predicted_articles)
    # Recall counts human labels that were found, not matching predictions:
    # several predictions ('34a', '34b') may hit one label ('34'), which
    # would otherwise push recall above 1.
    recall = len(matched_humans) / len(human_articles)

    return precision, recall

# Demo: article-level scoring, where paragraphs ("Abs.") are ignored
human_labeled = [
    "Art. 34 OR",
    "Art. 45a Abs. 3 OR",
    "Art. 67 Abs. 4 OR",
]
predicted = [
    "Art. 34a OR",
    "Art. 45 OR",
    "Art. 67b Abs. 2 OR",
]

# "34a" and "67b" fall under the labels "34" and "67"; "45" does not match "45a".
precision, recall = calculate_precision_recall_legal_articles(human_labeled, predicted)
print("Precision:", precision)
print("Recall:", recall)


Precision: 0.6666666666666666
Recall: 0.6666666666666666
