In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
import re
import spacy
import json
from sklearn.metrics import classification_report
import pandas as pd
from pathlib import Path
from collections import defaultdict, Counter


Label Mapping der einzelnen Identifier

In [2]:
# Define priority for conflict resolution: higher values indicate stronger precedence
PRIORITY_MAP = {
    "regex": 3,       # Regex detections have the highest priority
    "piiranha": 1,    # Piiranha has the lowest priority
    "spacy": 2        # spaCy takes middle priority
}

# Define the set of labels that should be extracted
TARGET_LABELS = [
    "TITEL", "VORNAME", "NACHNAME", "FIRMA", "TELEFONNUMMER", "EMAIL", "FAX", "STRASSE",
    "HAUSNUMMER", "POSTLEITZAHL", "WOHNORT", "ZÄHLERNUMMER", "ZÄHLERSTAND", "VERTRAGSNUMMER",
    "ZAHLUNG", "BANK", "IBAN", "BIC", "DATUM", "GESENDET_MIT", "LINK"
]

# Maps labels from different sources (spaCy, Piiranha) to unified categories
LABEL_MAP = {
    # spaCy labels
    "PER": "NAME",
    "LOC": "ADRESSE",
    "ORG": "FIRMA",
    "DATE": "DATUM",
    "TIME": "DATUM",
    "GPE": "ADRESSE",
    "NORP": "GRUPPE",
    "MONEY": "ZAHLUNG",

    # Piiranha labels
    "I-GIVENNAME": "NAME",
    "I-SURNAME": "NAME",
    "I-DATEOFBIRTH": "DATUM",
    "I-EMAIL": "KONTAKT",
    "I-TELEPHONENUM": "KONTAKT",
    "I-USERNAME": "KONTAKT",
    "I-CREDITCARDNUMBER": "ZAHLUNG",
    "I-ACCOUNTNUM": "VERTRAG",
    "I-BILLINGNUM": "VERTRAG",
    "I-IDCARDNUM": "VERTRAG",
    "I-TAXNUM": "VERTRAG",
    "I-CITY": "ADRESSE",
    "I-ZIPCODE": "ADRESSE",
    "I-STREET": "ADRESSE",
    "I-BUILDINGNUM": "ADRESSE"
}

# Regular expression patterns used for matching key entity types
REGEX_PATTERNS = {
    # Strong and well-defined email pattern
    "EMAIL": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z]{2,}\b",

    # IBAN pattern specific to German format (22 characters)
    "IBAN": r"\bDE\d{20}\b",

    # BIC (SWIFT code) format
    "BIC": r"\b[A-Z]{6}[A-Z2-9][A-NP-Z0-9]{1}([A-Z0-9]{3})?\b",

    # URL pattern: includes http, https, or www
    "URL": r"\bhttps?://[^\s]+|www\.[^\s]+\b",

    # Contract number pattern: must be prefixed by a keyword and contain digits
    "VERTRAG": r"\b(vertragsnummer|vertragsnr\.?|vnr|vn)[\s:]{1,3}\d{7,10}\b",

    # Well-formed dates, including ISO and German styles
    "DATUM": (
        r"\b\d{2}\.\d{2}\.\d{4}\b|"      # e.g., 15.08.2024
        r"\b\d{4}-\d{2}-\d{2}\b|"        # e.g., 2024-08-15
        r"\b(19|20)\d{2}\b"              # Four-digit years
    ),

    # German phone number format, must include country code
    "TELEFON": r"\b\+49\s?\d[\d\s/-]{6,}\b",

    # Alphanumeric meter numbers, typically longer than 10 characters
    "ZÄHLERNUMMER": r"\b[A-Z]{2}[A-Z0-9]{8,}\b",

    # Payment amount pattern: includes currency keywords
    "ZAHLUNG": r"\b\d{1,5}[.,]\d{2}\s?(€|Euro|EUR|Cent)\b",

    # Street pattern: must end with common street suffixes
    "STRASSE": r"\b\w+(straße|gasse|allee|weg|platz|str\.|grund)\b"
}

PIIranha Spans

In [3]:
# Load the Piiranha model and tokenizer from HuggingFace
model_name = "iiiorg/piiranha-v1-detect-personal-information"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Select device: GPU if available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Function to extract entity spans from text using Piiranha
def get_piiranha_spans(text):
    # Tokenize input and get offset mappings (to map tokens back to character positions)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, return_offsets_mapping=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    offset_mapping = inputs.pop("offset_mapping")[0].tolist()

    # Inference: get model predictions without gradient calculation
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)[0].tolist()

    spans = []
    current_label = None
    current_start = None

    # Iterate over each token and its offset
    for i, (start, end) in enumerate(offset_mapping):
        if start == end:
            continue  # Skip special tokens like [CLS], [SEP]

        # Get predicted label and map it to a simplified label name
        raw_label = model.config.id2label[predictions[i]]
        mapped_label = LABEL_MAP.get(raw_label, None)

        # If the label is one of the target labels, track span start and continuation
        if mapped_label in TARGET_LABELS:
            if current_label == mapped_label:
                continue  # Continue current span
            else:
                # If label changes, close previous span and start new one
                if current_label is not None:
                    spans.append({"start": current_start, "end": offset_mapping[i-1][1], "label": current_label})
                current_label = mapped_label
                current_start = start
        else:
            # If no valid label, close previous span if one was open
            if current_label is not None:
                spans.append({"start": current_start, "end": offset_mapping[i-1][1], "label": current_label})
                current_label = None
                current_start = None

    # Finalize last span if still open
    if current_label is not None:
        spans.append({"start": current_start, "end": offset_mapping[-1][1], "label": current_label})

    return spans

SpaCy Ruler laden & Spans

In [4]:
# -----------------------------
# Select the spaCy model
# -----------------------------

# Load the spaCy model depending on your setup
# Uncomment one of the following if needed

# Trained on synthetic data (SynthB)
# nlp = spacy.load("../custom_spacy_model_synthetic_data_b_push")

# Trained on original data (OrigData)
# nlp = spacy.load("../custom_spacy_model_doccano_labeling")

# Untrained base model
nlp = spacy.load("de_core_news_md")

# Toggle for activating EntityRuler
use_ruler = True

if use_ruler:
    # Remove existing entity_ruler if present
    if "entity_ruler" in nlp.pipe_names:
        nlp.remove_pipe("entity_ruler")

    # Add a new entity_ruler before the NER component
    ruler = nlp.add_pipe("entity_ruler", before="ner")

    # Define patterns (Regex-based, no Gazetteer)
    strasse_patterns = [
        {
            "label": "STRASSE",
            "pattern": [
                {"TEXT": {"REGEX": r".*(straße|gasse|allee|weg|platz|str.|grund)$"}},
                {"TEXT": {"REGEX": r"^\d+[a-zA-Z]?$"}}
            ]
        }
    ]

    vertragsnummer_patterns = [
        {
            "label": "VERTRAGSNUMMER",
            "pattern": [
                {"LOWER": {"IN": ["vertragsnummer", "vertragsnr.", "vnr", "vn"]}},
                {"IS_PUNCT": True, "OP": "*"},
                {"TEXT": {"REGEX": r"^\d{6,12}\\.?$"}}
            ]
        }
    ]

    kundennummer_patterns = [
        {
            "label": "KUNDENNUMMER",
            "pattern": [
                {"LOWER": {"IN": ["kundennummer", "kundennr.", "kdnr", "kd"]}},
                {"IS_PUNCT": True, "OP": "*"},
                {"TEXT": {"REGEX": r"^\d{6,12}\\.?$"}}
            ]
        }
    ]

    zahlung_pattern = [
        {
            "label": "ZAHLUNG",
            "pattern": [
                {"TEXT": {"REGEX": r"^\d+[.,]?\d{0,2}$"}},
                {"TEXT": {"REGEX": r"^(\u20ac|euro|eur)$"}}
            ]
        }
    ]

    email_pattern = [
        {
            "label": "EMAIL",
            "pattern": [
                {"TEXT": {"REGEX": r"^[\w\.-]+@[\w\.-]+\.\w{2,}$"}}
            ]
        }
    ]

    telefon_pattern = [
        {
            "label": "TELEFON",
            "pattern": [
                {"TEXT": {"REGEX": r"^(\+49|0)[\d\s/-]{7,}$"}}
            ]
        }
    ]

    url_pattern = [
        {
            "label": "LINK",
            "pattern": [
                {"TEXT": {"REGEX": r"^https?://[\w\-\.]+\.\w{2,}(/[\w\-\.]*)*$"}}
            ]
        },
        {
            "label": "LINK",
            "pattern": [
                {"TEXT": {"REGEX": r"^www\.[\w\-\.]+\.\w{2,}(/[\w\-\.]*)*$"}}
            ]
        }
    ]

    datum_pattern = [
        {
            "label": "DATUM",
            "pattern": [
                {"TEXT": {"REGEX": r"^(\d{1,2}[./-]){2}\d{2,4}$"}}
            ]
        },
        {
            "label": "DATUM",
            "pattern": [
                {"TEXT": {"REGEX": r"^\d{4}-\d{2}-\d{2}$"}}
            ]
        }
    ]

    # Add all patterns to the ruler
    ruler.add_patterns(
        zahlung_pattern + url_pattern + email_pattern + telefon_pattern +
        strasse_patterns + vertragsnummer_patterns + kundennummer_patterns + datum_pattern
    )

    # (Optional) Save model with EntityRuler
    output_dir_ruler = Path("custom_spacy_model_with_ruler")
    output_dir_ruler.mkdir(exist_ok=True)
    nlp.to_disk(output_dir_ruler)
    print(f"✅ Model with EntityRuler saved at: {output_dir_ruler.resolve()}")

✅ Model with EntityRuler saved at: C:\Users\morit\OneDrive\Uni\02_Master\05_Studium\02_Semester_II\Data Analytics in Applications\VSCode\daia-eon\notebooks\3_model_training_and_testing\spacy_pipeline\piiranha_refinement\custom_spacy_model_with_ruler


In [5]:
def get_spacy_spans(text):
    doc = nlp(text)
    spans = []
    for ent in doc.ents:
        label = LABEL_MAP.get(ent.label_, ent.label_)
        if label in TARGET_LABELS:
            spans.append({"start": ent.start_char, "end": ent.end_char, "label": label})
    return spans

Regex Spans

In [6]:
def get_regex_spans(text):
    spans = []
    for raw_label, pattern in REGEX_PATTERNS.items():
        mapped_label = LABEL_MAP.get(raw_label, raw_label)  # bleibt bei IBAN = IBAN
        if mapped_label not in TARGET_LABELS:
            continue
        for match in re.finditer(pattern, text):
            spans.append({
                "start": match.start(),
                "end": match.end(),
                "label": mapped_label
            })
    return spans

In [9]:
# Beispieltext zum Testen
sample_text = """
Sehr geehrter Herr John Doe,
Ihre Kundennummer 4012345678 ist aktiv.
Bitte kontaktieren Sie uns unter max@eon.de oder +49 171 1234567.
Ihre Zahlung über 89,99 € wurde am 15-08-2024 verbucht.
"""

# PIIranha-Spans abrufen
piiranha_spans = get_piiranha_spans(sample_text)
spacy_spans = get_spacy_spans(sample_text)
regex_spans = get_regex_spans(sample_text)

# Ergebnisse ausgeben
print(piiranha_spans)
print(spacy_spans)
print(regex_spans)

[]
[{'start': 103, 'end': 113, 'label': 'EMAIL'}, {'start': 154, 'end': 161, 'label': 'ZAHLUNG'}]
[{'start': 103, 'end': 113, 'label': 'EMAIL'}, {'start': 177, 'end': 181, 'label': 'DATUM'}]


In [10]:
# 🧠 Optional: Merge overlapping or duplicate spans by sorting them
def merge_spans(spans):
    return sorted(spans, key=lambda x: x['start'])

# 🔄 Resolve conflicts between overlapping spans based on priority
def resolve_conflicts(spans):
    # Sort spans: first by start index, then by descending length (longer first), then by priority
    spans = sorted(
        spans,
        key=lambda x: (
            x["start"],
            -(x["end"] - x["start"]),
            -PRIORITY_MAP.get(x.get("source", ""), 0)
        )
    )

    resolved = []
    occupied = set()

    for span in spans:
        span_range = set(range(span["start"], span["end"]))
        conflict = False

        for existing in resolved:
            existing_range = set(range(existing["start"], existing["end"]))

            # ❌ If spans overlap
            if span_range & existing_range:
                # ➕ If one span is fully inside the other → decide based on priority
                if span["start"] >= existing["start"] and span["end"] <= existing["end"]:
                    if PRIORITY_MAP[span["source"]] > PRIORITY_MAP[existing["source"]]:
                        resolved.remove(existing)
                        break
                    else:
                        conflict = True
                        break
                elif existing["start"] >= span["start"] and existing["end"] <= span["end"]:
                    if PRIORITY_MAP[span["source"]] < PRIORITY_MAP[existing["source"]]:
                        conflict = True
                        break
                    else:
                        resolved.remove(existing)
                        break
                else:
                    # True overlap, not nested – reject current span
                    conflict = True
                    break

        if not conflict:
            resolved.append(span)
            occupied.update(span_range)

    return resolved

# 🔐 Apply final redaction using label numbering (e.g., NAME_1, NAME_2, ...)
def apply_final_redaction(text, spans):
    spans = sorted(spans, key=lambda x: x["start"])
    redacted = []
    last_index = 0
    label_counter = defaultdict(int)

    for span in spans:
        label = span["label"]
        label_counter[label] += 1
        label_with_id = f"{label}_{label_counter[label]}"

        # Keep text before the span
        redacted.append(text[last_index:span["start"]])
        # Insert replacement token
        redacted.append(f"[{label_with_id}]")
        # Update position pointer
        last_index = span["end"]

    # Append any remaining text
    redacted.append(text[last_index:])
    return ''.join(redacted)

# 🧩 Main masking function using multiple components (e.g., regex, spacy, piiranha)
def mask_text_with_all(text, components=["regex"]):
    all_spans = []

    if "regex" in components:
        for span in get_regex_spans(text):
            span["source"] = "regex"
            all_spans.append(span)

    if "piiranha" in components:
        for span in get_piiranha_spans(text):
            span["source"] = "piiranha"
            all_spans.append(span)

    if "spacy" in components:
        for span in get_spacy_spans(text):
            span["source"] = "spacy"
            all_spans.append(span)

    # 🔧 Resolve span conflicts and apply redaction
    spans = resolve_conflicts(all_spans)
    merged = merge_spans(spans)
    return apply_final_redaction(text, merged)

# 🎯 Mask text using only a single component (for testing or analysis)
def mask_text_with_single_component(text, component="regex"):
    if component == "regex":
        all_spans = get_regex_spans(text)
    elif component == "piiranha":
        all_spans = get_piiranha_spans(text)
    elif component == "spacy":
        all_spans = get_spacy_spans(text)
    else:
        raise ValueError(f"Unknown component: {component}")

    # Optionally resolve internal conflicts within single component spans
    spans = resolve_conflicts(all_spans)
    merged = merge_spans(spans)

    # Return the redacted version of the input text
    return apply_final_redaction(text, merged)


In [15]:
sample = "01.08.2023\n Mein Name ist Isabelle Eckhauer : (+49 (0) 5402 008802)\n"
        
print(get_piiranha_spans(sample))
print(get_spacy_spans(sample))
print(get_regex_spans(sample))

text = mask_text_with_all(sample)
print(text)

[]
[{'start': 0, 'end': 10, 'label': 'DATUM'}]
[{'start': 0, 'end': 10, 'label': 'DATUM'}]
[DATUM_1]
 Mein Name ist Isabelle Eckhauer : (+49 (0) 5402 008802)



In [16]:
# Load ground truth test data
with open("../../../../data/original/ground_truth_split/test_norm.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

# Helper function: convert span dictionary to tuple
def to_tuple(span):
    return (span["start"], span["end"], span["label"])

# Evaluate predicted spans against gold standard
def evaluate_entities(pred_fn, data, threshold=0.8):
    stats = defaultdict(lambda: {"tp": 0, "fp": 0, "fn": 0})
    total_tp = total_fp = total_fn = 0

    for entry in data:
        text = entry["text"]
        gold_spans = [to_tuple(s) for s in entry["labels"]]
        pred_spans = [to_tuple(s) for s in pred_fn(text)]
        matched_gold = set()
        matched_pred = set()

        # Try to match each predicted span with a gold span using Jaccard similarity
        for pi, p in enumerate(pred_spans):
            best_match = None
            best_overlap = 0
            for gi, g in enumerate(gold_spans):
                if g[2] != p[2]:  # Only match if labels are the same
                    continue
                overlap = max(0, min(p[1], g[1]) - max(p[0], g[0]))
                union = max(p[1], g[1]) - min(p[0], g[0])
                jaccard = overlap / union if union > 0 else 0
                if jaccard >= threshold and jaccard > best_overlap:
                    best_overlap = jaccard
                    best_match = gi
            if best_match is not None:
                matched_gold.add(best_match)
                matched_pred.add(pi)
                stats[p[2]]["tp"] += 1
                total_tp += 1

        # Count false positives: predicted spans without gold match
        for i, p in enumerate(pred_spans):
            if i not in matched_pred:
                stats[p[2]]["fp"] += 1
                total_fp += 1

        # Count false negatives: gold spans without prediction
        for i, g in enumerate(gold_spans):
            if i not in matched_gold:
                stats[g[2]]["fn"] += 1
                total_fn += 1

    # Convert statistics to rows for DataFrame
    rows = []
    for label, counts in stats.items():
        tp, fp, fn = counts["tp"], counts["fp"], counts["fn"]
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall    = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1        = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        rows.append({
            "component": "Combined",
            "label": label,
            "tp": tp,
            "fp": fp,
            "fn": fn,
            "precision": precision,
            "recall": recall,
            "f1": f1
        })

    # Add overall performance row
    overall_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0
    overall_recall    = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
    overall_f1        = 2 * overall_precision * overall_recall / (overall_precision + overall_recall) if (overall_precision + overall_recall) > 0 else 0

    rows.append({
        "component": "Combined",
        "label": "OVERALL",
        "tp": total_tp,
        "fp": total_fp,
        "fn": total_fn,
        "precision": overall_precision,
        "recall": overall_recall,
        "f1": overall_f1
    })

    return pd.DataFrame(rows)

# Prediction function that combines multiple components and resolves conflicts
def run_combined_spans(text):
    all_spans = []

    # Uncomment if you want to use additional components
    '''
    for span in get_regex_spans(text):
        span["source"] = "regex"
        all_spans.append(span)

    for span in get_piiranha_spans(text):
        span["source"] = "piiranha"
        all_spans.append(span)
    '''

    for span in get_spacy_spans(text):
        span["source"] = "spacy"
        all_spans.append(span)

    resolved = resolve_conflicts(all_spans)
    return merge_spans(resolved)

# Execute evaluation and export results
df_eval = evaluate_entities(run_combined_spans, test_data, threshold=0.5)
df_eval.to_csv("Results_synthetic_data_b_only_spacy_no_ruler.csv", index=False)
print("Evaluation results saved to: evaluation_entity_level_combined.csv")


Evaluation results saved to: evaluation_entity_level_combined.csv


In [17]:
df_eval

Unnamed: 0,component,label,tp,fp,fn,precision,recall,f1
0,Combined,LINK,3,1,0,0.75,1.0,0.857143
1,Combined,FIRMA,3,10,3,0.230769,0.5,0.315789
2,Combined,ZAHLUNG,3,0,5,1.0,0.375,0.545455
3,Combined,GESENDET_MIT,0,0,6,0.0,0.0,0.0
4,Combined,VERTRAGSNUMMER,0,0,40,0.0,0.0,0.0
5,Combined,VORNAME,0,0,57,0.0,0.0,0.0
6,Combined,NACHNAME,0,0,61,0.0,0.0,0.0
7,Combined,DATUM,9,0,21,1.0,0.3,0.461538
8,Combined,IBAN,0,0,4,0.0,0.0,0.0
9,Combined,TITEL,0,0,8,0.0,0.0,0.0


In [261]:
def extract_error_spans(pred_fn, data, threshold=0.8):
    false_positives = []
    false_negatives = []

    def to_tuple(span):
        return (span["start"], span["end"], span["label"])

    for entry in data:
        text = entry["text"]
        gold_spans = [to_tuple(s) for s in entry["labels"]]
        pred_spans = [to_tuple(s) for s in pred_fn(text)]
        matched_gold = set()
        matched_pred = set()

        for pi, p in enumerate(pred_spans):
            for gi, g in enumerate(gold_spans):
                if g[2] != p[2]:
                    continue
                # Overlap berechnen
                overlap = max(0, min(p[1], g[1]) - max(p[0], g[0]))
                union = max(p[1], g[1]) - min(p[0], g[0])
                jaccard = overlap / union if union > 0 else 0
                if jaccard >= threshold:
                    matched_gold.add(gi)
                    matched_pred.add(pi)
                    break  # nur erster Treffer zählt

        # False Positives
        for pi, p in enumerate(pred_spans):
            if pi not in matched_pred:
                false_positives.append({
                    "type": "FP",
                    "text": text[p[0]:p[1]],
                    "label": p[2],
                    "start": p[0],
                    "end": p[1],
                    "source": "pred_only"
                })

        # False Negatives
        for gi, g in enumerate(gold_spans):
            if gi not in matched_gold:
                false_negatives.append({
                    "type": "FN",
                    "text": text[g[0]:g[1]],
                    "label": g[2],
                    "start": g[0],
                    "end": g[1],
                    "source": "gold_only"
                })

    return pd.DataFrame(false_positives + false_negatives)


In [262]:
df_errors = extract_error_spans(run_combined_spans, test_data, threshold=0.1)
df_errors.to_csv("error_analysis_partial_match.csv", index=False)

In [263]:
def run_component_spans(text, components=["spacy"]):
    all_spans = []

    if "regex" in components:
        for span in get_regex_spans(text):
            span["source"] = "regex"
            all_spans.append(span)

    if "piiranha" in components:
        for span in get_piiranha_spans(text):
            span["source"] = "piiranha"
            all_spans.append(span)

    if "spacy" in components:
        for span in get_spacy_spans(text):
            span["source"] = "spacy"
            all_spans.append(span)

    resolved = resolve_conflicts(all_spans)
    return merge_spans(resolved)


In [264]:
# Evaluation nur mit spaCy
df_spacy = evaluate_entities(lambda text: run_component_spans(text, ["spacy"]), test_data, threshold=0.5)
df_spacy["component"] = "spaCy"

# Evaluation nur mit Piiranha
df_piiranha = evaluate_entities(lambda text: run_component_spans(text, ["piiranha"]), test_data, threshold=0.5)
df_piiranha["component"] = "Piiranha"

# Evaluation nur mit Regex
df_regex = evaluate_entities(lambda text: run_component_spans(text, ["regex"]), test_data, threshold=0.5)
df_regex["component"] = "Regex"

# Evaluation kombiniert (optional)
df_combined = evaluate_entities(lambda text: run_component_spans(text, ["regex", "piiranha", "spacy"]), test_data, threshold=0.5)
df_combined["component"] = "Combined"

# Zusammenführen
df_all = pd.concat([df_spacy, df_piiranha, df_regex, df_combined], ignore_index=True)

# Nur OVERALL-Zeilen anzeigen
df_overall = df_all[df_all["label"] == "OVERALL"]

# CSV speichern
df_overall.to_csv("Results_entity_level_all_components_overall.csv", index=False)

# Anzeigen
print(df_overall)

   component    label  tp  fp   fn  precision    recall        f1
20     spaCy  OVERALL   3  10  317   0.230769  0.009375  0.018018
41  Piiranha  OVERALL   2   0  318   1.000000  0.006250  0.012422
63     Regex  OVERALL  29   8  291   0.783784  0.090625  0.162465
85  Combined  OVERALL  32  18  288   0.640000  0.100000  0.172973
