In [11]:
# CADEC span evaluation — notebook-only utilities (no argparse, no __main__)

import re
from pathlib import Path
from dataclasses import dataclass
from typing import List, Tuple, Set, Dict

@dataclass(frozen=True)
class Span:
    """One labelled character range taken from an annotation file.

    The dataclass is frozen, so instances are immutable and hashable;
    ``parse_ann`` relies on that when it de-duplicates spans with ``set()``.
    """
    label: str  # entity type, e.g. "ADR"
    start: int  # first offset of the pair
    end: int    # second offset; used as the exclusive bound when slicing the raw text

# Matches one "<start> <end>" offset pair: two digit runs separated by whitespace.
# finditer() over a head such as "ADR 9 19;29 50" yields (9, 19) and then (29, 50).
RANGE_RE = re.compile(r"(\d+)\s+(\d+)")

def parse_ann(path: Path) -> List[Span]:
    """Read the "T" (text-bound) annotation lines of a CADEC ``.ann`` file.

    A usable line looks like ``T1<TAB>ADR 9 19<TAB>covered text``. Its second
    tab-separated field holds the label followed by one or more ``start end``
    offset pairs (e.g. ``ADR 9 19;29 50``). Every pair becomes its own
    ``Span``, so a multi-range annotation contributes several spans.

    Args:
        path: Location of the ``.ann`` file; it is read as UTF-8.

    Returns:
        De-duplicated spans ordered by ``(label, start, end)``. Pairs whose
        end is not greater than their start are dropped.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    found: Set[Span] = set()
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        # Only lines starting with "T" are parsed. Blank lines and any other
        # prefix ("#", "A", "R", ...) all fail this single check.
        if not line.startswith("T"):
            continue
        fields = line.split("\t")
        if len(fields) < 2:
            continue
        # split(None, 1) separates the label from the offsets, so characters
        # of the label can never be fed to the offset regex, and an empty or
        # whitespace-only field yields [] instead of raising IndexError.
        tokens = fields[1].split(None, 1)
        if len(tokens) < 2:
            continue  # missing label or missing offsets: nothing to record
        label, offsets = tokens
        for match in RANGE_RE.finditer(offsets):
            start, end = int(match.group(1)), int(match.group(2))
            if end > start:
                found.add(Span(label, start, end))
    # The sort key covers every field, so the resulting order is deterministic.
    return sorted(found, key=lambda span: (span.label, span.start, span.end))

def to_set(spans: List[Span]) -> Set[Tuple[str,int,int]]:
    """Project spans onto plain ``(label, start, end)`` tuples, collected in a set."""
    triples = set()
    for span in spans:
        triples.add((span.label, span.start, span.end))
    return triples

def prf1(tp: int, fp: int, fn: int) -> Tuple[float,float,float]:
    """Compute precision, recall and F1 from true/false positive and false negative counts.

    Any ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError.

    Returns:
        ``(precision, recall, f1)``.
    """
    def ratio(numerator, denominator):
        # A falsy (zero) denominator means the ratio is undefined; report 0.0.
        return numerator / denominator if denominator else 0.0

    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1 = ratio(2 * precision * recall, precision + recall)
    return precision, recall, f1

def evaluate_one(original_dir: Path, predicted_dir: Path, text_dir: Path, file_basename: str) -> Dict[str, float]:
    """
    Score one document's predicted spans against its gold spans by exact match.

    A predicted span counts as a true positive only when its label, start and
    end all equal those of a gold span. The gold entities, the predicted
    entities and the metrics are printed as a side effect.

    file_basename: document name such as 'ARTHROTEC.17'; a trailing '.ann'
    (any letter case) is removed if present. The files read are:
      - {original_dir}/{file_basename}.ann  (gold)
      - {predicted_dir}/{file_basename}.ann (pred)
      - {text_dir}/{file_basename}.txt      (raw text, used for the printout)

    Returns a dict with the keys 'precision', 'recall', 'f1', 'tp', 'fp', 'fn'.
    Raises FileNotFoundError when any of the three files is missing.
    """
    if file_basename.lower().endswith(".ann"):
        base = file_basename[:-4]
    else:
        base = file_basename

    ann_name = f"{base}.ann"
    gold_ann = Path(original_dir) / ann_name
    pred_ann = Path(predicted_dir) / ann_name
    txt_path = Path(text_dir) / f"{base}.txt"

    # Fail fast, in gold / predicted / text order, before reading anything.
    if not gold_ann.exists():
        raise FileNotFoundError(f"Gold not found: {gold_ann}")
    if not pred_ann.exists():
        raise FileNotFoundError(f"Predicted not found: {pred_ann}")
    if not txt_path.exists():
        raise FileNotFoundError(f"Text not found: {txt_path}")

    document_text = txt_path.read_text(encoding="utf-8")
    gold_spans = parse_ann(gold_ann)
    pred_spans = parse_ann(pred_ann)

    def describe(span_list):
        # Pair each span with the text it covers, with newlines turned into spaces.
        return [
            (sp.label, sp.start, sp.end, document_text[sp.start:sp.end].replace("\n", " "))
            for sp in span_list
        ]

    # Both listings are built before anything is printed.
    sections = (
        ("\n--- Ground Truth Entities ---", describe(gold_spans)),
        ("\n--- Predicted Entities ---", describe(pred_spans)),
    )
    for heading, rows in sections:
        print(heading)
        for row in rows:
            print(row)

    gold_keys = to_set(gold_spans)
    pred_keys = to_set(pred_spans)
    tp = len(gold_keys.intersection(pred_keys))
    fp = len(pred_keys.difference(gold_keys))
    fn = len(gold_keys.difference(pred_keys))
    P, R, F = prf1(tp, fp, fn)

    print("\n--- Evaluation Metrics ---")
    print(f"Precision: {P:.2f}")
    print(f"Recall:    {R:.2f}")
    print(f"F1-score:  {F:.2f}")

    return {"precision": P, "recall": R, "f1": F, "tp": tp, "fp": fp, "fn": fn}

def list_basenames(folder: Path) -> List[str]:
    """Return the sorted basenames (extension removed) of the ``.ann`` files in a folder.

    Args:
        folder: Directory to scan; subdirectories are not searched. A plain
            string path is accepted as well as a ``Path``, matching how
            ``evaluate_one`` normalises its directory arguments.

    Returns:
        Lexicographically sorted stems, e.g. ``"ARTHROTEC.17"`` for
        ``ARTHROTEC.17.ann``. The list is empty when nothing matches.
    """
    # Wrapping in Path() keeps Path inputs equivalent and lifts str inputs,
    # which would otherwise fail with AttributeError on .glob().
    return sorted(entry.stem for entry in Path(folder).glob("*.ann"))


In [12]:
from pathlib import Path

# ---- EDIT ASSIGNMENT_ROOT TO MATCH YOUR MACHINE ----
# Every directory below is derived from this single root, so relocating the
# assignment folder needs one edit instead of three.
ASSIGNMENT_ROOT = Path("/Users/anjalikulkarni/Desktop/Assignment1")
CADEC_DATA_DIR = ASSIGNMENT_ROOT / "CADEC-lPWNPfjE-" / "data" / "cadec"

original_dir = CADEC_DATA_DIR / "original"     # gold annotations
predicted_dir = ASSIGNMENT_ROOT / "predicted"  # your model outputs
text_dir = CADEC_DATA_DIR / "text"             # raw texts

# 1) Show what's available so you can pick the correct basename
print("Gold files (first 20):")
golds = list_basenames(original_dir)
print(golds[:20], f"... total={len(golds)}")

print("\nPredicted files (first 20):")
preds = list_basenames(predicted_dir)
print(preds[:20], f"... total={len(preds)}")

# 2) Pick the file you want to score (must exist in BOTH gold and predicted)
# Example: "ARTHROTEC.17" OR "VOLTAREN.9" etc. No extension.
file_basename = "ARTHROTEC.17"   # <-- change this to one that appears in BOTH lists

# Check both listings up front so the error names the directory that lacks the file.
if file_basename not in golds:
    raise FileNotFoundError(f"'{file_basename}.ann' not in gold dir {original_dir}")
if file_basename not in preds:
    raise FileNotFoundError(f"'{file_basename}.ann' not in predicted dir {predicted_dir}")

# 3) Run evaluation
metrics = evaluate_one(original_dir, predicted_dir, text_dir, file_basename)
metrics


Gold files (first 20):
['ARTHROTEC.1', 'ARTHROTEC.10', 'ARTHROTEC.100', 'ARTHROTEC.101', 'ARTHROTEC.102', 'ARTHROTEC.103', 'ARTHROTEC.104', 'ARTHROTEC.105', 'ARTHROTEC.106', 'ARTHROTEC.107', 'ARTHROTEC.108', 'ARTHROTEC.109', 'ARTHROTEC.11', 'ARTHROTEC.110', 'ARTHROTEC.111', 'ARTHROTEC.112', 'ARTHROTEC.113', 'ARTHROTEC.114', 'ARTHROTEC.115', 'ARTHROTEC.116'] ... total=1250

Predicted files (first 20):
['ARTHROTEC.1', 'ARTHROTEC.10', 'ARTHROTEC.100', 'ARTHROTEC.11', 'ARTHROTEC.12', 'ARTHROTEC.13', 'ARTHROTEC.14', 'ARTHROTEC.15', 'ARTHROTEC.16', 'ARTHROTEC.17', 'ARTHROTEC.18', 'ARTHROTEC.19', 'ARTHROTEC.2', 'ARTHROTEC.20', 'ARTHROTEC.21', 'ARTHROTEC.22', 'ARTHROTEC.23', 'ARTHROTEC.24', 'ARTHROTEC.25', 'ARTHROTEC.26'] ... total=103

--- Ground Truth Entities ---
('ADR', 74, 88, 'abdominal pain')
('ADR', 100, 117, 'mentstrual cramps')
('ADR', 118, 135, 'diarreah cramping')
('ADR', 221, 241, 'lower abdominal pain')
('ADR', 400, 408, 'GI bleed')
('ADR', 477, 501, 'heavy menstrual bleeding')
(

{'precision': 0.36363636363636365,
 'recall': 0.26666666666666666,
 'f1': 0.30769230769230765,
 'tp': 4,
 'fp': 7,
 'fn': 11}