CADEC extractor — notebook-only library (no .py references)

In [1]:
from __future__ import annotations
import os, re, json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple

In [2]:
# --- optional: load .env if present (safe in notebooks)
try:
    # python-dotenv is optional: when it is importable, load_dotenv() is called
    # with its defaults before the os.getenv lookups further down.
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    # Deliberate best-effort: any failure (package missing, load error) is
    # ignored and configuration comes from the process environment only.
    pass

In [3]:
# --- env sanitization (prevents latin-1 header errors in HTTP headers)
def _clean_ascii_header_value(s: str) -> str:
    s = (s or "")
    s = s.replace("Bearer ", "")
    s = s.strip().strip('"').strip("'")
    s = re.sub(r"[^\x20-\x7E]", "", s)  # printable ASCII only
    return s

In [4]:
# Default model identifier; overridable through the DEFAULT_MODEL env var.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "llama-3.1-8b-instant")
# Base URL that call_groq appends "/chat/completions" to.
GROQ_BASEURL = os.getenv("GROQ_BASEURL", "https://api.groq.com/openai/v1")
# API key with any "Bearer " prefix, quotes and non-ASCII characters removed.
GROQ_API_KEY = _clean_ascii_header_value(os.getenv("GROQ_API_KEY") or "")
if not GROQ_API_KEY:
    # Fail at cell-execution time rather than on the first HTTP request.
    raise RuntimeError("GROQ_API_KEY is missing/invalid. Set it in the notebook env or .env (GROQ_API_KEY=sk_...).")
# Write the cleaned value back because call_groq reads the key from os.environ.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY  # keep sanitized

In [5]:
MAX_CHARS = 1500  # default chunk size, in characters, for chunk_text
TIMEOUT = 60  # forwarded as `timeout=` to requests.post in call_groq

In [6]:
# --- labels & priority
# Labels that are always allowed; "Finding" is added on top when keep_finding is set.
BASE_LABELS = ("ADR", "Drug", "Disease", "Symptom")
# Label ranks consulted by resolve_conflicts when "Finding" is not in use:
# between overlapping spans the label with the larger number is preferred.
PRIORITY_BASE = {"ADR": 4, "Drug": 3, "Disease": 2, "Symptom": 1}
# Same ordering with "Finding" inserted as the lowest rank.
PRIORITY_WITH_FINDING = {"ADR": 5, "Drug": 4, "Disease": 3, "Symptom": 2, "Finding": 1}

In [7]:
# --- data
@dataclass
class Range:
    """Character interval; the code slices text as ``text[start:end]``."""
    start: int  # offset of the first character (inclusive)
    end: int  # offset one past the last character (exclusive)

In [8]:
@dataclass
class RawSpan:
    """A span as reported by the model, after sanitize_raw_spans but before offset repair."""
    label: str  # one of the allowed labels
    ranges: List[Range]  # chunk-local ranges, sorted and de-duplicated
    text: str  # the model's "text" field, stripped; compared against the ranges in repair_raw_span

In [9]:
@dataclass
class Span:
    """A labelled span whose ranges have been accepted or repaired by repair_raw_span."""
    label: str  # entity label, e.g. "ADR" or "Drug"
    ranges: List[Range]  # one or more ranges; several ranges mean a discontiguous mention

In [10]:
# --- prompt builder
def build_system_prompt(keep_finding: bool, allow_procedures: bool, distance_policy: str) -> str:
    """Assemble the system prompt that tells the model which spans to return.

    Args:
        keep_finding: When true, "Finding" is appended to the allowed labels.
        allow_procedures: When false, a rule telling the model to skip
            procedures/plans is added to the exclusion list.
        distance_policy: When equal to "drop", a rule telling the model to
            skip bare distances/quantities is added to the exclusion list.

    Returns:
        The complete prompt text, ending with the JSON shape the reply must follow.
    """
    allowed = [*BASE_LABELS, "Finding"] if keep_finding else [*BASE_LABELS]
    allowed_csv = ", ".join(allowed)
    allowed_alt = "|".join(allowed)

    # Rules that always apply, followed by the policy-dependent ones.
    exclusions = [
        "- Meta phrases (e.g., 'possible side effects', 'side effects' when not the patient’s actual event).",
        "- Generic terms like 'drug', 'medicine', 'medication', 'tablet', 'pill' unless a real brand/generic product name.",
        "- Dosing schedules ('twice per day', '2x/day', 'every morning').",
        "- Negated mentions in the same clause ('without bleeding', 'no cramps').",
    ]
    if not allow_procedures:
        exclusions += ["- Procedures/plans ('surgery', 'operation', 'procedure', 'injection', 'epidural steroid injection') unless clearly the adverse event itself."]
    if distance_policy == "drop":
        exclusions += ["- Distances/quantities by themselves ('100 meters', '1/2 km', '10 years')."]
    exclusion_block = "\n".join(exclusions)

    return f"""You are a clinical annotation assistant for CADEC forum posts.
Return spans ONLY as strict JSON.

ALLOWED labels (only these): {allowed_csv}.

OFFSETS:
- Character offsets are 0-based, end-exclusive, and MUST be within THIS CHUNK ONLY.
- Provide one or more ranges per span for discontiguous mentions.
- Every range MUST match exact text; do not hallucinate.

WHAT TO LABEL:
- Concrete, patient-experienced clinical events or conditions.
- Prefer the most specific phrase ('lower abdominal pain' over 'pain').
- If medication/brand or dosing context is present and the text indicates a side-effect, prefer ADR over Symptom.

DO NOT LABEL (drop these):
{exclusion_block}

Sort spans by first range start then end. Keep output minimal and precise.

Return JSON ONLY:
{{"spans":[{{"label":"{allowed_alt}","ranges":[{{"start":int,"end":int}}],"text":"verbatim from text"}}]}}"""

In [11]:
# User-message template; process_one_file fills {n} with len(chunk) and {chunk}
# with the chunk text via str.format.
USER_TEMPLATE = """CHUNK (local offsets 0..{n}):
{chunk}

Output JSON ONLY, no commentary."""

In [12]:
# --- groq call (notebook-safe)
def call_groq(model: str, system: str, user: str, temperature: float = 0.0) -> str:
    """Send one system+user exchange to the chat-completions endpoint.

    Args:
        model: Model identifier placed in the request body.
        system: Content of the system message.
        user: Content of the user message.
        temperature: Sampling temperature placed in the request body.

    Returns:
        The ``content`` of the first choice's message in the JSON reply.

    Raises:
        KeyError: If GROQ_API_KEY is absent from ``os.environ`` or the reply
            lacks the expected keys.
        requests.HTTPError: Via ``raise_for_status`` on an error status.
    """
    import requests  # imported lazily so the rest of the notebook loads without it

    token = os.environ["GROQ_API_KEY"]  # already sanitized
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    body = {
        "model": model,
        "temperature": temperature,
        "response_format": {"type": "json_object"},
        "messages": conversation,
    }
    response = requests.post(
        f"{GROQ_BASEURL}/chat/completions",
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
        json=body,
        timeout=TIMEOUT,
    )
    response.raise_for_status()
    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"]

In [13]:
# --- chunking
def chunk_text(text: str, max_chars: int = MAX_CHARS) -> List[Tuple[int, str]]:
    """Split ``text`` into consecutive chunks of at most ``max_chars`` characters.

    The chunks partition the text exactly: concatenating them in order
    reproduces ``text``, and each chunk's offset is its start index in ``text``.

    When the remaining text does not fit in one chunk, the cut is placed just
    after the last newline in the window, or failing that just after the last
    ". ", provided the separator lies in the second half of the window.
    Otherwise the chunk is cut hard at ``max_chars``.

    Args:
        text: The document to split.
        max_chars: Maximum chunk length; must be a positive integer.

    Returns:
        A list of ``(offset, chunk)`` pairs; empty for empty ``text``.

    Raises:
        ValueError: If ``max_chars`` is not positive (the loop could never advance).
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")

    chunks: List[Tuple[int, str]] = []
    i, n = 0, len(text)
    while i < n:
        j = min(i + max_chars, n)
        cut = j
        if j < n:  # the tail fits in one chunk: no need to look for a boundary
            floor = i + max_chars // 2  # never produce a chunk shorter than half a window
            for sep in ("\n", ". "):
                pos = text.rfind(sep, i, j)
                if pos > floor:
                    # Cut after the separator so it stays with the text it ends.
                    cut = pos + len(sep)
                    break
        chunks.append((i, text[i:cut]))
        i = cut
    return chunks

In [14]:
# --- json & sanitization
# A run of one or more whitespace characters; norm() replaces each run with a single space.
WHITESPACE_RE = re.compile(r"\s+")
# Text that begins with a number followed by a time unit ("10 years", "3 days");
# post_filter_spans drops spans whose surface matches.
DURATION_RE = re.compile(r"^\s*\d+\s+(years?|months?|weeks?|days?)\b", re.I)

In [15]:
def parse_json_strict(s: str) -> Dict[str, Any]:
    """Parse a model reply into a JSON object, tolerating surrounding chatter.

    The whole string is tried first. If that fails, parsing restarts at the
    first ``{`` and stops at the end of the first complete JSON value, so code
    fences, leading prose and trailing commentary (even commentary that itself
    contains braces) are ignored.

    Args:
        s: Raw reply text.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If no JSON value can be decoded (``json.JSONDecodeError``
            is a subclass), or the decoded value is not a JSON object.
    """
    try:
        data = json.loads(s)
    except ValueError:
        start = s.find("{")
        if start == -1:
            raise  # nothing that could be an object: surface the original error
        # raw_decode stops after the first complete value instead of requiring
        # the remainder of the string to be valid JSON.
        data, _ = json.JSONDecoder().raw_decode(s, start)
    if not isinstance(data, dict):
        # Callers use ``.get`` on the result; reject lists/scalars here.
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")
    return data

In [16]:
def sanitize_raw_spans(resp: Dict[str, Any], chunk_len: int, allowed_labels: List[str]) -> List[RawSpan]:
    """Turn a decoded model reply into validated, chunk-local ``RawSpan`` objects.

    The reply is untrusted, so anything of the wrong shape is skipped rather
    than allowed to raise: a non-object reply, a non-list ``spans``, non-object
    span entries, a non-string ``text`` (treated as empty), and range entries
    that are not mappings with integer-convertible ``start``/``end``.

    Args:
        resp: Decoded JSON reply; expected to hold a ``"spans"`` list.
        chunk_len: Length of the chunk the offsets refer to; ranges must
            satisfy ``0 <= start < end <= chunk_len``.
        allowed_labels: Labels to keep; spans with any other label are dropped.

    Returns:
        Spans that kept at least one valid range, each with its ranges sorted
        and de-duplicated, ordered by their first range.
    """
    out: List[RawSpan] = []
    items = resp.get("spans") if isinstance(resp, dict) else None
    if not isinstance(items, list):
        return out

    for item in items:
        if not isinstance(item, dict):
            continue
        label = item.get("label")
        if label not in allowed_labels:
            continue

        raw_text = item.get("text")
        text = raw_text.strip() if isinstance(raw_text, str) else ""

        raw_ranges = item.get("ranges")
        if isinstance(raw_ranges, dict):
            raw_ranges = [raw_ranges]  # tolerate a single range given without the list
        if not isinstance(raw_ranges, list):
            continue

        bounds = set()  # de-duplicates identical (start, end) pairs
        for r in raw_ranges:
            try:
                s = int(r["start"])
                e = int(r["end"])
            except (KeyError, TypeError, ValueError, OverflowError):
                continue
            if 0 <= s < e <= chunk_len:
                bounds.add((s, e))
        if not bounds:
            continue

        ranges = [Range(s, e) for s, e in sorted(bounds)]
        out.append(RawSpan(label=label, ranges=ranges, text=text))

    out.sort(key=lambda sp: (sp.ranges[0].start, sp.ranges[0].end))
    return out

In [17]:
# --- offset repair
def norm(s: str) -> str:
    """Return ``s`` with whitespace runs collapsed to one space, trimmed, and casefolded."""
    collapsed = WHITESPACE_RE.sub(" ", s)
    trimmed = collapsed.strip()
    return trimmed.casefold()

In [18]:
def slice_join(chunk: str, ranges: List[Range]) -> str:
    """Return the text covered by each range, in the given order, joined by single spaces."""
    pieces = [chunk[rng.start:rng.end] for rng in ranges]
    return " ".join(pieces)

In [19]:
def find_exact(chunk: str, needle: str) -> Optional[Tuple[int, int]]:
    """Locate the first occurrence of ``needle`` in ``chunk``.

    An exact match is tried first, then a case-insensitive one. The
    case-insensitive search runs against ``chunk`` itself, so the returned
    offsets always index the original text. Searching a casefolded copy would
    not guarantee that, because casefolding can change a string's length
    (for example "ß" becomes "ss").

    Args:
        chunk: Text to search.
        needle: Text to look for; an empty needle never matches.

    Returns:
        ``(start, end)`` offsets into ``chunk``, or ``None`` when not found.
    """
    if not needle:
        return None
    i = chunk.find(needle)
    if i != -1:
        return (i, i + len(needle))
    # re.escape makes the needle literal; IGNORECASE compares character by
    # character, so the match span maps directly onto `chunk`.
    m = re.search(re.escape(needle), chunk, flags=re.IGNORECASE)
    if m is not None:
        return m.span()
    return None

In [20]:
def expand_to_word_boundaries(chunk: str, s: int, e: int) -> Tuple[int, int]:
    """Grow ``[s, e)`` outward until neither edge cuts through a word.

    A character counts as part of a word when it is alphanumeric, an
    apostrophe or a hyphen.

    Returns:
        The widened ``(start, end)`` pair.
    """
    def _in_word(ch: str) -> bool:
        return ch.isalnum() or ch in "'-"

    left = s
    while left > 0 and _in_word(chunk[left - 1]):
        left -= 1

    right, limit = e, len(chunk)
    while right < limit and _in_word(chunk[right]):
        right += 1

    return left, right

In [21]:
def is_plausible_text(txt: str) -> bool:
    """Return True when the trimmed text could be a real mention.

    The trimmed text must be at least two characters long, contain at least
    one letter, and consist of at least 30% letters.
    """
    candidate = txt.strip()
    if len(candidate) < 2:
        return False
    letter_count = sum(map(str.isalpha, candidate))
    if letter_count == 0:
        return False
    return letter_count / max(1, len(candidate)) >= 0.3

In [22]:
def repair_raw_span(raw: RawSpan, chunk: str) -> Optional[Span]:
    """Reconcile a model-reported span with the chunk text.

    Three attempts are made in order:
      1. If the text under the reported ranges equals the reported text
         (after ``norm``), the ranges are kept unchanged.
      2. Otherwise the reported text is searched for in the chunk; a hit is
         widened to word boundaries and accepted if it looks plausible.
      3. Otherwise the hull of the reported ranges is widened to word
         boundaries and accepted if it looks plausible.

    Returns:
        The repaired span, or ``None`` when every attempt fails.
    """
    def _widened(lo: int, hi: int) -> Optional[Span]:
        # Snap to word boundaries; keep the result only if it reads like text.
        lo, hi = expand_to_word_boundaries(chunk, lo, hi)
        if not is_plausible_text(chunk[lo:hi]):
            return None
        return Span(label=raw.label, ranges=[Range(lo, hi)])

    reported = slice_join(chunk, raw.ranges)
    if norm(reported) == norm(raw.text):
        return Span(label=raw.label, ranges=raw.ranges)

    located = find_exact(chunk, raw.text)
    if located:
        candidate = _widened(*located)
        if candidate is not None:
            return candidate

    hull_start = min(rng.start for rng in raw.ranges)
    hull_end = max(rng.end for rng in raw.ranges)
    return _widened(hull_start, hull_end)

In [23]:
# --- global utils
def to_global(spans: List[Span], base: int) -> List[Span]:
    """Return copies of ``spans`` with every range offset shifted by ``base``."""
    shifted: List[Span] = []
    for span in spans:
        moved = [Range(base + rng.start, base + rng.end) for rng in span.ranges]
        shifted.append(Span(span.label, moved))
    return shifted

In [24]:
def span_hull(sp: Span) -> Tuple[int, int]:
    """Return ``(smallest start, largest end)`` over all ranges of ``sp``."""
    starts = [rng.start for rng in sp.ranges]
    ends = [rng.end for rng in sp.ranges]
    return min(starts), max(ends)

In [25]:
def resolve_conflicts(spans: List[Span], keep_finding: bool) -> List[Span]:
    """Reduce ``spans`` to a set whose hulls do not overlap.

    Spans are visited by start offset (longer first, then higher label
    priority). A visited span is compared with every already-kept span whose
    hull overlaps its own:

    * it is discarded if any such span has a higher-priority label, or the
      same priority and a hull at least as long;
    * otherwise it is kept and the overlapping spans it beat are removed.
      Without this removal both the winner and the loser stayed in the output.

    Args:
        spans: Candidate spans with document-level offsets.
        keep_finding: Selects the priority table that includes "Finding".

    Returns:
        The surviving spans, de-duplicated and sorted by hull start, hull end,
        then descending label priority.
    """
    priority = PRIORITY_WITH_FINDING if keep_finding else PRIORITY_BASE

    def rank(sp: Span) -> int:
        return priority.get(sp.label, 0)

    # Compute each hull once and carry it alongside its span.
    ordered = sorted(
        ((span_hull(sp), sp) for sp in spans),
        key=lambda pair: (pair[0][0], -(pair[0][1] - pair[0][0]), -rank(pair[1])),
    )

    kept: List[Tuple[Tuple[int, int], Span]] = []
    for (s, e), sp in ordered:
        sp_rank = rank(sp)
        beaten_here = set()  # indices into `kept` that this span outranks
        loses = False
        for idx, ((ks, ke), kp) in enumerate(kept):
            if e <= ks or ke <= s:
                continue  # hulls do not overlap
            kp_rank = rank(kp)
            if kp_rank > sp_rank or (kp_rank == sp_rank and (ke - ks) >= (e - s)):
                loses = True
                break
            beaten_here.add(idx)
        if loses:
            continue
        if beaten_here:
            kept = [entry for idx, entry in enumerate(kept) if idx not in beaten_here]
        kept.append(((s, e), sp))

    # Identical spans can only survive together in degenerate (zero-width)
    # cases; drop exact duplicates anyway.
    uniq: List[Span] = []
    seen = set()
    for _, sp in kept:
        key = (sp.label, tuple((r.start, r.end) for r in sp.ranges))
        if key in seen:
            continue
        seen.add(key)
        uniq.append(sp)

    uniq.sort(key=lambda sp: (span_hull(sp)[0], span_hull(sp)[1], -rank(sp)))
    return uniq

In [26]:
def clip_to_text(spans: List[Span], text: str) -> List[Span]:
    """Drop ranges that are empty or fall outside ``text``; drop spans left with none."""
    limit = len(text)

    def _inside(rng: Range) -> bool:
        return 0 <= rng.start < rng.end <= limit

    clipped: List[Span] = []
    for span in spans:
        valid = list(filter(_inside, span.ranges))
        if not valid:
            continue
        clipped.append(Span(span.label, valid))
    return clipped

In [27]:
# --- policy-aware post-filters
# The patterns below are consulted by post_filter_spans against a span's surface text.

# Talk *about* side effects rather than an event; any span containing it is dropped.
META_PHRASES_RE = re.compile(r"\b(side effect|side effects|possible side effects?)\b", re.I)
# Dosing frequency such as "twice a day" or "3 times per week"; containing spans are dropped.
FREQUENCY_RE = re.compile(r"\b(?:once|twice|thrice|\d+\s*(?:x|times?))\s*(?:per|a)\s*(?:day|week|month|hour)s?\b", re.I)
# A surface that is nothing but a number plus a metre/kilometre unit (anchored at both ends).
DISTANCE_SURF_RE = re.compile(r"^\s*\d+(?:\.\d+)?\s*(?:m|km|meter|meters|kilometer|kilometers|kms?)\s*$", re.I)
# Procedure vocabulary; containing spans are dropped when allow_procedures is false.
PROCEDURE_RE = re.compile(r"\b(surgery|operation|procedure|injection|epidural|steroid injection)\b", re.I)
# A surface that is only a generic drug word, optionally preceded by a determiner/possessive.
GENERIC_DRUG_SURF_RE = re.compile(r"^(?:\b(this|that|the|my|his|her)\b\s+)?\b(drug|medicine|medication|tablet|pill)s?\b$", re.I)
# Full or abbreviated weekday name; "Drug" spans that are exactly a weekday are dropped.
WEEKDAY_RE = re.compile(r"^(mon(day)?|tue(sday)?|wed(nesday)?|thu(rsday)?|fri(day)?|sat(urday)?|sun(day)?)$", re.I)
# "this/that/the poison"; "ADR" spans containing it are dropped.
NON_EVENT_PHRASE_RE = re.compile(r"\b(this|that|the)\s+poison\b", re.I)

In [28]:
# Common symptom head words (with a few misspellings of "diarrhea"); used by
# post_filter_spans to relabel "Disease" and "Symptom" spans.
SYMPTOM_HEAD_RE = re.compile(
    r"\b(pain|cramps?|bleeding|nausea|vomit(?:ing)?|diarr(?:hea|hoea)|diah?rea|diarh?ea|headache|dizz(?:y|iness)|rash|swelling)\b", re.I
)
# Menstruation-related wording; a match triggers the menstrual_policy relabelling.
MENSTRUAL_RE = re.compile(r"\b(menstrual|menstruation|periods?|menorrhagia|vaginal bleeding|bleeding from the vagina)\b", re.I)
# Additional cramp/contraction phrases handled by the same menstrual_policy branch.
MENSTRUAL_EXTRA_RE = re.compile(r"\b(menstrual cramps?|vaginal cramps?|uter(?:us|ine) contractions?)\b", re.I)

In [29]:
# A number followed by a dose unit, e.g. "50mg" or "2 tabs".
DOSAGE_RE = re.compile(r"\b\d+\s*(?:mg|mcg|g|ml|iu|units?|caps?|tabs?)\b", re.I)
# Words that signal medication use; together with DOSAGE_RE they define
# "medication context" in has_med_context / has_local_med_context.
MED_CUES_RE = re.compile(r"\b(took|taking|dose|dosing|tablet|pill|medication|medicine|nsaid|ibuprofen|advil|naproxen|arthrotec|drug)\b", re.I)

In [30]:
# Cue words that has_functional_context looks for in the text around a span.
FUNC_VERBS = {"walk", "walking", "run", "running", "stand", "standing", "lift", "able", "unable", "can", "can't", "cannot", "limited", "limit"}

In [31]:
# Product names that post_filter_spans treats as evidence of a drug mention.
DRUG_LEXICON = {
    "Arthrotec","Misoprostol","Diclofenac","Ibuprofen","Paracetamol",
    "Advil","Naproxen","Voltaren","Lyrica","Lipitor","Cymbalta",
    "Clonidine","Tylenol","co-codamol","Pamprin"
}
# Case-insensitive whole-word alternation over the lexicon (entries are regex-escaped).
DRUG_LEXICON_RE = re.compile(r"\b(" + "|".join(sorted(map(re.escape, DRUG_LEXICON))) + r")\b", re.I)
# One token starting with an uppercase letter followed by at least two
# letters, digits or hyphens.
PROPER_BRAND_RE = re.compile(r"^[A-Z][A-Za-z0-9\-]{2,}$")

In [32]:
def build_span_text(text: str, ranges: List[Range]) -> str:
    """Return the text under each range, with newlines and tabs turned into spaces, joined by single spaces."""
    flatten = str.maketrans("\n\t", "  ")
    fragments = [text[rng.start:rng.end].translate(flatten) for rng in ranges]
    return " ".join(fragments)

In [33]:
def has_med_context(text: str) -> bool:
    """Return True when ``text`` contains a dosage expression or a medication cue word."""
    for pattern in (DOSAGE_RE, MED_CUES_RE):
        if pattern.search(text):
            return True
    return False

In [34]:
def has_local_med_context(text: str, s: int, e: int, window: int = 120) -> bool:
    """Return True when a medication cue or dosage occurs within ``window`` characters of ``[s, e)``."""
    lo = max(0, s - window)
    hi = min(len(text), e + window)
    neighbourhood = text[lo:hi]
    return any(pattern.search(neighbourhood) is not None for pattern in (MED_CUES_RE, DOSAGE_RE))

In [35]:
def has_functional_context(text: str, s: int, e: int, window: int = 60) -> bool:
    """Return True when a functional cue word occurs near ``[s, e)``.

    The text from ``window`` characters before ``s`` to ``window`` characters
    after ``e`` is casefolded and searched for any entry of ``FUNC_VERBS`` as
    a whole word. Whole-word matching matters because several cues are very
    short: with plain substring matching "can" is found inside "significant",
    "able" inside "table" and "run" inside "prune".

    Args:
        text: Full document text.
        s: Start offset of the span.
        e: End offset of the span.
        window: Number of characters of context taken on each side.
    """
    n = len(text)
    lo = max(0, s - window)
    hi = min(n, e + window)
    ctx = text[lo:hi].casefold()
    alternation = "|".join(re.escape(v) for v in sorted(FUNC_VERBS))
    # Lookarounds instead of \b so that cues containing an apostrophe
    # ("can't") are still bounded correctly on both sides.
    pattern = r"(?<!\w)(?:" + alternation + r")(?!\w)"
    return re.search(pattern, ctx) is not None

In [36]:
def post_filter_spans(
    spans: List[Span],
    text: str,
    keep_finding: bool,
    allow_procedures: bool,
    distance_policy: str,
    menstrual_policy: str,
    disease_symptom_relabel: str,
) -> List[Span]:
    """Apply rule-based drops and relabels to document-level spans.

    Each span is checked against the rules in the order written below; a
    ``continue`` drops the span, otherwise it is appended (possibly with a
    new label). The order matters because later rules look at the label
    produced by earlier ones.

    Args:
        spans: Spans with offsets into ``text``.
        text: Full document text.
        keep_finding: Whether "Finding" is an allowed label.
        allow_procedures: When false, spans containing procedure words are dropped.
        distance_policy: "drop" removes spans that are only a distance;
            "functional" keeps them only with functional context nearby and,
            if ``keep_finding`` is set, relabels them "Finding". Any other
            value leaves distance spans alone.
        menstrual_policy: "adr" or "symptom" forces that label on
            menstruation-related spans; any other value chooses "ADR" when the
            document has medication context and "Symptom" otherwise.
        disease_symptom_relabel: "on" relabels "Disease" spans that contain a
            symptom head word.

    Returns:
        The surviving spans, in input order.
    """
    out: List[Span] = []
    # Document-wide flag: true if any dosage or medication cue occurs anywhere in `text`.
    med_ctx = has_med_context(text)
    allowed_labels = set(BASE_LABELS) if not keep_finding else {"ADR", "Drug", "Disease", "Symptom", "Finding"}

    for sp in spans:
        if sp.label not in allowed_labels:
            continue

        surf = build_span_text(text, sp.ranges).strip()
        if len(surf) < 2:
            continue

        # Hull of the span, used for the context-window checks below.
        s0 = min(r.start for r in sp.ranges)
        e0 = max(r.end for r in sp.ranges)

        # Label-specific false positives.
        if sp.label == "Drug" and WEEKDAY_RE.fullmatch(surf):
            continue
        if sp.label == "ADR" and NON_EVENT_PHRASE_RE.search(surf):
            continue

        # Label-independent drops: meta talk and dosing frequency.
        if META_PHRASES_RE.search(surf): 
            continue
        if FREQUENCY_RE.search(surf): 
            continue

        if not allow_procedures and PROCEDURE_RE.search(surf):
            continue

        if distance_policy == "drop" and DISTANCE_SURF_RE.match(surf):
            continue
        elif distance_policy == "functional" and DISTANCE_SURF_RE.match(surf):
            if not has_functional_context(text, s0, e0):
                continue
            if keep_finding and sp.label != "Finding":
                sp = Span(label="Finding", ranges=sp.ranges)

        if sp.label == "Drug":
            # Long or comma-containing "Drug" spans are kept only with
            # supporting evidence: a lexicon/brand-shaped name or a dosage
            # within 40 characters.
            longish = ("," in surf) or (len(surf.split()) > 4)
            looks_brand = bool(DRUG_LEXICON_RE.search(surf) or PROPER_BRAND_RE.fullmatch(surf.strip()))
            dosage_near = bool(DOSAGE_RE.search(text[max(0, s0-40):min(len(text), e0+40)]))
            if GENERIC_DRUG_SURF_RE.match(surf):
                continue
            if longish and not (looks_brand or dosage_near):
                continue

        if DURATION_RE.match(surf):
            continue

        if disease_symptom_relabel == "on" and sp.label == "Disease" and SYMPTOM_HEAD_RE.search(surf):
            sp = Span(label=("ADR" if med_ctx else "Symptom"), ranges=sp.ranges)

        # Menstruation-related surfaces are relabelled whatever their current label.
        if MENSTRUAL_RE.search(surf) or MENSTRUAL_EXTRA_RE.search(surf):
            if menstrual_policy == "adr":
                sp = Span(label="ADR", ranges=sp.ranges)
            elif menstrual_policy == "symptom":
                sp = Span(label="Symptom", ranges=sp.ranges)
            else:
                sp = Span(label=("ADR" if med_ctx else "Symptom"), ranges=sp.ranges)

        if sp.label == "Symptom" and SYMPTOM_HEAD_RE.search(surf):
            # NOTE(review): med_ctx is document-wide and uses the same two
            # patterns as has_local_med_context, so the local check looks
            # unable to change the outcome here. Confirm whether a purely
            # local test was intended.
            if med_ctx or has_local_med_context(text, s0, e0, window=120):
                sp = Span(label="ADR", ranges=sp.ranges)

        # An "ADR" whose surface is really a product name becomes a "Drug".
        if sp.label == "ADR":
            if DRUG_LEXICON_RE.search(surf) or PROPER_BRAND_RE.fullmatch(surf):
                sp = Span(label="Drug", ranges=sp.ranges)

        out.append(sp)

    return out

In [37]:
# --- enumeration splitter
# Item separator used by split_enumerations: a single comma.
LIST_SEP_RE = re.compile(r",")

In [38]:
def _trim_to_text_bounds(text: str, s: int, e: int) -> tuple[int, int]:
    while s < e and text[s].isspace(): s += 1
    while e > s and text[e-1].isspace(): e -= 1
    while s > 0 and (text[s-1].isalnum() or text[s-1] in "'-"): s -= 1
    n = len(text)
    while e < n and (text[e].isalnum() or text[e] in "'-"): e += 1
    return s, e

In [39]:
def _looks_like_item(surf: str) -> bool:
    surf = surf.strip()
    if len(surf) < 2: return False
    alpha = sum(ch.isalpha() for ch in surf)
    return alpha >= 2

In [40]:
def split_enumerations(spans: List[Span], text: str,
                       labels_to_split: Iterable[str] = ("Symptom", "ADR"),
                       also_split_and: bool = False) -> List[Span]:
    """Break list-like spans ("a, b, c") into one span per item.

    Only single-range spans whose label is in ``labels_to_split`` are
    considered. The span text is cut at commas; with ``also_split_and`` each
    comma-free piece is additionally cut at its first " and ". Every piece is
    trimmed and snapped to word boundaries, and pieces that do not look like
    an item are discarded. The span is replaced by its pieces only when at
    least two survive; otherwise it is passed through unchanged.

    Returns:
        A new list with the same ordering, split spans replaced in place.
    """
    result: List[Span] = []
    for span in spans:
        if span.label not in labels_to_split or len(span.ranges) != 1:
            result.append(span)
            continue

        only = span.ranges[0]
        segment = text[only.start:only.end]
        has_comma = "," in segment
        has_and = also_split_and and " and " in segment.lower()
        if not has_comma and not has_and:
            result.append(span)
            continue

        # Comma-delimited pieces as (start, stop) offsets local to `segment`.
        separators = [(m.start(), m.end()) for m in LIST_SEP_RE.finditer(segment)]
        piece_starts = [0] + [sep_end for _, sep_end in separators]
        piece_stops = [sep_start for sep_start, _ in separators] + [len(segment)]

        candidates: List[tuple[int, int]] = []
        for lo, hi in zip(piece_starts, piece_stops):
            piece = segment[lo:hi]
            and_at = -1
            if also_split_and and "," not in piece:
                and_at = piece.lower().find(" and ")
            if and_at == -1:
                candidates.append((lo, hi))
            else:
                # 5 == len(" and "): resume right after the conjunction.
                candidates.extend([(lo, lo + and_at), (lo + and_at + 5, hi)])

        children: List[Span] = []
        for lo, hi in candidates:
            start, stop = _trim_to_text_bounds(text, only.start + lo, only.start + hi)
            if stop <= start:
                continue
            if not _looks_like_item(text[start:stop]):
                continue
            children.append(Span(span.label, [Range(start, stop)]))

        result.extend(children if len(children) >= 2 else [span])
    return result

In [41]:
# --- writer (.ann)
def to_brat_ann_lines(spans: Iterable[Span], text: str) -> List[str]:
    """Render spans as text-bound annotation lines for a .ann file.

    Each line is ``T<k>``, a tab, the label and its ``start end`` pairs
    (several pairs joined by ``;``), a tab, and the covered text. ``k``
    counts from 1 in iteration order.
    """
    lines: List[str] = []
    for idx, span in enumerate(spans, start=1):
        offsets = ";".join(f"{rng.start} {rng.end}" for rng in span.ranges)
        surface = build_span_text(text, span.ranges)
        lines.append(f"T{idx}\t{span.label} {offsets}\t{surface}")
    return lines

In [42]:
# --- orchestration
def process_one_file(
    path: Path,
    model: str,
    out_dir: Path,
    temperature: float,
    max_chars: int,
    verbose: bool,
    keep_finding: bool,
    allow_procedures: bool,
    distance_policy: str,
    menstrual_policy: str,
    disease_symptom_relabel: str,
    split_enums: bool,
    split_and: bool,
) -> Path:
    """Annotate one text file and write ``<stem>.ann`` into ``out_dir``.

    Pipeline: read the file, chunk it, query the model once per chunk,
    sanitize and repair the returned spans, shift them to document offsets,
    then clip, post-filter, optionally split enumerations, resolve conflicts
    and write the result.

    Args:
        path: UTF-8 text file to annotate.
        model: Model identifier passed to ``call_groq``.
        out_dir: Output directory; created if missing.
        temperature: Sampling temperature passed to ``call_groq``.
        max_chars: Maximum chunk size passed to ``chunk_text``.
        verbose: Print parse warnings and the resulting annotation lines.
        keep_finding: Allow the "Finding" label.
        allow_procedures, distance_policy, menstrual_policy,
        disease_symptom_relabel: Forwarded to the prompt builder and/or
            ``post_filter_spans``.
        split_enums: Run ``split_enumerations`` on "Symptom"/"ADR" spans.
        split_and: Passed as ``also_split_and`` to ``split_enumerations``.

    Returns:
        Path of the written .ann file.

    Raises:
        Whatever ``call_groq`` or the file I/O raises. Only JSON parse
        failures are caught, and those skip the affected chunk.
    """
    raw = path.read_text(encoding="utf-8")
    chunks = chunk_text(raw, max_chars=max_chars)

    labels_list = list(BASE_LABELS)
    if keep_finding:
        labels_list.append("Finding")

    system_prompt = build_system_prompt(keep_finding, allow_procedures, distance_policy)

    all_spans: List[Span] = []
    for base, chunk in chunks:
        user = USER_TEMPLATE.format(n=len(chunk), chunk=chunk)
        resp_text = call_groq(model, system_prompt, user, temperature=temperature)
        try:
            data = parse_json_strict(resp_text)
        except Exception as ex:
            # An unparseable reply loses this chunk's spans but not the file.
            if verbose:
                print(f"[WARN] JSON parse failed @ {path.name} chunk@{base}: {ex}\n{resp_text[:400]}")
            continue
        raw_spans = sanitize_raw_spans(data, len(chunk), labels_list)

        repaired: List[Span] = []
        for rs in raw_spans:
            sp = repair_raw_span(rs, chunk)
            if sp is not None:
                repaired.append(sp)

        # Offsets so far are chunk-local; `base` is the chunk's start in `raw`.
        global_spans = to_global(repaired, base)
        all_spans.extend(global_spans)

    all_spans = clip_to_text(all_spans, raw)
    all_spans = post_filter_spans(
        all_spans,
        raw,
        keep_finding=keep_finding,
        allow_procedures=allow_procedures,
        distance_policy=distance_policy,
        menstrual_policy=menstrual_policy,
        disease_symptom_relabel=disease_symptom_relabel,
    )

    if split_enums:
        all_spans = split_enumerations(all_spans, raw, labels_to_split=("Symptom", "ADR"), also_split_and=split_and)

    final_spans = resolve_conflicts(all_spans, keep_finding=keep_finding)
    ann_lines = to_brat_ann_lines(final_spans, raw)

    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / (path.stem + ".ann")
    # An empty result still writes a (zero-byte) file; process_all relies on the size.
    out_path.write_text("\n".join(ann_lines) + ("\n" if ann_lines else ""), encoding="utf-8")

    if verbose:
        print(f"\n--- {path.name} ({len(raw)} chars) ---")
        for ln in ann_lines:
            print(ln)
    print(f"[OK] {path.name}: {len(final_spans)} spans → {out_path}")
    return out_path

In [43]:
def process_all(
    text_dir: Path,
    model: str,
    out_dir: Path,
    **kw,
):
    """Run ``process_one_file`` on every ``.txt`` file directly inside ``text_dir``.

    Files are processed in sorted order. A failure in one file is reported
    with a ``[WARN]`` line and does not stop the run. A file counts as having
    produced spans when its output exists and is non-empty; the tally is
    printed at the end.

    Args:
        text_dir: Directory to scan (non-recursively).
        model: Model identifier forwarded to ``process_one_file``.
        out_dir: Output directory forwarded to ``process_one_file``.
        **kw: Remaining keyword arguments for ``process_one_file``.
    """
    candidates = sorted(p for p in text_dir.iterdir() if p.suffix.lower() == ".txt")
    produced = 0
    for txt_path in candidates:
        try:
            ann_path = process_one_file(txt_path, model, out_dir, **kw)
            if ann_path.exists() and ann_path.stat().st_size > 0:
                produced += 1
        except Exception as ex:
            print(f"[WARN] {txt_path.name}: {ex}")
    print(f"Done: {produced}/{len(candidates)} files produced spans.")

In [50]:
from pathlib import Path

# === your paths ===
# Input directory, file selector, output directory and model for this run.
TEXT_DIR = Path("/Users/anjalikulkarni/Desktop/Assignment1/CADEC-lPWNPfjE-/data/cadec/text")
FILE_NAME = "ARTHROTEC.20.txt"  # set to "all" to process all files
OUT_DIR = Path("/Users/anjalikulkarni/Desktop/Assignment1/predicted")
MODEL = DEFAULT_MODEL

# === policy flags (match your CLI defaults) ===
kwargs = {
    "temperature": 0.0,
    "max_chars": MAX_CHARS,
    "verbose": True,
    "keep_finding": False,
    "allow_procedures": False,
    "distance_policy": "drop",
    "menstrual_policy": "auto",
    "disease_symptom_relabel": "on",
    "split_enums": True,
    "split_and": False,
}

if FILE_NAME.lower() != "all":
    # Single-file run: fail early on a wrong name, then show what was written.
    path = TEXT_DIR / FILE_NAME
    if not path.exists():
        raise FileNotFoundError(path)
    out_path = process_one_file(path, MODEL, OUT_DIR, **kwargs)
    print("\n--- .ann preview ---")
    print((OUT_DIR / (Path(FILE_NAME).stem + ".ann")).read_text(encoding="utf-8"))
else:
    process_all(TEXT_DIR, MODEL, OUT_DIR, **kwargs)



--- ARTHROTEC.20.txt (634 chars) ---
T1	ADR 19 34	lower back pain
T2	Drug 64 78	arthrotec 50mg
T3	ADR 197 213	menstrual cramps
T4	ADR 224 232	bleeding
T5	Symptom 249 257	sickness
T6	Symptom 269 275	puking
[OK] ARTHROTEC.20.txt: 6 spans → /Users/anjalikulkarni/Desktop/Assignment1/predicted/ARTHROTEC.20.ann

--- .ann preview ---
T1	ADR 19 34	lower back pain
T2	Drug 64 78	arthrotec 50mg
T3	ADR 197 213	menstrual cramps
T4	ADR 224 232	bleeding
T5	Symptom 249 257	sickness
T6	Symptom 269 275	puking

