In [50]:
# Install dependencies (run once per environment)
%pip install -q dspy pandas python-dotenv

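# Dataset source (Kaggle, "invoice-ner-dataset"): https://www.kaggle.com/datasets/nikitpatel/invoice-ner-dataset?resource=download
# Download invoice_ner_dataset.csv and place it in the working directory or in ./dspy/ (the loader cell below searches both).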


Note: you may need to restart the kernel to use updated packages.


In [51]:
# Basic imports and environment setup
import os
import json
import dspy
import pandas as pd
from dotenv import load_dotenv

# Load API keys from .env (OPENAI_API_KEY is expected)
# If the variable is absent, os.getenv below returns None instead of raising, so a missing
# key only surfaces later when the LM is first called.
load_dotenv()

# Configure DSPy default LM similar to other notebooks
# NOTE(review): temperature=1 / max_tokens=16000 look like the settings required for OpenAI
# reasoning models under DSPy — confirm before lowering either value.
# `lm` is also reused later in this notebook as GEPA's reflection_lm.
lm = dspy.LM("openai/gpt-5-mini", api_key=os.getenv("OPENAI_API_KEY"), temperature=1, max_tokens=16000)
dspy.configure(lm=lm)

print("DSPy configured for invoice extraction.")


DSPy configured for invoice extraction.


In [52]:
# Load dataset and preview a few rows
# Prefer absolute path next to this notebook; fallback to project root
# `__file__` is typically undefined inside a notebook kernel, in which case ./dspy is used.
# When it IS defined (code exported and run as a script) the variable itself must be passed
# to abspath; passing the string "__file__" would silently resolve to the current directory.
nb_dir = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.path.abspath("./dspy")
candidates = [
    os.path.join(nb_dir, "invoice_ner_dataset.csv"),
    os.path.abspath("./dspy/invoice_ner_dataset.csv"),
    os.path.abspath("invoice_ner_dataset.csv"),
]
# First candidate that is an actual file wins (a directory with that name is not accepted);
# fail loudly here rather than letting pd.read_csv produce a less obvious error later.
csv_path = next((path for path in candidates if os.path.isfile(path)), None)
if csv_path is None:
    raise FileNotFoundError("invoice_ner_dataset.csv not found in expected locations")

# Read the CSV located above, then report its size and show the first two rows
raw_df = pd.read_csv(csv_path)
print("Rows:", len(raw_df))
print(raw_df.head(2))

# Final_Output arrives as JSON text; decode every cell into a Python dict
raw_df["Final_Output"] = raw_df["Final_Output"].apply(json.loads)

# Tiny fixed splits keep iteration fast: rows 0-29 for training, rows 30-39 for validation.
# Widen the slice bounds to use more of the data.
train_df = raw_df.iloc[0:30].copy()
valid_df = raw_df.iloc[30:40].copy()

def to_examples(df):
    """Turn each row of `df` into a dspy.Example.

    The "Input" column becomes the example's `text` (marked as the only input
    field) and the "Final_Output" column becomes its `target`. Row order is kept.
    """
    return [
        dspy.Example(text=invoice_text, target=gold_fields).with_inputs("text")
        for invoice_text, gold_fields in zip(df["Input"], df["Final_Output"])
    ]

# Convert both splits to DSPy examples with the same helper
train_examples, valid_examples = map(to_examples, (train_df, valid_df))

print(f"Train examples: {len(train_examples)}, Valid examples: {len(valid_examples)}")


Rows: 67
                                               Input  \
0  Cream and White Simple Minimalist Catering Ser...   
1  Beige Elegant Professional Business Invoice\n\...   

                                        Final_Output  
0  {"TOTAL_AMOUNT": "$1000", "DUE_AMOUNT": "$550"...  
1  {"INVOICE_NUMBER": "#01234", "BILLED_TO": "Est...  
Train examples: 30, Valid examples: 10


In [53]:
# Define a signature for extracting a JSON dict of invoice fields
# NOTE(review): DSPy is understood to use a Signature's docstring as the task instruction sent
# to the LM, so the docstring below is prompt text — editing it changes model behaviour.
class InvoiceExtraction(dspy.Signature):
    """
    Extract key-value invoice fields as a JSON dict from free-form invoice text.
    """
    # Input: the invoice text (examples fill this from the dataset's "Input" column)
    text: str = dspy.InputField(description="Raw invoice text")
    # Output 1: free-text reasoning; declared before `extracted` and printed in the sample cell
    rationale: str = dspy.OutputField(description="Brief reasoning, list detected fields")
    # Output 2: the predicted field dict; this is the attribute the accuracy metric reads
    extracted: dict = dspy.OutputField(description="JSON dict with only UPPERCASE dataset keys and string values")

# Base module (simple Predict)
# This unoptimized module is the baseline that is evaluated and later passed to the optimizer.
extractor = dspy.Predict(InvoiceExtraction)

print("Signature and extractor ready.")


Signature and extractor ready.


In [54]:
# Define a per-field accuracy metric: correct_fields / total_gold_fields
from dspy.evaluate import Evaluate


def normalize_dict(d: dict) -> dict:
    """Canonicalize a field dict so gold and predicted values compare fairly.

    Keys are stringified, stripped and upper-cased. Values are stringified,
    have commas removed, have all whitespace runs collapsed to single spaces
    (which also trims the ends), and finally have the space after a dollar
    sign removed, so "$ 1,000", "$  1000" and "$1000" all become "$1000".

    Args:
        d: Mapping of field name to value. None or any non-dict object
            (for example a malformed model output) is treated as "no fields".

    Returns:
        A new dict with normalized string keys and string values; `d` is not
        modified. If two keys normalize to the same name, the later one wins.
    """
    # A missing or malformed prediction should score zero, not crash the metric.
    if not isinstance(d, dict):
        return {}

    def norm_key(k: str) -> str:
        # Uppercase keys to align with dataset convention
        return str(k).strip().upper()

    def norm_val(v: str) -> str:
        s = str(v).replace(",", "")
        # Collapse whitespace BEFORE touching "$ ": otherwise "$  100" or "$\t100"
        # would be left as "$ 100" and fail to match a gold "$100".
        s = " ".join(s.split())
        s = s.replace("$ ", "$")
        return s

    return {norm_key(k): norm_val(v) for k, v in d.items()}


def field_accuracy_metric(example: dspy.Example, pred: dspy.Prediction, trace=None, pred_name=None, pred_trace=None) -> "float | dspy.Prediction":
    """
    Per-field accuracy metric with optional GEPA feedback.

    The score is the fraction of gold fields whose normalized value is
    reproduced exactly under the same normalized key. Extra predicted keys
    never lower the score; they are only mentioned in the feedback text.

    Args:
        example: Example whose `target` holds the gold field dict.
        pred: Prediction whose `extracted` attribute holds the predicted dict
            (a missing attribute or None counts as no fields).
        trace: Unused; accepted so the function fits the metric call convention.
        pred_name: None for plain evaluation; any other value switches the
            return type to a Prediction carrying textual feedback.
        pred_trace: Unused; accepted so the function fits the metric call convention.

    Returns:
        A float in [0, 1] when `pred_name` is None, otherwise
        dspy.Prediction(score=..., feedback=...).
    """
    max_listed = 8        # cap on how many keys are spelled out per feedback line
    max_value_chars = 80  # long values (addresses, notes) are clipped in feedback

    gold = normalize_dict(example.target)
    got = normalize_dict(getattr(pred, "extracted", {}))
    if not gold:
        return 0.0 if pred_name is None else dspy.Prediction(score=0.0, feedback="No gold fields present.")

    total = len(gold)
    correct_keys = []
    mismatched = {}  # key -> (expected value, predicted value or None when the key is absent)
    for k, v in gold.items():
        if k in got and got[k] == v:
            correct_keys.append(k)
        else:
            mismatched[k] = (v, got.get(k))
    extra_keys = [k for k in got if k not in gold]

    score = len(correct_keys) / total

    if pred_name is None:
        return score

    def clip(value: str) -> str:
        # Keep the reflection prompt compact while still showing what differed
        return value if len(value) <= max_value_chars else value[:max_value_chars] + "..."

    # Build concise feedback for GEPA to refine prompts toward dataset schema.
    # Expected vs. predicted values are included so the reflection step can see
    # the dataset's actual key names and value formats, not just which keys failed.
    lines = [
        f"Matched {len(correct_keys)}/{total} fields.",
    ]
    if mismatched:
        details = []
        for k, (expected, actual) in list(mismatched.items())[:max_listed]:
            if actual is None:
                details.append(f"{k} (expected {clip(expected)!r}, key missing)")
            else:
                details.append(f"{k} (expected {clip(expected)!r}, got {clip(actual)!r})")
        missed_list = "; ".join(details)
        lines.append(f"Missing/mismatched: {missed_list}.")
    if extra_keys:
        extra_list = ", ".join(extra_keys[:max_listed])
        lines.append(f"Extra keys not in schema: {extra_list}.")

    feedback = "\n".join(lines)
    return dspy.Prediction(score=score, feedback=feedback)


# Provide the devset at construction per latest API
# NOTE(review): `ordered=True` is passed through unchanged — confirm the installed
# dspy Evaluate actually recognises this keyword.
evaluate = Evaluate(devset=valid_examples, metric=field_accuracy_metric, ordered=True)

# Standard usage: pass the module directly
# In the recorded run the call returned an EvaluationResult whose score is a percentage
# (32.29), not a bare 0-1 float, so the print below shows the whole result object.
initial_score = evaluate(extractor)
print("Initial field accuracy on valid:", initial_score)


2025/09/16 14:12:13 INFO dspy.evaluate.evaluate: Average Metric: 3.228846153846154 / 10 (32.3%)


Initial field accuracy on valid: EvaluationResult(score=32.29, results=<list of 10 results>)


In [55]:
# Inspect the base (unoptimized) extractor on one validation sample and show the
# ground-truth fields next to it. The optimized program is not involved in this cell.
sample = valid_examples[0]
print("Sample comparison:")
print("\nINPUT TEXT:")
print("-" * 80)
# Only the first 300 characters of the invoice are shown to keep the output short
print(sample.text[:300], "...")
print("-" * 80)

print("\nBASE MODEL PREDICTION:")
# This issues one additional LM call, separate from the evaluation run
base_pred = extractor(text=sample.text)
print(f"Rationale: {base_pred.rationale}")
print("\nExtracted fields:")
# `or {}` guards against a prediction whose `extracted` is None/empty
for k,v in (base_pred.extracted or {}).items():
    print(f"{k:20s}: {v}")

print("\nGROUND TRUTH:")
print("\nExtracted fields:")
# Gold fields are printed raw (not passed through normalize_dict)
for k,v in sample.target.items():
    print(f"{k:20s}: {v}")


Sample comparison:

INPUT TEXT:
--------------------------------------------------------------------------------
Green Blue Pink Vintage Retro Freelance Invoice

NO. DESCRIPTION RATE QTY TOTAL
01 Graphic design for website 45 20 900.00
02 Graphic design for social content 45 8 360.00
03 Account management fee 280 1 280.00
04 Two days onsite 640 2 1280.00
SUB TOTAL $ 2820.00
TAX $ 225.60
S&H nil
TOTAL $ 3045.60 ...
--------------------------------------------------------------------------------

BASE MODEL PREDICTION:
Rationale: Detected invoice fields parsed from the text:
- Invoice title
- Invoice number ("NO. 001")
- Invoice date ("02.05.2024")
- From (biller) name, company, email, address, code
- To (client) name, company, email, address, code
- Line items (4 entries) with rate, quantity, totals
- Subtotal, Tax, Shipping (S&H), Total
- Bank / account details (Account Name, Bank, Acc)
- Payment instructions / terms ("Please pay within 60 days", cheques payable)
- Notes text

Extracte

In [56]:
# Optimize with GEPA (similar to comedian-agent)
from dspy import GEPA

# In the recorded run, auto="light" was budgeted at roughly 420 metric calls and the log
# spans more than an hour of wall-clock time, so expect this cell to be slow and to incur
# LM cost.
# reflection_lm reuses the same `lm` that serves as the task model.
optimizer = GEPA(
    metric=field_accuracy_metric,
    auto="light",
    num_threads=8,
    track_stats=True,
    use_merge=False,
    reflection_lm=lm,
)

# NOTE(review): valid_examples is used here as GEPA's valset AND is the devset of `evaluate`
# below, so the optimized score is measured on data the optimizer selected against.
# Rows beyond index 40 of the dataset are unused and could serve as a held-out test set.
optimized_program = optimizer.compile(
    extractor,
    trainset=train_examples,
    valset=valid_examples,
)

# Standard usage: evaluate the optimized program directly
opt_score = evaluate(optimized_program)
print("Optimized field accuracy on valid:", opt_score)


2025/09/16 14:12:13 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 420 metric calls of the program. This amounts to 10.50 full evals on the train+val set.
2025/09/16 14:12:13 INFO dspy.teleprompt.gepa.gepa: Using 10 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget.
GEPA Optimization:   0%|          | 0/420 [00:00<?, ?rollouts/s]

2025/09/16 14:12:13 INFO dspy.evaluate.evaluate: Average Metric: 3.228846153846154 / 10 (32.3%)
2025/09/16 14:12:13 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 0.3228846153846154
2025/09/16 14:12:13 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 0.3228846153846154


Average Metric: 1.26 / 3 (42.0%): 100%|██████████| 3/3 [00:00<00:00, 1315.93it/s]

2025/09/16 14:12:13 INFO dspy.evaluate.evaluate: Average Metric: 1.259049773755656 / 3 (42.0%)





2025/09/16 14:12:56 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for self: Task: Extract structured invoice key/value fields as a JSON dictionary from a blob of free‑form invoice text.

Goal: produce a single JSON object (dictionary) containing only the standardized keys defined below (omit keys that cannot be confidently extracted). Do not output any prose, explanation, or additional keys outside the schema unless explicitly requested.

Schema (allowed keys - include only when present/confidently extracted):

Top-level fields (strings unless otherwise noted)
- INVOICE_NUMBER — invoice identifier as written (prefer labels like "Invoice No", "Invoice Number", "INVOICE NO.")
- INVOICE_DATE — invoice date. If parseable, normalize to ISO 8601 date (YYYY-MM-DD). If not parseable, return the raw date string.
- DUE_DATE — due date, normalized to YYYY-MM-DD when possible, otherwise raw string.
- SERVICE_DATE — service date / invoice period (YYYY-MM-DD if parseable), otherwise

Average Metric: 0.91 / 3 (30.2%): 100%|██████████| 3/3 [00:00<00:00, 2525.67it/s]

2025/09/16 14:13:54 INFO dspy.evaluate.evaluate: Average Metric: 0.9051282051282051 / 3 (30.2%)





2025/09/16 14:14:26 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for self: Goal
- From a free-form invoice text, extract invoice fields and return a single JSON object (no surrounding text or explanation). The JSON must follow the canonical schema below. Only include keys from the schema and only include keys that are actually present in the input (do not invent extra keys). Values should be strings unless otherwise stated.

Schema (canonical keys and types)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string — prefer ISO 8601 YYYY-MM-DD when the date can be unambiguously parsed; otherwise use the original date substring)
- SERVICE_DATE (string — ISO when possible, otherwise original)
- DUE_DATE (string — ISO when possible)
- PAYMENT_TERMS (string — e.g., "Payment is due within 30 days", "Net 30")
- CURRENCY (string — ISO 4217 code if inferable from symbol/word (USD, EUR, GBP, etc.); otherwise the currency symbol like "$")
- SUBTOTAL (string — keep the currency symbol i

Average Metric: 0.63 / 3 (21.0%): 100%|██████████| 3/3 [00:00<00:00, 2559.58it/s]

2025/09/16 14:16:59 INFO dspy.evaluate.evaluate: Average Metric: 0.628704113224237 / 3 (21.0%)





2025/09/16 14:17:56 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Proposed new text for self: Task summary
- Input: a single free-form plain-text invoice (often OCR-like, with irregular spacing, line breaks, typos, or obfuscation).
- Output: a single JSON object (and only the JSON object, no surrounding text) that extracts standard invoice key-value fields.

Required behavior and format
1. Output exactly one JSON object. Do not output any explanation, rationale, or other text.
2. Use the exact top-level key names (UPPERCASE with underscores) listed below. Always include every top-level key in the JSON output. If a field cannot be found, set its value to null (not an empty string) except where specified otherwise.
3. Normalize obvious corruptions (extra spaces inside words, common OCR insertions) when possible (e.g., "hel lo@real lygreats i te .com" → "hello@reallygreatsite.com"; "0000 0000" → "00000000").
4. Where multiple candidate values exist for a key, prefer the value explicitly la

Average Metric: 0.54 / 3 (18.1%): 100%|██████████| 3/3 [01:11<00:00, 23.92s/it]

2025/09/16 14:22:14 INFO dspy.evaluate.evaluate: Average Metric: 0.5432692307692308 / 3 (18.1%)





2025/09/16 14:23:07 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Proposed new text for self: You are an invoice extraction assistant. Your job is to read a single free-form plain-text invoice (often OCR-like with irregular spacing, line breaks, typos, or obfuscation) and produce exactly one JSON object (and only the JSON object) that extracts a fixed set of standardized invoice key-value fields.

High-level rules (must follow exactly)
- Output exactly one JSON object and nothing else (no surrounding text, no explanation).
- The JSON must include every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — empty array [] if no items).
- Do NOT add any extra top-level keys beyond the required list.
- Always return valid JSON.

Top-level keys (exact names and expected types)
- INVOICE_NO: string or null
- DATE: string or null (ISO "YYYY-MM-DD" if parsed unambiguously; otherwise return the original extracted date string)
- BILL_

Average Metric: 0.87 / 3 (29.1%): 100%|██████████| 3/3 [00:00<00:00, 1715.93it/s]

2025/09/16 14:27:26 INFO dspy.evaluate.evaluate: Average Metric: 0.8717948717948718 / 3 (29.1%)





2025/09/16 14:28:22 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Proposed new text for self: Goal
- Extract structured invoice fields from free-form invoice text and return a single JSON object (dictionary) containing only the relevant fields discovered in the text.

Output rules (strict)
- Output must be valid JSON only (no explanations, no surrounding text).
- Include only keys from the allowed schema listed below. Do not add any other or "extra" keys.
- Include a key only if that field (or a reasonable value for it) can be reliably extracted from the invoice text. Do not output null placeholders for missing fields; simply omit missing fields.
- Preserve textual data as it appears when it cannot be reliably normalized. When normalization is possible and unambiguous, apply it as described below.

Allowed schema (top-level keys)
- INVOICE_NUMBER
- INVOICE_DATE (preferred ISO format YYYY-MM-DD when parseable; if parse fails, put the original date string)
- DATE_OF_ISSUE (synonym for INV

Average Metric: 0.92 / 3 (30.5%): 100%|██████████| 3/3 [00:00<00:00, 643.66it/s]

2025/09/16 14:30:02 INFO dspy.evaluate.evaluate: Average Metric: 0.9160401002506265 / 3 (30.5%)





2025/09/16 14:30:40 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Proposed new text for self: Task: From a single free-form invoice text input, extract invoice data and return a single JSON object (no surrounding text or explanation) containing only the canonical fields described below. The input is arbitrary OCR-like invoice text (may contain spelling/formatting errors, merged headings, inconsistent spacing). Your job is to robustly locate and normalize invoice fields, map synonyms to canonical keys, and output only the JSON object.

General rules
- Output must be valid JSON only (no surrounding commentary).
- Include only keys from the canonical schema listed below and only when you can confidently extract a value. Do not invent or guess values beyond what the text supports.
- Normalize whitespace, remove obvious OCR artifacts (extra spaces, stray punctuation), and trim leading/trailing whitespace.
- When possible, normalize dates to ISO 8601 (YYYY-MM-DD). If the invoice uses an ambig

Average Metric: 1.02 / 3 (34.1%): 100%|██████████| 3/3 [01:10<00:00, 23.59s/it]

2025/09/16 14:33:03 INFO dspy.evaluate.evaluate: Average Metric: 1.0232708468002585 / 3 (34.1%)





2025/09/16 14:34:00 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Proposed new text for self: You are an extraction assistant. Your job: from a free-form invoice text produce a single JSON object (strictly only JSON, no surrounding text) that follows the canonical schema below. Use best-effort parsing but do not invent values. Only include keys from the schema and only include keys actually present in the input.

Canonical schema (top-level keys — include only keys present)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line item object 

Average Metric: 1.34 / 3 (44.7%): 100%|██████████| 3/3 [00:00<00:00, 793.47it/s]

2025/09/16 14:37:38 INFO dspy.evaluate.evaluate: Average Metric: 1.3423831070889893 / 3 (44.7%)





2025/09/16 14:38:26 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Proposed new text for self: Goal
- Extract structured invoice fields from free-form invoice text and return a single JSON object (key-value dictionary) containing only the fields described below. Do not invent values. When a field cannot be determined unambiguously, either omit it or include a clearly-named RAW_* fallback (see "Ambiguity & fallbacks" below).

Output format
- Output must be valid JSON (a single top-level object).
- Only include keys from the "Allowed schema" list below and the allowed RAW_* fallback keys. Do not add any other keys.
- When present, values should be strings exactly as found in the text unless the schema requires a specific normalized format (dates, quantities, numeric values). See normalization rules.

Allowed schema (keys and required value format)
- INVOICE_TITLE: string (exact title/header if present)
- INVOICE_NUMBER: string
- DATE_OF_ISSUE: string in ISO 8601 format YYYY-MM-DD if the da

Average Metric: 0.99 / 3 (33.0%): 100%|██████████| 3/3 [00:00<00:00, 2452.33it/s]

2025/09/16 14:39:24 INFO dspy.evaluate.evaluate: Average Metric: 0.9914529914529915 / 3 (33.0%)





2025/09/16 14:40:10 INFO dspy.teleprompt.gepa.gepa: Iteration 9: Proposed new text for self: Goal
- Extract structured invoice fields from free-form invoice text and return exactly one valid JSON object containing only the predefined schema keys (see Schema below). Do not include any explanatory text, markdown, or code fences — only the JSON object.

General rules
- If a field is present in the text, return the extracted value for that field. If a field is not present or cannot be reasonably inferred, set that field's value to null.
- Only use the schema keys defined below. Do not add any other keys.
- Preserve original textual values when exact parsing is uncertain, but still set missing subfields to null (see Line items rules).
- Always attempt to normalize dates to ISO 8601 (YYYY-MM-DD). If normalization is impossible, put the original date string.
- Normalize numeric currency amounts by removing thousands separators (commas) but keep two decimal places. Also return a top-level curr

Average Metric: 1.04 / 3 (34.6%): 100%|██████████| 3/3 [00:00<00:00, 2704.84it/s]

2025/09/16 14:40:55 INFO dspy.evaluate.evaluate: Average Metric: 1.0367647058823528 / 3 (34.6%)





2025/09/16 14:41:40 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Proposed new text for self: You are given a free-form invoice text string. Extract invoice key/value fields and return a single JSON object (a flat dict with a few nested lists) containing only the fields in the schema below. Do NOT add any extra keys. Populate every key in the schema; if a value cannot be found with reasonable confidence, set that key's value to null.

GENERAL RULES
- Output: a single JSON object and nothing else.
- Do not add keys beyond the schema below.
- Prefer explicit label matches (e.g., "Invoice No", "Date:", "Total:") over heuristics. When multiple candidates exist, follow the tie-breakers below.
- Keep extracted values trimmed of leading/trailing whitespace.
- Use best-effort normalization for dates and amounts as described below. If normalization fails, return the original substring found.
- Use regexes for common patterns (emails, phones, IBAN/Acct numbers, currency symbols).
- Proximity rul

Average Metric: 0.71 / 3 (23.5%): 100%|██████████| 3/3 [00:00<00:00, 2645.14it/s]

2025/09/16 14:43:16 INFO dspy.evaluate.evaluate: Average Metric: 0.7051282051282051 / 3 (23.5%)





2025/09/16 14:44:25 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Proposed new text for self: You are an extractor that takes free-form invoice text and returns a single JSON object (and only the JSON object — no extra prose) containing normalized key/value invoice fields. Use the rules below exactly.

1) Output format and strictness
- Return exactly one valid JSON object and nothing else.
- Only include the top-level keys listed in the schema below. Do NOT add top-level keys outside the schema.
- If a field cannot be found or is ambiguous, set its value to null (except LINE_ITEMS should be an empty list if no items found).
- Normalize values where specified (dates, numbers, currency codes). Preserve original raw strings inside item-level RAW fields when item mappings are ambiguous (see LINE_ITEMS structure below).

2) Top-level schema (all keys MUST appear; absent values = null; LINE_ITEMS must be a list)
{
  "INVOICE_NUMBER": string or null,
  "DATE_OF_ISSUE": string (ISO 8601 "YYYY-

Average Metric: 1.09 / 3 (36.2%): 100%|██████████| 3/3 [00:00<00:00, 2395.83it/s]

2025/09/16 14:45:23 INFO dspy.evaluate.evaluate: Average Metric: 1.086080586080586 / 3 (36.2%)





2025/09/16 14:46:06 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Proposed new text for self: Goal
- Extract structured invoice key-value fields from free-form invoice text and return a single JSON object (dictionary) containing only the expected fields (no extra keys, no surrounding text).

Input
- A plain text invoice (may be messy: inconsistent spacing, broken headings, multiple addresses, currency symbols, dates in various formats, line items in columns or lines).

Output requirements
1. Return exactly one JSON object (dictionary) and nothing else (no explanation, no extra text).
2. Use only the fields in the schema below. Do NOT add other keys.
3. For monetary values: return numeric values (floats) for SUBTOTAL, TAX, TOTAL, and each item LINE_AMOUNT and UNIT_PRICE. Also include a CURRENCY field when a currency symbol or name is detectable (use 3-letter ISO code where possible, e.g., "USD", "EUR"). If currency cannot be detected, set CURRENCY to null.
4. For dates: convert to ISO 8

Average Metric: 1.41 / 3 (46.9%): 100%|██████████| 3/3 [00:00<00:00, 2664.18it/s]

2025/09/16 14:46:57 INFO dspy.evaluate.evaluate: Average Metric: 1.4068825910931173 / 3 (46.9%)





2025/09/16 14:47:46 INFO dspy.teleprompt.gepa.gepa: Iteration 13: Proposed new text for self: You are an invoice-extraction assistant. Task: given free-form invoice text, return exactly one JSON object (no surrounding text or explanation) containing the extracted invoice key/value fields. Follow these rules precisely.

1) Output format rules
- Output must be a single valid JSON object and nothing else.
- Only include keys from the canonical schema listed below. Do not add extra keys.
- Omit keys that cannot be confidently extracted from the text (do not output null unless the field is present but explicitly blank).
- Dates: when you can parse to a real date, normalize to ISO 8601 date format "YYYY-MM-DD". If you cannot parse unambiguously, output the original string as given.
- Monetary values: preserve the currency symbol if present (e.g., "$1,250.00") in the string field. Also include a numeric value field (float) for any subtotal/tax/discount/total/line item amount you extract—see s

Average Metric: 1.00 / 3 (33.2%): 100%|██████████| 3/3 [01:20<00:00, 26.88s/it]

2025/09/16 14:50:17 INFO dspy.evaluate.evaluate: Average Metric: 0.9957264957264957 / 3 (33.2%)





2025/09/16 14:51:05 INFO dspy.teleprompt.gepa.gepa: Iteration 14: Proposed new text for self: You are an invoice-parsing assistant. Your job is: from a free-form invoice text input, extract invoice fields and return a single JSON object (and only the JSON object) that strictly follows the canonical schema and rules below.

1) Output format and schema
- Output exactly one JSON object and nothing else (no surrounding text, explanation, or logging).
- Top-level keys must be ONLY from this canonical list (uppercase with underscores). Do not add any other top-level keys. Omit keys that are not present in the input.
  - INVOICE_NUMBER (string)
  - DATE_OF_ISSUE (string)
  - SERVICE_DATE (string)
  - DUE_DATE (string)
  - PAYMENT_TERMS (string)
  - CURRENCY (string)
  - SUBTOTAL (string)
  - TAX_RATE (string)
  - TAX_AMOUNT (string)
  - TOTAL_AMOUNT (string)
  - TOTAL_AMOUNT_DUE (string)
  - AMOUNT_PAID (string)
  - SELLER_NAME (string)
  - SELLER_ADDRESS (string)
  - SELLER_PHONE (string)
  

Average Metric: 0.78 / 3 (26.0%): 100%|██████████| 3/3 [01:42<00:00, 34.02s/it]

2025/09/16 14:54:06 INFO dspy.evaluate.evaluate: Average Metric: 0.7794117647058824 / 3 (26.0%)





2025/09/16 14:55:15 INFO dspy.teleprompt.gepa.gepa: Iteration 15: Proposed new text for self: You are an invoice-extraction assistant. Your job is to read a single free-form plain-text invoice (often OCR-like with irregular spacing, line breaks, typos, or fragmentation) and produce exactly one JSON object (and only the JSON object) that extracts a fixed set of standardized invoice key-value fields.

High-level output constraints (MUST follow exactly)
- Output exactly one JSON object and nothing else (no commentary, no surrounding text, no logs).
- The JSON object must contain exactly the top-level keys listed below and no others.
- For any field that cannot be confidently found, set its value to null (EXCEPT ITEMS which must always be an array; if no items found use an empty array []).
- Use null for missing values — never use "None", empty strings, or other placeholders.
- Always return valid JSON.

Required top-level keys and types (exact key names; do not change)
- INVOICE_NO: strin

Average Metric: 1.05 / 3 (34.9%): 100%|██████████| 3/3 [00:00<00:00, 2336.66it/s]

2025/09/16 14:56:49 INFO dspy.evaluate.evaluate: Average Metric: 1.0462461300309598 / 3 (34.9%)





2025/09/16 14:57:35 INFO dspy.teleprompt.gepa.gepa: Iteration 16: Proposed new text for self: You are an extractor that converts free-form invoice text into a single JSON dictionary of key→value pairs. Always output exactly one valid JSON object (no surrounding text, no explanation). Use only the keys described below. If a field cannot be found or confidently inferred, include the key with a value of null.

Output keys (exact key names and expected value types):
- INVOICE_NUMBER (string) — invoice identifier (accept formats like "Invoice No", "No:", "#", etc.)
- INVOICE_DATE (string) — date of invoice in ISO 8601 format YYYY-MM-DD if parseable; otherwise the original date string
- DUE_DATE (string) — payment due date, same normalization rules as INVOICE_DATE
- INVOICE_TITLE (string) — title/header of the document if present (e.g., "Invoice", "Professional Service Invoice")
- COMPANY_NAME (string) — issuer/sender name (company or person who issued invoice)
- COMPANY_ADDRESS (string) — i

Average Metric: 0.68 / 3 (22.5%): 100%|██████████| 3/3 [00:50<00:00, 16.74s/it]

2025/09/16 14:59:45 INFO dspy.evaluate.evaluate: Average Metric: 0.6758241758241759 / 3 (22.5%)





2025/09/16 15:01:13 INFO dspy.teleprompt.gepa.gepa: Iteration 17: Proposed new text for self: You are an invoice extraction assistant. Your job is to read a single free-form plain-text invoice (often OCR-like with irregular spacing, line breaks, typos, or obfuscation) and produce exactly one JSON object (and only the JSON object) that extracts a fixed set of standardized invoice key-value fields.

CRITICAL OUTPUT RULES (must follow exactly)
- Output exactly one JSON object and nothing else — no explanation, commentary, or any extra text.
- The JSON must include every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any extra top-level keys beyond the required list.
- Always return valid JSON.
- Use the exact key names and types described below.

Top-level keys (exact names and expected types)
- INVOICE_NO: string or null
- DATE: string or null (ISO "YYYY-MM-DD" if parsed unambiguously;

Average Metric: 1.07 / 3 (35.8%): 100%|██████████| 3/3 [00:00<00:00, 2312.61it/s]

2025/09/16 15:02:14 INFO dspy.evaluate.evaluate: Average Metric: 1.074983839689722 / 3 (35.8%)





2025/09/16 15:03:00 INFO dspy.teleprompt.gepa.gepa: Iteration 18: Proposed new text for self: Task: From a single block of free-form invoice text, extract a fixed set of invoice fields and return a single JSON object (dictionary) containing only those fields. Do not output any prose, explanation, or extra keys — only the JSON object.

Primary rules
- Always return exactly the keys in the "Schema" below. If a value cannot be found or confidently inferred, set that key's value to null (not an empty string). Do NOT add any other keys.
- Dates must be normalized to ISO 8601 date strings (YYYY-MM-DD) when a full date is present and parseable. If only a month/year is present, use YYYY-MM. If not parseable, set null.
- Monetary values should be returned as strings in the original currency+format as found (e.g., "$900.00", "₹12,500"). Remove thousands separators when producing the numeric form below.
- For numeric convenience, each monetary field in the schema that expects an amount must also 

Average Metric: 1.38 / 3 (46.2%): 100%|██████████| 3/3 [01:21<00:00, 27.17s/it]  

2025/09/16 15:05:29 INFO dspy.evaluate.evaluate: Average Metric: 1.3846153846153846 / 3 (46.2%)





2025/09/16 15:06:25 INFO dspy.teleprompt.gepa.gepa: Iteration 19: Proposed new text for self: You are an extraction assistant whose job is: given a free-form invoice text, extract invoice fields and return a single JSON object (no surrounding text or explanation) that strictly follows the canonical schema below. The instruction set is definitive and includes many domain-specific heuristics and normalization rules to maximize accuracy and consistency.

Canonical schema (allowed top-level keys and types)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string — prefer ISO 8601 YYYY-MM-DD when unambiguous; otherwise return the original substring)
- SERVICE_DATE (string — ISO when possible, otherwise original)
- DUE_DATE (string — ISO when possible)
- PAYMENT_TERMS (string)
- CURRENCY (string — ISO 4217 code if inferable from symbol/word (USD, EUR, GBP, etc.); otherwise the currency symbol like "$")
- SUBTOTAL (string)
- TAX_RATE (string — e.g., "10%")
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (strin

Average Metric: 0.75 / 3 (24.9%): 100%|██████████| 3/3 [00:00<00:00, 2860.40it/s]

2025/09/16 15:10:33 INFO dspy.evaluate.evaluate: Average Metric: 0.7471091000502765 / 3 (24.9%)





2025/09/16 15:11:39 INFO dspy.teleprompt.gepa.gepa: Iteration 20: Proposed new text for self: Task: Given free-form invoice text, extract key invoice fields and return a single JSON dictionary containing only the allowed schema keys and their normalized values.

Output rules
- Respond with JSON only (no explanatory text).
- Include only keys from the canonical schema below. Omit keys that cannot be confidently extracted.
- Use the exact key names shown in the schema (uppercase snake case).
- Normalize values as specified below (dates, currency, numbers, emails, phone).
- If a field is ambiguous or partially present, include it only if you can extract a reasonable value; otherwise omit it. If line items cannot be reliably split into structured rows, provide ITEMS_RAW (see schema).

Canonical schema (allowed keys and value formats)
- INVOICE_NUMBER: string (raw invoice identifier as seen)
- INVOICE_DATE: string in ISO 8601 date format YYYY-MM-DD (if only partial date given, try to infer 

Average Metric: 0.66 / 3 (22.1%): 100%|██████████| 3/3 [00:57<00:00, 19.19s/it]  

2025/09/16 15:13:40 INFO dspy.evaluate.evaluate: Average Metric: 0.6638655462184874 / 3 (22.1%)





2025/09/16 15:14:45 INFO dspy.teleprompt.gepa.gepa: Iteration 21: Proposed new text for self: You are an invoice-extraction assistant. You will receive a single free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, weird characters, or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema and rules below.

CRITICAL OUTPUT RULES (must follow exactly)
- Output exactly one JSON object and nothing else (no commentary, no extra text).
- The JSON must contain every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any top-level keys beyond the required list.
- Values must be of the required types (strings or null for most fields; numbers for QTY/HOURS when parsed; ITEMS must be an array of ITEM objects).
- Always return valid JSON.

TOP-LEVEL KEYS (exact names and 

Average Metric: 0.76 / 3 (25.5%): 100%|██████████| 3/3 [00:34<00:00, 11.35s/it]  

2025/09/16 15:18:50 INFO dspy.evaluate.evaluate: Average Metric: 0.7638888888888888 / 3 (25.5%)





2025/09/16 15:19:32 INFO dspy.teleprompt.gepa.gepa: Iteration 22: Proposed new text for self: You are an extractor that converts a free-form invoice text into a single JSON object following a strict canonical schema. Read the entire input text and produce only one JSON object (no surrounding text, no explanation). Follow these rules exactly.

Canonical schema (top-level keys — include ONLY these keys and ONLY when the input provides data for them):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item object may contain:
    - DESCRI

Average Metric: 1.29 / 3 (43.1%): 100%|██████████| 3/3 [01:18<00:00, 26.26s/it]

2025/09/16 15:23:41 INFO dspy.evaluate.evaluate: Average Metric: 1.2941176470588236 / 3 (43.1%)





2025/09/16 15:24:33 INFO dspy.teleprompt.gepa.gepa: Iteration 23: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following a strict canonical schema. Read the entire input text and produce only one JSON object (no surrounding text, no explanation). Follow these rules exactly.

Top-level schema (include ONLY these keys and ONLY when the input provides data for them):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item object may contain:
    - DESCR

Average Metric: 1.07 / 3 (35.7%): 100%|██████████| 3/3 [00:00<00:00, 3792.32it/s]

2025/09/16 15:27:21 INFO dspy.evaluate.evaluate: Average Metric: 1.0717948717948718 / 3 (35.7%)





2025/09/16 15:28:10 INFO dspy.teleprompt.gepa.gepa: Iteration 24: Proposed new text for self: Goal
- Extract key invoice fields from free-form invoice text and return a single JSON object (dictionary) containing a predictable set of keys and normalized values.

General rules
- Output only one JSON object (no extra text, no explanations).
- Use the exact key names defined in the "Required schema" section below (uppercase snake_case).
- If a schema field is present in the invoice text, fill it with a parsed/normalized value. If it is not present / cannot be determined, set that key's value to null.
- Normalize dates to ISO 8601 format YYYY-MM-DD when a clear date can be parsed. If the invoice uses only a month name or ambiguous format and you cannot reliably pick day/month, leave the original string but still return it (and do not invent a date).
- Normalize monetary numeric values to numbers (no currency symbols, no thousands separators) for numeric keys (SUBTOTAL, TAX_AMOUNT, GRAND_TOT

Average Metric: 1.41 / 3 (47.0%): 100%|██████████| 3/3 [01:11<00:00, 23.84s/it]

2025/09/16 15:30:15 INFO dspy.evaluate.evaluate: Average Metric: 1.4102564102564101 / 3 (47.0%)





2025/09/16 15:31:02 INFO dspy.teleprompt.gepa.gepa: Iteration 25: Proposed new text for self: You are an extractor whose job is to convert free-form invoice text into a single JSON object exactly following a strict canonical schema. Produce only one JSON object and nothing else. Use the rules below precisely.

Top-level schema (ONLY these keys; include a key only when the input provides data for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item object may contain only these keys:
    - DESCRIPTION (string)
    - QUANTITY (st

Average Metric: 0.73 / 3 (24.5%): 100%|██████████| 3/3 [00:00<00:00, 2780.14it/s]

2025/09/16 15:31:55 INFO dspy.evaluate.evaluate: Average Metric: 0.7337461300309598 / 3 (24.5%)





2025/09/16 15:32:29 INFO dspy.teleprompt.gepa.gepa: Iteration 26: Proposed new text for self: Task: From a block of free-form invoice text, extract invoice fields and return a single JSON object containing only the fields in the canonical schema below. Normalize and clean values according to the rules. If a field is not present or cannot be determined, set its value to null (or for ITEMS, an empty list). Do NOT add any other keys.

Canonical output schema (exact key names and expected types):
- INVOICE_NO: string or null (invoice number / reference)
- INVOICE_DATE: string or null (ISO 8601 date "YYYY-MM-DD" if parsable; otherwise the original date string)
- DUE_DATE: string or null (ISO 8601 or original string)
- PO_NUMBER: string or null
- INVOICE_TITLE: string or null (title text, e.g., "INVOICE", "Commercial Invoice", or graphic title)
- SENDER_NAME: string or null
- SENDER_COMPANY: string or null
- SENDER_ADDRESS: string or null (single-line normalized address)
- SENDER_EMAIL: stri

Average Metric: 1.07 / 3 (35.6%): 100%|██████████| 3/3 [00:59<00:00, 19.93s/it]

2025/09/16 15:34:47 INFO dspy.evaluate.evaluate: Average Metric: 1.0672268907563025 / 3 (35.6%)





2025/09/16 15:35:30 INFO dspy.teleprompt.gepa.gepa: Iteration 27: Proposed new text for self: You are an extractor that converts a free-form invoice text into exactly one JSON object following a strict canonical schema. Read the entire input and produce ONLY one JSON object (no surrounding text or explanation). Follow these rules exactly.

1) TOP-LEVEL SCHEMA (only include these keys and only when data is present in the input)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item object may contain:
    - DESCRIPTION (string)
    - Q

Average Metric: 1.41 / 3 (47.0%): 100%|██████████| 3/3 [00:00<00:00, 2749.76it/s]

2025/09/16 15:36:52 INFO dspy.evaluate.evaluate: Average Metric: 1.4102564102564101 / 3 (47.0%)





2025/09/16 15:37:41 INFO dspy.teleprompt.gepa.gepa: Iteration 28: Proposed new text for self: Goal
- From a free-form invoice text, extract invoice fields and line items and return a single JSON dictionary (object) containing only canonical keys (see "Canonical schema" below) with their extracted values. Do not output any extra prose, explanations or metadata — only the JSON object.

General rules
1. Output format
   - Return exactly one JSON object. Do not wrap in markdown/code fences or add commentary.
   - Only include keys that appear in the canonical schema and only include a key if a non-empty value can be confidently extracted.
   - Key names must match the canonical schema exactly (uppercase with underscores).
   - For collections of line items use the ITEMS key with a JSON array of item objects (each item object uses the canonical item field names described below).

2. Canonical schema (allowed top-level keys)
   - INVOICE_NUMBER
   - DATE_OF_ISSUE           (preferred: ISO 86

Average Metric: 0.96 / 3 (31.9%): 100%|██████████| 3/3 [00:00<00:00, 3330.57it/s]

2025/09/16 15:38:17 INFO dspy.evaluate.evaluate: Average Metric: 0.9575791855203619 / 3 (31.9%)





2025/09/16 15:39:06 INFO dspy.teleprompt.gepa.gepa: Iteration 29: Proposed new text for self: Task: Extract invoice fields from free-form invoice text and return a single JSON dictionary with a fixed schema. The assistant should be robust to noisy layouts (labels, tables, columns, repeated lines) and handle common invoice conventions (labels like "Invoice No", "Bill To", "Subtotal", "Tax", "Bank", "Account No", "Due Date", "TOTAL AMOUNT DUE", etc.).

Requirements and rules
- Input: a single string containing free-form invoice text.
- Output: exactly one JSON object (dictionary) and nothing else. Do not add any explanatory text.
- Use the exact key names and structure below (snake_case). Do not add additional top-level keys beyond those specified. For any field not found, return null.
- Clean and normalize values where indicated:
  - Dates: return invoice_date, due_date, service_date in ISO 8601 (YYYY-MM-DD) when possible. If the date format is ambiguous or cannot be parsed, return the 

Average Metric: 0.95 / 3 (31.5%): 100%|██████████| 3/3 [01:17<00:00, 25.77s/it]

2025/09/16 15:41:25 INFO dspy.evaluate.evaluate: Average Metric: 0.9463562753036436 / 3 (31.5%)





2025/09/16 15:42:01 INFO dspy.teleprompt.gepa.gepa: Iteration 30: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following a strict canonical schema. Use the rules below precisely and do not add, remove, or rename any top-level keys beyond the canonical schema. Output exactly one JSON object and nothing else.

CANONICAL SCHEMA (only these possible top-level keys; include a key ONLY when the input provides a value)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects

Average Metric: 1.29 / 3 (43.0%): 100%|██████████| 3/3 [00:00<00:00, 2163.13it/s]

2025/09/16 15:43:24 INFO dspy.evaluate.evaluate: Average Metric: 1.291208791208791 / 3 (43.0%)





2025/09/16 15:44:23 INFO dspy.teleprompt.gepa.gepa: Iteration 31: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following a strict canonical schema. Use the following detailed rules, heuristics, label mappings, and constraints every time you produce output.

1) REQUIRED OUTPUT FORMAT (absolute)
- Output exactly one valid JSON object and nothing else. No surrounding text, no explanation.
- The JSON object must include ONLY keys from this canonical top-level schema and ONLY when that field can be extracted from the input:
  - INVOICE_NUMBER (string)
  - DATE_OF_ISSUE (string)
  - SERVICE_DATE (string)
  - DUE_DATE (string)
  - PAYMENT_TERMS (string)
  - CURRENCY (string)
  - SUBTOTAL (string)
  - TAX_RATE (string)
  - TAX_AMOUNT (string)
  - TOTAL_AMOUNT (string)
  - TOTAL_AMOUNT_DUE (string)
  - AMOUNT_PAID (string)
  - SELLER_NAME (string)
  - SELLER_ADDRESS (string)
  - SELLER_PHONE (string)
  - SELL

Average Metric: 0.65 / 3 (21.5%): 100%|██████████| 3/3 [01:24<00:00, 28.19s/it]

2025/09/16 15:47:19 INFO dspy.evaluate.evaluate: Average Metric: 0.645016339869281 / 3 (21.5%)





2025/09/16 15:48:19 INFO dspy.teleprompt.gepa.gepa: Iteration 32: Proposed new text for self: You are an invoice-extraction assistant. You will receive a single free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, weird characters, or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema and rules below.

CRITICAL: Output rules (repeat for strictness)
- Output exactly one JSON object and nothing else (no commentary, no extra text).
- The JSON object MUST contain every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any top-level keys beyond the required list.
- Values must match required types exactly (strings or null for most fields; numbers for QTY/HOURS when parsed; ITEMS must be an array of ITEM objects).
- Always return valid JSON.
- Never return empt

Average Metric: 0.40 / 3 (13.2%): 100%|██████████| 3/3 [01:08<00:00, 22.95s/it]

2025/09/16 15:50:47 INFO dspy.evaluate.evaluate: Average Metric: 0.39598997493734334 / 3 (13.2%)





2025/09/16 15:51:55 INFO dspy.teleprompt.gepa.gepa: Iteration 33: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following a strict canonical schema. Produce ONLY one JSON object and NOTHING ELSE. This instruction is authoritative — follow it exactly.

Top-level schema (ONLY these keys may appear; include a key only if the input provides data for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item object may contain:
    - DESCRIPTION (string)

Average Metric: 0.95 / 3 (31.6%): 100%|██████████| 3/3 [00:57<00:00, 19.00s/it]

2025/09/16 15:55:30 INFO dspy.evaluate.evaluate: Average Metric: 0.9468325791855203 / 3 (31.6%)





2025/09/16 15:56:22 INFO dspy.teleprompt.gepa.gepa: Iteration 34: Proposed new text for self: You are an "invoice extractor" whose job is to convert a free-form invoice text into a single JSON object that exactly follows a strict canonical schema and strict output rules. Read the entire input text and produce only one JSON object (no surrounding text, no explanation). Use the rules below exactly.

1) TOP-LEVEL SCHEMA (ONLY these keys; include a key ONLY when the input provides data for it)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each 

Average Metric: 1.21 / 3 (40.5%): 100%|██████████| 3/3 [01:01<00:00, 20.46s/it]

2025/09/16 15:58:35 INFO dspy.evaluate.evaluate: Average Metric: 1.2142857142857144 / 3 (40.5%)





2025/09/16 15:59:24 INFO dspy.teleprompt.gepa.gepa: Iteration 35: Proposed new text for self: You are an extractor assistant. Your task: convert a free-form invoice text into exactly one JSON object that follows a strict canonical schema and the rules below. Read the entire input text and output exactly one valid JSON object and nothing else (no explanation, no surrounding text). If no fields are matched at all, output {}.

1) Canonical top-level schema (only these keys may appear; include a key only if the input provides a value for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMA

Average Metric: 1.07 / 3 (35.7%): 100%|██████████| 3/3 [01:04<00:00, 21.34s/it]

2025/09/16 16:01:54 INFO dspy.evaluate.evaluate: Average Metric: 1.0713562753036436 / 3 (35.7%)





2025/09/16 16:03:00 INFO dspy.teleprompt.gepa.gepa: Iteration 36: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following a strict canonical schema. Read the entire input text and produce only one JSON object (no surrounding text, no explanation). Follow these rules exactly.

Top-level schema (include ONLY these keys and ONLY when the input provides data for them):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item object may contain:
    - DESCR

Average Metric: 1.19 / 3 (39.5%): 100%|██████████| 3/3 [01:20<00:00, 26.90s/it]  

2025/09/16 16:05:52 INFO dspy.evaluate.evaluate: Average Metric: 1.1862745098039216 / 3 (39.5%)





2025/09/16 16:06:46 INFO dspy.teleprompt.gepa.gepa: Iteration 37: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following a strict canonical schema. Follow these precise rules and heuristics. Read the entire input text before producing any output. Produce exactly one valid JSON object and nothing else.

TOP-LEVEL SCHEMA (ONLY these keys, and only include a key when the input provides data for it)
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EMAIL (string)
- LINE_ITEMS (array of objects). Each line-item

Average Metric: 0.70 / 3 (23.2%): 100%|██████████| 3/3 [01:23<00:00, 27.72s/it]

2025/09/16 16:09:39 INFO dspy.evaluate.evaluate: Average Metric: 0.6968325791855203 / 3 (23.2%)





2025/09/16 16:10:44 INFO dspy.teleprompt.gepa.gepa: Iteration 38: Proposed new text for self: You are an invoice-extraction assistant. You will receive exactly one free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, weird characters, or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema and rules below.

CRITICAL OUTPUT RULES (must follow exactly)
- Output exactly one JSON object and nothing else — no commentary, no extra text, no logging.
- The JSON object must contain every top-level key listed below. Do NOT add or remove top-level keys.
- If a field cannot be found, set its value to null — EXCEPT ITEMS which must always be an array (use [] if no items).
- Use the exact key names and types defined below. Values must be of the required types:
  - Most fields: string or null.
  - QTY/HOURS inside ITEMS: numbers (integer for QTY where possible; integer o

Average Metric: 0.57 / 3 (19.1%): 100%|██████████| 3/3 [00:58<00:00, 19.42s/it]

2025/09/16 16:12:53 INFO dspy.evaluate.evaluate: Average Metric: 0.5726495726495726 / 3 (19.1%)





2025/09/16 16:13:54 INFO dspy.teleprompt.gepa.gepa: Iteration 39: Proposed new text for self: You are an invoice-extraction assistant. You will receive a single free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, weird characters, or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema and rules below.

ABSOLUTE OUTPUT REQUIREMENTS (must follow exactly)
- Output exactly one JSON object and nothing else (no commentary, no extra text, no surrounding markdown). The consumer of this output expects only this JSON.
- The JSON must contain every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any top-level keys beyond the required list.
- Types must be exact:
  - Top-level text fields: string or null.
  - INVOICE_DATE (DATE) either ISO "YYYY-MM-DD" string if pa

Average Metric: 0.84 / 3 (28.0%): 100%|██████████| 3/3 [02:27<00:00, 49.13s/it]  

2025/09/16 16:17:12 INFO dspy.evaluate.evaluate: Average Metric: 0.8410633484162896 / 3 (28.0%)





2025/09/16 16:18:07 INFO dspy.teleprompt.gepa.gepa: Iteration 40: Proposed new text for self: You are an invoice-extraction assistant. You will receive a single free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, weird characters, or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema and rules below.

CRITICAL OUTPUT RULES (must follow exactly)
- Output exactly one JSON object and nothing else (no commentary, no extra text).
- The JSON object must contain every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any top-level keys beyond the required list.
- Values must use the required types: strings or null for most fields; numbers for QTY/HOURS when parsed; ITEMS must be an array of ITEM objects.
- Always return valid JSON.
- Never output empty strings 

Average Metric: 1.35 / 3 (45.1%): 100%|██████████| 3/3 [01:13<00:00, 24.54s/it]

2025/09/16 16:21:52 INFO dspy.evaluate.evaluate: Average Metric: 1.3529411764705883 / 3 (45.1%)





2025/09/16 16:22:35 INFO dspy.teleprompt.gepa.gepa: Iteration 41: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following the canonical schema below. Use all heuristics and domain rules in this instruction to extract fields precisely and conservatively. Always prioritize correctness over guessing. Produce only one JSON object and nothing else.

Canonical top-level schema (ONLY these keys may appear; include a key ONLY when the input provides data for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)
- BUYER_EM

Average Metric: 1.17 / 3 (39.1%): 100%|██████████| 3/3 [01:02<00:00, 20.77s/it]

2025/09/16 16:26:26 INFO dspy.evaluate.evaluate: Average Metric: 1.1733821733821734 / 3 (39.1%)





2025/09/16 16:27:31 INFO dspy.teleprompt.gepa.gepa: Iteration 42: Proposed new text for self: You are an "invoice extractor" whose job is to convert a free-form invoice text into a single JSON object that exactly follows the canonical schema below. Use all heuristics and domain rules in this instruction to extract fields precisely and conservatively. Always prioritize correctness over guessing. Produce only one JSON object and nothing else.

Canonical top-level schema (ONLY these keys may appear; include a key ONLY when the input provides data for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (strin

Average Metric: 1.09 / 3 (36.5%): 100%|██████████| 3/3 [00:00<00:00, 3905.31it/s]

2025/09/16 16:28:48 INFO dspy.evaluate.evaluate: Average Metric: 1.094017094017094 / 3 (36.5%)





2025/09/16 16:29:44 INFO dspy.teleprompt.gepa.gepa: Iteration 43: Proposed new text for self: Goal
- From free-form invoice text, extract key invoice fields and line items and return them as a single JSON object (dictionary). The JSON must follow the exact schema and normalization rules below. Output only the JSON object (no prose, no extra keys, no explanations).

High-level rules
1. Only output keys from the schema below. Do not invent other top-level keys.
2. Use the exact key names (all uppercase with underscores) specified in the schema.
3. If a field is not present or cannot be reliably extracted, set its value to null (JSON null).
4. Amounts and numeric monetary values: store as numeric strings containing only digits and a decimal point (no currency symbols, no thousands separators). Examples: "2250.00", "50.00", "1234567890". If the source value has no decimal part, still format with two decimals ("50.00").
5. Quantities/hours: store as numeric strings (integer or decimal) with

Average Metric: 1.04 / 3 (34.7%): 100%|██████████| 3/3 [00:00<00:00, 4329.98it/s]

2025/09/16 16:30:22 INFO dspy.evaluate.evaluate: Average Metric: 1.0420168067226891 / 3 (34.7%)





2025/09/16 16:30:57 INFO dspy.teleprompt.gepa.gepa: Iteration 44: Proposed new text for self: You are an extraction assistant. Your job: given free-form invoice text, return a single JSON object (no extra text) containing only the canonical invoice fields described below. Do not add any other keys. If a field cannot be found or confidently inferred from the text, set its value to null.

Output format rules
- Output exactly one JSON object (no surrounding explanation, no markdown).
- Only include the keys in the "Canonical schema" below. Do not emit extra keys.
- Use null (JSON null) for unknown/missing values.
- For numeric monetary values and rates return JSON numbers (not strings). For currency, return a 3-letter ISO code string (e.g., "USD") if you can detect it; otherwise null.
- For dates return ISO 8601 date strings (YYYY-MM-DD) when the date can be unambiguously parsed. If parsing fails, set the field to null.
- For phone/email/website provide strings normalized (trim whitespace

Average Metric: 0.80 / 3 (26.7%): 100%|██████████| 3/3 [00:00<00:00, 489.80it/s]

2025/09/16 16:32:27 INFO dspy.evaluate.evaluate: Average Metric: 0.8016194331983806 / 3 (26.7%)





2025/09/16 16:33:08 INFO dspy.teleprompt.gepa.gepa: Iteration 45: Proposed new text for self: You are given free-form invoice text. Your task is to extract key invoice fields and return them as a single JSON dictionary. Follow these rules precisely.

1) Output format
- Output MUST be a single JSON object (dictionary). Do not output any additional text, explanations, or keys not in the allowed schema.
- Use UPPERCASE keys with underscores exactly as listed in the allowed schema below.
- Omit keys that are not present in the invoice (do not output null unless the invoice explicitly contains a blank field). Prefer omission over adding empty values.
- Numbered line items must be represented using keys with a numeric suffix: ITEM_1_…, ITEM_2_…, etc. Start numbering at 1 and include only items actually present.

2) Allowed schema (only these keys may appear)
- INVOICE_NUMBER
- INVOICE_DATE
- DATE_OF_ISSUE
- DUE_DATE
- SERVICE_DATE
- PO_NUMBER
- SELLER_COMPANY
- SELLER_NAME
- SELLER_TITLE
- S

Average Metric: 0.61 / 3 (20.4%): 100%|██████████| 3/3 [01:03<00:00, 21.11s/it] 

2025/09/16 16:35:12 INFO dspy.evaluate.evaluate: Average Metric: 0.6117216117216118 / 3 (20.4%)





2025/09/16 16:36:22 INFO dspy.teleprompt.gepa.gepa: Iteration 46: Proposed new text for self: You are an invoice-extraction assistant. You will be given one free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, odd characters or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object and nothing else. This document is the authoritative extraction specification and must be followed precisely.

CRITICAL OUTPUT RULES
- Output exactly one JSON object and nothing else (no commentary, no extra text, no surrounding code fences).
- The JSON must contain every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any top-level keys beyond the required list.
- Values must be of the required types (strings or null for most fields; numbers for QTY/HOURS when parsed; ITEMS must be an array of ITEM objects).
- Always return 

Average Metric: 0.70 / 3 (23.2%): 100%|██████████| 3/3 [01:03<00:00, 21.13s/it]

2025/09/16 16:38:41 INFO dspy.evaluate.evaluate: Average Metric: 0.6951923076923077 / 3 (23.2%)





2025/09/16 16:39:43 INFO dspy.teleprompt.gepa.gepa: Iteration 47: Proposed new text for self: You are an invoice-extraction assistant. You will be given a single free-form plain-text invoice (often OCR-like with irregular spacing, broken tokens, weird characters, or line breaks). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema and rules below.

CRITICAL: OUTPUT REQUIREMENTS
- Output exactly one JSON object and nothing else (no explanation, no commentary, no extra text).
- The JSON object must contain exactly the top-level keys listed in "REQUIRED TOP-LEVEL KEYS" below. Do NOT add or remove top-level keys.
- If a field cannot be found, set its value to null, except ITEMS which must always be an array (use [] if no items).
- Values must be of the exact required types (strings or null for most fields; QTY/HOURS must be numbers when parsed; ITEMS must be an array of ITEM objects).
- Never return emp

Average Metric: 1.03 / 3 (34.3%): 100%|██████████| 3/3 [01:29<00:00, 29.94s/it]

2025/09/16 16:42:08 INFO dspy.evaluate.evaluate: Average Metric: 1.0294117647058822 / 3 (34.3%)





2025/09/16 16:43:13 INFO dspy.teleprompt.gepa.gepa: Iteration 48: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following the canonical schema and rules below. Use all heuristics and domain rules here to extract fields precisely and conservatively. Always prioritize correctness and conservatism over guessing. Produce only one JSON object and nothing else.

Canonical top-level schema (ONLY these keys may appear; include a key ONLY when the input provides data for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string

Average Metric: 1.22 / 3 (40.8%): 100%|██████████| 3/3 [01:22<00:00, 27.43s/it]

2025/09/16 16:47:58 INFO dspy.evaluate.evaluate: Average Metric: 1.2247360482654601 / 3 (40.8%)





2025/09/16 16:48:57 INFO dspy.teleprompt.gepa.gepa: Iteration 49: Proposed new text for self: You are an extractor whose job is to convert a free-form invoice text into a single JSON object exactly following the canonical schema and rules below. Use all heuristics and domain rules in this instruction to extract fields precisely and conservatively. Always prioritize correctness over guessing. Produce only one JSON object and nothing else.

Canonical top-level schema (ONLY these keys may appear; include a key ONLY when the input provides data for it):
- INVOICE_NUMBER (string)
- DATE_OF_ISSUE (string)
- SERVICE_DATE (string)
- DUE_DATE (string)
- PAYMENT_TERMS (string)
- CURRENCY (string)
- SUBTOTAL (string)
- TAX_RATE (string)
- TAX_AMOUNT (string)
- TOTAL_AMOUNT (string)
- TOTAL_AMOUNT_DUE (string)
- AMOUNT_PAID (string)
- SELLER_NAME (string)
- SELLER_ADDRESS (string)
- SELLER_PHONE (string)
- SELLER_EMAIL (string)
- BUYER_NAME (string)
- BUYER_ADDRESS (string)
- BUYER_PHONE (string)


Average Metric: 0.97 / 3 (32.5%): 100%|██████████| 3/3 [00:57<00:00, 19.11s/it]

2025/09/16 16:51:38 INFO dspy.evaluate.evaluate: Average Metric: 0.9741902834008097 / 3 (32.5%)





2025/09/16 16:52:40 INFO dspy.teleprompt.gepa.gepa: Iteration 50: Proposed new text for self: You are an invoice-extraction assistant. You will receive a single free-form plain-text invoice (often OCR-like). Your job is to extract a fixed set of standardized fields and return exactly one JSON object (and nothing else) following the strict schema, types, and rules below.

VERY IMPORTANT — OUTPUT CONSTRAINTS
- Output exactly one JSON object and nothing else (no commentary, no explanation, no surrounding text).
- The JSON must contain every top-level key listed below. If a field cannot be found, set its value to null (except ITEMS which must be an array — use [] if no items).
- Do NOT add any top-level keys beyond the required list.
- Values must be of the required types. For numeric QTY/HOURS put numbers (integers or floats); for most fields use strings or null as specified; ITEMS must be an array of ITEM objects with exactly the keys specified.
- Always return valid JSON. No empty strin

Optimized field accuracy on valid: EvaluationResult(score=32.29, results=<list of 10 results>)


In [61]:
# Score the optimized program on the validation set and report the result
validation_label = "Optimized field accuracy on validation:"
opt_score = evaluate(optimized_program)
print(validation_label, opt_score)


2025/09/16 17:08:22 INFO dspy.evaluate.evaluate: Average Metric: 3.228846153846154 / 10 (32.3%)


Optimized field accuracy on validation: EvaluationResult(score=32.29, results=<list of 10 results>)


In [62]:
# Quick demo on the first validation example (fixed index, so the demo is repeatable)
def _print_fields(fields):
    """Print one ``KEY: value`` line per entry of ``fields``.

    Args:
        fields: Mapping of field name to value. A falsy value (``None`` or an
            empty dict) is treated as "no fields" and prints nothing.
    """
    for key, value in (fields or {}).items():
        print(f"{key:20s}: {value}")


def _show_prediction(heading, program, text):
    """Run ``program`` on ``text`` and print its rationale and extracted fields.

    Args:
        heading: Label printed (followed by a colon) above the prediction.
        program: Callable accepting a ``text=`` keyword and returning an object
            that exposes ``rationale`` and ``extracted`` attributes.
        text: Raw invoice text passed to ``program``.

    Returns:
        The prediction object returned by ``program``.
    """
    # The heading is printed before the call so it precedes anything the call itself emits.
    print(f"\n{heading}:")
    prediction = program(text=text)
    print(f"Rationale: {prediction.rationale}")
    print("\nExtracted fields:")
    _print_fields(prediction.extracted)
    return prediction


sample = valid_examples[0]
print("Sample comparison:")
print("\nINPUT TEXT:")
print("-" * 80)
# Only the first 300 characters are shown to keep the cell output short.
print(sample.text[:300], "...")
print("-" * 80)

# Keep both predictions in the notebook namespace; later cells read `opt_pred`.
base_pred = _show_prediction("BASE MODEL PREDICTION", extractor, sample.text)
opt_pred = _show_prediction("OPTIMIZED MODEL PREDICTION", optimized_program, sample.text)

print("\nGROUND TRUTH:")
print("\nExtracted fields:")
_print_fields(sample.target)


Sample comparison:

INPUT TEXT:
--------------------------------------------------------------------------------
Green Blue Pink Vintage Retro Freelance Invoice

NO. DESCRIPTION RATE QTY TOTAL
01 Graphic design for website 45 20 900.00
02 Graphic design for social content 45 8 360.00
03 Account management fee 280 1 280.00
04 Two days onsite 640 2 1280.00
SUB TOTAL $ 2820.00
TAX $ 225.60
S&H nil
TOTAL $ 3045.60 ...
--------------------------------------------------------------------------------

BASE MODEL PREDICTION:
Rationale: Detected invoice fields parsed from the text:
- Invoice title
- Invoice number ("NO. 001")
- Invoice date ("02.05.2024")
- From (biller) name, company, email, address, code
- To (client) name, company, email, address, code
- Line items (4 entries) with rate, quantity, totals
- Subtotal, Tax, Shipping (S&H), Total
- Bank / account details (Account Name, Bank, Acc)
- Payment instructions / terms ("Please pay within 60 days", cheques payable)
- Notes text

Extracte

In [63]:
# Compare the optimized prediction with the ground truth, field by field.
# `extracted` is treated as possibly empty/None elsewhere in this notebook, so
# guard here as well: anything that is not a dict counts as "nothing predicted"
# and every field is reported as MISSING instead of raising AttributeError.
predicted_fields = opt_pred.extracted if isinstance(opt_pred.extracted, dict) else {}

print("\nField Comparison:")
print("-" * 80)
for field, gt_value in sample.target.items():
    opt_value = predicted_fields.get(field, "MISSING")
    # Strict equality: formatting differences (e.g. "$ 2820.00" vs "2820.00") count as a mismatch.
    match = "✓" if gt_value == opt_value else "✗"
    print(f"{field:20s}: {match}")
    if match == "✗":
        print(f"  Ground truth: {gt_value}")
        print(f"  Prediction:   {opt_value}")
print("-" * 80)



Field Comparison:
--------------------------------------------------------------------------------
INVOICE_NUMBER      : ✓
DATE                : ✗
  Ground truth: 02.05.2024
  Prediction:   MISSING
BILL_TO             : ✗
  Ground truth: Rosa Maria Aguado
  Prediction:   MISSING
COMPANY             : ✗
  Ground truth: Aldenaire & Partners
  Prediction:   MISSING
ADDRESS             : ✗
  Ground truth: 123 Anywhere St., Any City, ST 12345
  Prediction:   MISSING
EMAIL               : ✗
  Ground truth: hello@reallygreatsite.com
  Prediction:   MISSING
BANK_NAME           : ✓
BANK_ACCOUNT        : ✓
ITEM_DESCRIPTION    : ✗
  Ground truth: Two days onsite
  Prediction:   MISSING
QTY                 : ✗
  Ground truth: 2
  Prediction:   MISSING
RATE                : ✗
  Ground truth: 640
  Prediction:   MISSING
AMOUNT              : ✗
  Ground truth: 1280.00
  Prediction:   MISSING
SUBTOTAL            : ✗
  Ground truth: $ 2820.00
  Prediction:   2820.00
TAX                 : ✗
  Ground tr

In [64]:
# For transparency, dump the most recent LM interaction recorded by DSPy
history_banner = "\nInspecting last prompt:"
print(history_banner)
dspy.inspect_history(n=1)


Inspecting last prompt:




[34m[2025-09-16T17:08:31.058488][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str): Raw invoice text
Your output fields are:
1. `rationale` (str): Brief reasoning, list detected fields
2. `extracted` (dict): JSON dict with only UPPERCASE dataset keys and string values
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## rationale ## ]]
{rationale}

[[ ## extracted ## ]]
{extracted}        # note: the value you produce must adhere to the JSON schema: {"type": "object", "additionalProperties": true}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Extract key-value invoice fields as a JSON dict from free-form invoice text.


[31mUser message:[0m

[[ ## text ## ]]
Green Blue Pink Vintage Retro Freelance Invoice

NO. DESCRIPTION RATE QTY TOTAL
01 Graphic design for website 45 20 900.00
02 Graphic design for social content 45 8 360.