In [None]:
# Install dependencies (run once per environment)
%pip install -q dspy pandas python-dotenv


In [None]:
# Basic imports and environment setup
import os
import json
import dspy
import pandas as pd
from dotenv import load_dotenv

# Load API keys from .env (OPENAI_API_KEY is expected)
load_dotenv()

# Configure the default DSPy LM used by every module in this notebook.
# gpt-5-mini belongs to OpenAI's reasoning-model family: dspy.LM refuses to
# construct such a model unless temperature is 1.0 (or None) and max_tokens is
# large enough to leave room for hidden reasoning tokens. The previous values
# (temperature=0.2, max_tokens=4000) made this cell raise before any call.
lm = dspy.LM(
    "openai/gpt-5-mini",
    api_key=os.getenv("OPENAI_API_KEY"),
    temperature=1.0,
    max_tokens=20000,
)
dspy.configure(lm=lm)

print("DSPy configured for invoice extraction.")


In [None]:
# Load dataset and preview a few rows.
# Search order: next to this file (when `__file__` is defined, e.g. when the
# notebook is exported and run as a script), then ./dspy, then the working directory.
# Notebook kernels normally do not define `__file__`, hence the fallback.
# Fix: the variable `__file__` is used here; the old code passed the string
# literal "__file__", which resolved to the working directory instead.
nb_dir = os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else os.path.abspath("./dspy")
candidates = [
    os.path.join(nb_dir, "invoice_ner_dataset.csv"),
    os.path.abspath("./dspy/invoice_ner_dataset.csv"),
    os.path.abspath("invoice_ner_dataset.csv"),
]
# First candidate that exists wins; fail loudly if none does.
csv_path = next((candidate for candidate in candidates if os.path.exists(candidate)), None)
if csv_path is None:
    raise FileNotFoundError("invoice_ner_dataset.csv not found in expected locations")

raw_df = pd.read_csv(csv_path)
print("Rows:", len(raw_df))
print(raw_df.head(2))

# Parse Final_Output JSON strings into dicts.
# The frame is re-read from disk above, so re-running this cell stays idempotent.
raw_df["Final_Output"] = raw_df["Final_Output"].apply(json.loads)

# Build small train/validation splits for quick iteration.
# Kept tiny for a simple example; adjust the slice bounds as needed.
train_df = raw_df.iloc[:30].copy()
valid_df = raw_df.iloc[30:40].copy()

def to_examples(df):
    """Convert every row of ``df`` into a ``dspy.Example``.

    Each example carries the row's ``"Input"`` value as ``text`` and its
    ``"Final_Output"`` value as ``target``; only ``text`` is marked as an input.
    Returns the examples as a list, in row order.
    """
    return [
        dspy.Example(text=record["Input"], target=record["Final_Output"]).with_inputs("text")
        for _, record in df.iterrows()
    ]

# Convert both splits (train first, then validation) into DSPy examples.
train_examples, valid_examples = map(to_examples, (train_df, valid_df))

print(f"Train examples: {len(train_examples)}, Valid examples: {len(valid_examples)}")


In [None]:
# Define a signature for extracting a JSON dict of invoice fields.
# One input (`text`) and two outputs (`rationale`, `extracted`).
# NOTE(review): DSPy presumably sends the class docstring and the field
# descriptions to the model as instructions - treat them as runtime prompt text,
# not as ordinary documentation, when editing.
class InvoiceExtraction(dspy.Signature):
    """
    Extract key-value invoice fields as a JSON dict from free-form invoice text.
    The output must be valid JSON, parseable into a Python dict[str, str].
    Do not invent fields; prefer fields present in the text.
    """
    # Input: the invoice text to extract from.
    text: str = dspy.InputField(description="Raw invoice text")
    # Output: short free-text explanation listing the fields that were detected.
    rationale: str = dspy.OutputField(description="Brief reasoning, list detected fields")
    # Output: the extracted fields; this is the attribute the metric compares against the gold dict.
    extracted: dict = dspy.OutputField(description="JSON dict of extracted fields (keys and string values)")

# Base module (simple Predict): the un-optimized baseline over InvoiceExtraction.
# Later cells evaluate it and compare it with the optimized program.
extractor = dspy.Predict(InvoiceExtraction)

print("Signature and extractor ready.")


In [None]:
# Define a simple exact match metric: compare normalized dicts
from dspy.evaluate import Evaluate


def normalize_dict(d: dict) -> dict:
    """Return a whitespace-normalized ``dict[str, str]`` copy of ``d``.

    Keys and values are converted with ``str()`` and stripped of surrounding
    whitespace. Case is left untouched (no upper-snake conversion is applied).

    Args:
        d: A mapping of extracted fields. ``None`` is treated as empty. A string
            is parsed as JSON first, so a prediction that arrives as raw JSON
            text is still scored.

    Returns:
        The normalized dict. Anything that is not (or does not parse to) a
        mapping yields ``{}``, so a malformed prediction scores as a miss
        rather than crashing the metric.
    """
    if isinstance(d, str):
        try:
            d = json.loads(d)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}
    # Duck-typed so any mapping-like object keeps working, not just dict.
    items = getattr(d, "items", None)
    if not callable(items):
        return {}
    return {str(key).strip(): str(value).strip() for key, value in items()}


def em_metric(example: dspy.Example, pred: dspy.Prediction, trace=None) -> float:
    """Exact-match score between the gold fields and the predicted fields.

    Both ``example.target`` and ``pred.extracted`` (an empty dict when the
    attribute is missing) go through ``normalize_dict`` before comparison.
    ``trace`` is accepted but not used.

    Returns:
        ``1.0`` when the normalized dicts are equal, otherwise ``0.0``.
    """
    expected = normalize_dict(example.target)
    predicted = normalize_dict(getattr(pred, "extracted", {}))
    return 1.0 if predicted == expected else 0.0


# Wrap the baseline extractor as a plain callable for Evaluate.
class ExtractCallable:
    """Callable adapter around the module-level ``extractor``.

    ``dspy.Evaluate`` invokes a program with the example's input fields as
    keyword arguments (``program(text=...)``), so that form must be accepted.
    Passing a whole example positionally is still supported for existing callers.
    """

    def __call__(self, example=None, **inputs):
        # Legacy form: pull the text out of a positional example.
        if example is not None:
            inputs.setdefault("text", example.text)
        return extractor(**inputs)


# Evaluate requires the dev set as a keyword argument at construction time.
# `ordered=True` was dropped: it is not a documented Evaluate option.
evaluate = Evaluate(devset=valid_examples, metric=em_metric)

# Only the program goes in positionally; the second positional slot is `metric`,
# not the dataset, so the dev set must not be passed there.
initial_score = evaluate(ExtractCallable())
print("Initial EM on valid:", initial_score)


In [None]:
# Optimize with BootstrapFewShot using a small subset.
# The optimizer classes live in `dspy.teleprompt`; there is no `dspy.optimizers` module.
from dspy.teleprompt import BootstrapFewShot

# Cap the number of bootstrapped demos to keep the demo fast.
optimizer = BootstrapFewShot(
    metric=em_metric,
    max_bootstrapped_demos=8,
)

# Optimizing a ChainOfThought wrapper around our signature generally helps
program = dspy.ChainOfThought(InvoiceExtraction)

# BootstrapFewShot.compile takes the student program plus a keyword-only
# `trainset`. It has no `program=` or `valset=` parameter, so the validation
# examples are used only for the evaluation that follows.
optimized = optimizer.compile(
    program,
    trainset=train_examples,
)

# Evaluate optimized program
class OptimizedCallable:
    """Callable adapter around the module-level ``optimized`` program.

    ``dspy.Evaluate`` invokes a program with the example's input fields as
    keyword arguments (``program(text=...)``), so that form must be accepted.
    Passing a whole example positionally is still supported for existing callers.
    """

    def __call__(self, example=None, **inputs):
        # Legacy form: pull the text out of a positional example.
        if example is not None:
            inputs.setdefault("text", example.text)
        return optimized(**inputs)

# Metric and dev set are passed by keyword: the second positional parameter of
# an Evaluate call is `metric`, so a positional dataset would be misread.
opt_score = evaluate(OptimizedCallable(), metric=em_metric, devset=valid_examples)
print("Optimized EM on valid:", opt_score)


In [None]:
# Quick demo on the first validation example (index 0, so the choice is deterministic)
def _extracted_keys(prediction):
    """List the keys of ``prediction.extracted``; a falsy value counts as no keys."""
    fields = prediction.extracted or {}
    return list(fields.keys())


sample = valid_examples[0]
print("INPUT (truncated):\n", sample.text[:600], "...\n")

# Baseline first, then the optimized program, each reported right after it runs.
base_pred = extractor(text=sample.text)
print("Base extracted keys:", _extracted_keys(base_pred))

opt_pred = optimized(text=sample.text)
print("Optimized extracted keys:", _extracted_keys(opt_pred))

print("\nGold keys:", list(sample.target.keys()))

# Dump the most recent LM interaction so the prompt can be inspected.
print("\nInspecting last prompt:")
dspy.inspect_history(n=1)
