
# Structured Outputs & Function Calling — LLM Extractor (Gemini Flash)

This version **uses Gemini Flash** (e.g., `gemini-2.0-flash-lite`) for field extraction.

**Prereq:** Set your `GOOGLE_API_KEY` before running the LLM cells.

The notebook then walks through these steps:

1. Configure the Google Generative AI SDK  
2. Define a **strict JSON schema** and a validator  
3. Use **Gemini Flash** to extract the structured fields from emails (zero-shot + optional few-shot)  
4. Validate, evaluate, and **safely call tools** with full **audit logs**  
5. Keep rationales **internal** while exposing a concise public summary



**Input email example:**
> “My shipment #A123 is late. Order placed July 1, expedited shipping paid, still no tracking update. Need a refund for shipping and an ETA.”

**Target JSON:**
```json
{
  "intent": "delivery_issue",
  "order_id": "A123",
  "requested_actions": ["refund_shipping","status_update"],
  "priority": "high",
  "customer_tone": "frustrated",
  "next_step": "create_case"
}
```
**Available tools:** `get_order(order_id)`, `get_policy(topic)`, `create_case(payload)`, `send_email(to, template_id, vars)`


## Setup

In [None]:
# Resolve the API key and configure the Gemini SDK.
# The Colab secret store is tried first; the GOOGLE_API_KEY environment
# variable is the fallback so this cell also runs outside Colab.
import os

try:
    from google.colab import userdata
    _api_key = userdata.get('GOOGLE_API_KEY')
except Exception:
    # Best effort: not running in Colab, or the secret could not be read.
    _api_key = None
_api_key = _api_key or os.environ.get("GOOGLE_API_KEY")

try:
    import google.generativeai as genai
except ImportError:
    print("Install google-generativeai to enable live calls.")
    _GEMINI_READY = False
else:
    if _api_key:
        genai.configure(api_key=_api_key)
    # Later cells gate every live model call on this flag.
    _GEMINI_READY = bool(_api_key)

In [None]:
import os, json, re, sys, time, textwrap
from datetime import datetime

# Model identifier shown in the status banners printed by this notebook.
MODEL_NAME = "gemini-2.0-flash-lite"

# Tell the user up front whether the LLM cells will be able to run.
if _GEMINI_READY:
    print("GOOGLE_API_KEY detected. Model:", MODEL_NAME)
else:
    print("WARNING: GOOGLE_API_KEY is not set. Set it via environment before running LLM cells.")

## Sample Ground Truth & Utilities

In [None]:
# In-memory order records, keyed by order ID.
ORDER_DB = {
    "A123": {
        "order_id": "A123",
        "status": "delayed",
        "ship_method": "expedited",
        "placed": "2025-07-01",
        "eta": "2025-07-04",
        "email": "alice@example.com",
    },
    "B456": {
        "order_id": "B456",
        "status": "shipped",
        "ship_method": "standard",
        "placed": "2025-07-02",
        "eta": "2025-07-10",
        "email": "bob@example.com",
    },
    "C789": {
        "order_id": "C789",
        "status": "processing",
        "ship_method": "expedited",
        "placed": "2025-06-28",
        "eta": None,
        "email": "carol@example.com",
    },
}

# Policy snippets, keyed by topic.
POLICY_PAGES = {
    "refund_shipping": {
        "policy": "Refund expedited shipping if carrier delay > 24h.",
        "requires": "carrier_delay_proven",
    },
    "status_update": {
        "policy": "Provide latest carrier status and ETA.",
        "requires": None,
    },
}

# Service-level settings, keyed by intent.
SLA_TABLE = {
    "delivery_issue": {
        "priority_rules": "expedited+late → high",
        "default_priority": "medium",
        "target_response_hours": 4,
    },
}

def pretty(obj):
    """Print obj as JSON with a two-space indent, keeping non-ASCII characters as-is."""
    rendered = json.dumps(obj, ensure_ascii=False, indent=2)
    print(rendered)

## JSON Schema & Validator

In [None]:
# Contract for the extractor output. order_id and rationale_internal are the
# only properties not listed under "required".
TARGET_SCHEMA = {
    "type": "object",
    "properties": {
        "intent": {"type":"string", "enum":["delivery_issue","billing_issue","return_request","unknown"]},
        "order_id": {"type":"string", "pattern":"^[A-Z0-9-]{2,10}$"},
        "requested_actions": {"type":"array","items":{"type":"string","enum":["refund_shipping","status_update","escalate","none"]}},
        "priority": {"type":"string","enum":["low","medium","high","urgent"]},
        "customer_tone": {"type":"string","enum":["neutral","frustrated","angry","confused","positive"]},
        "next_step": {"type":"string","enum":["create_case","ask_for_more_info","auto_resolve","none"]},
        "rationale_internal": {"type":"string"}
    },
    "required": ["intent","requested_actions","priority","customer_tone","next_step"],
    "additionalProperties": False
}

import re

def validate_against_schema(obj, schema=TARGET_SCHEMA):
    """
    Validate obj against schema and return a list of error messages.

    Only the keywords this notebook uses are checked: ``required``,
    ``additionalProperties: False``, and for each property its ``type``
    ("string" or "array"), ``enum``, ``pattern`` and array ``items`` enum.

    A ``None`` value for a string or array property is treated as "" or []
    to prevent unnecessary validation errors. Empty strings skip the enum and
    pattern checks. obj itself is never modified.

    Args:
        obj: The decoded payload; expected to be a dict.
        schema: Schema dict to validate against.

    Returns:
        A list of human-readable error strings; empty when obj is valid.
        A payload that is not a dict yields a single "must be object" error
        instead of raising.
    """
    # Decoded JSON can be a list, string, number or null; report it as a
    # validation error rather than failing on .items() below.
    if not isinstance(obj, dict):
        return [f"Payload must be object (got: {type(obj).__name__})"]

    props = schema.get("properties", {})

    # Normalize: replace None with an empty value of the declared type.
    normalized = {}
    for key, value in obj.items():
        spec = props.get(key)
        if spec and value is None:
            if spec.get("type") == "string":
                value = ""
            elif spec.get("type") == "array":
                value = []
        normalized[key] = value

    # Required fields
    errors = [
        f"Missing required field: {key}"
        for key in schema.get("required", [])
        if key not in normalized
    ]

    # Additional properties
    if schema.get("additionalProperties", True) is False:
        errors.extend(
            f"Unexpected field: {key}" for key in normalized if key not in props
        )

    # Per-field checks
    for key, spec in props.items():
        if key not in normalized:
            continue
        val = normalized[key]
        kind = spec.get("type")

        if kind == "string":
            if not isinstance(val, str):
                errors.append(f"Field {key} must be string")
            elif "enum" in spec and val and val not in spec["enum"]:
                errors.append(f"Field {key} not in enum {spec['enum']} (got: {val})")
            elif "pattern" in spec and val and not re.match(spec["pattern"], val):
                errors.append(f"Field {key} fails pattern {spec['pattern']} (got: {val})")

        elif kind == "array":
            if not isinstance(val, list):
                errors.append(f"Field {key} must be array")
                continue
            item = spec.get("items", {})
            if item.get("type") == "string" and "enum" in item:
                for i, element in enumerate(val):
                    if not isinstance(element, str):
                        errors.append(f"Field {key}[{i}] must be string")
                    elif element not in item["enum"]:
                        errors.append(f"Field {key}[{i}] not in enum {item['enum']} (got: {element})")

    return errors

## Mini-Dataset (Emails)

In [None]:
# Evaluation set: each entry pairs an email body ("text") with the expected
# extraction ("gold").
EMAILS = [
    # e1: late expedited shipment; refund and status requested.
    {
        "id": "e1",
        "text": "My shipment #A123 is late. Paid for expedited! No tracking updates. Please refund shipping and tell me when it arrives.",
        "gold": {
            "intent": "delivery_issue",
            "order_id": "A123",
            "requested_actions": ["refund_shipping", "status_update"],
            "priority": "high",
            "customer_tone": "frustrated",
            "next_step": "create_case",
        },
    },
    # e2: shipped order with a broken tracking link.
    {
        "id": "e2",
        "text": "Order B456 shows shipped, but link is broken. Can you send me the correct tracking page?",
        "gold": {
            "intent": "delivery_issue",
            "order_id": "B456",
            "requested_actions": ["status_update"],
            "priority": "medium",
            "customer_tone": "neutral",
            "next_step": "create_case",
        },
    },
    # e3: overnight order still processing; customer states they are angry.
    {
        "id": "e3",
        "text": "Hello—my order C789 promised overnight but still processing. I'm honestly pretty angry.",
        "gold": {
            "intent": "delivery_issue",
            "order_id": "C789",
            "requested_actions": ["status_update", "refund_shipping"],
            "priority": "high",
            "customer_tone": "angry",
            "next_step": "create_case",
        },
    },
    # e4: possibly lost package; escalation and refund requested.
    {
        "id": "e4",
        "text": "I think my package A123 is lost. If policy allows, escalate and refund my shipping fee.",
        "gold": {
            "intent": "delivery_issue",
            "order_id": "A123",
            "requested_actions": ["refund_shipping", "escalate"],
            "priority": "high",
            "customer_tone": "frustrated",
            "next_step": "create_case",
        },
    },
    # e5: no order ID in the text, so the expected next step is to ask for it.
    {
        "id": "e5",
        "text": "My shipment is late. I paid extra. Need status and refund.",
        "gold": {
            "intent": "delivery_issue",
            "order_id": None,
            "requested_actions": ["refund_shipping", "status_update"],
            "priority": "medium",
            "customer_tone": "frustrated",
            "next_step": "ask_for_more_info",
        },
    },
]
len(EMAILS)

## Few-Shot Exemplars (Optional Prompt Fuel)

In [None]:
# Input/output exemplars that build_prompt can embed in the prompt.
FEW_SHOT = [
    # Late shipment with an order ID present.
    {
        "input": "My shipment #A123 is late. Please refund shipping and tell me when it arrives.",
        "output": {
            "intent": "delivery_issue",
            "order_id": "A123",
            "requested_actions": ["refund_shipping", "status_update"],
            "priority": "high",
            "customer_tone": "frustrated",
            "next_step": "create_case",
        },
    },
    # Customer explicitly states they are angry.
    {
        "input": "The order C789 was promised overnight but still processing; I'm angry.",
        "output": {
            "intent": "delivery_issue",
            "order_id": "C789",
            "requested_actions": ["status_update", "refund_shipping"],
            "priority": "high",
            "customer_tone": "angry",
            "next_step": "create_case",
        },
    },
    # No order ID available: order_id is None and more info is requested.
    {
        "input": "My shipment is late, but I don't have the order number on hand.",
        "output": {
            "intent": "delivery_issue",
            "order_id": None,
            "requested_actions": ["status_update", "refund_shipping"],
            "priority": "medium",
            "customer_tone": "frustrated",
            "next_step": "ask_for_more_info",
        },
    },
]
print(len(FEW_SHOT), "few-shot examples ready.")

## LLM Extractor — Gemini Flash

In [None]:
def build_prompt(email_text, schema, few_shot=None):
    """Assemble the extraction prompt for one email.

    Args:
        email_text: Raw email body to extract from.
        schema: Schema dict; embedded in the prompt as indented JSON.
        few_shot: Optional iterable of {"input", "output"} exemplars. When
            empty or None, no example section is rendered.

    Returns:
        The full prompt string.
    """
    # Each exemplar becomes an "Example Input / Example Output (JSON)" pair.
    shots = "".join(
        f"""
Example Input:
{shot['input']}
Example Output (JSON):
{json.dumps(shot['output'], ensure_ascii=False)}
"""
        for shot in (few_shot or [])
    )
    schema_json = json.dumps(schema, ensure_ascii=False, indent=2)
    return f"""
You are an information extraction model for support ticket triage.
Return ONLY a JSON object matching this schema (no markdown fences, no extra text, no comments):

Schema (for reference, not to output):
{schema_json}

Rules:
- Output exactly the fields permitted by the schema.
- Use enums exactly as specified.
- If a field is unknown or not present (e.g., order_id), set it to null or a sensible default allowed by the schema.
- Keep any reasoning very short in "rationale_internal".
- Do NOT invent IDs that you don't see or can't infer safely.
- If the intent seems unrelated to delivery, set intent to "unknown".

{shots}

Now extract from this email:
{email_text}
Return JSON only.
"""

def _response_text(resp):
    """Return the text carried by a model response, or None when there is none."""
    try:
        text = resp.text
    except (AttributeError, ValueError):
        # The SDK's .text accessor raises ValueError when the response holds
        # no usable text part, which getattr's default would not absorb.
        text = None
    if text:
        return text
    # Fallback: concatenate whatever text parts the first candidate carries.
    candidates = getattr(resp, "candidates", None) or []
    if not candidates:
        return None
    content = getattr(candidates[0], "content", None)
    parts = getattr(content, "parts", None) or []
    joined = "".join(getattr(part, "text", "") or "" for part in parts)
    return joined or None


def _parse_json_object(text):
    """Decode the JSON object embedded in text, ignoring surrounding fences or prose."""
    s = text.strip()
    # Slicing from the first "{" to the last "}" already discards markdown
    # fences and language tags, including when JSON starts on the fence line.
    start = s.find("{")
    end = s.rfind("}")
    if start >= 0 and end > start:
        s = s[start:end + 1]
    obj = json.loads(s)
    if not isinstance(obj, dict):
        raise ValueError(f"Model output is not a JSON object (got: {type(obj).__name__})")
    return obj


def call_gemini_for_json(email_text, schema, few_shot=None):
    """Send one extraction prompt to Gemini and return the decoded JSON object.

    The model is taken from the GEMINI_MODEL environment variable, defaulting
    to MODEL_NAME so the model in use matches the one this notebook announces.

    Args:
        email_text: Raw email body.
        schema: Schema dict embedded in the prompt.
        few_shot: Optional exemplars forwarded to build_prompt.

    Returns:
        The model's answer as a dict. The result is not schema-validated here.

    Raises:
        RuntimeError: If the SDK/key is not ready or the response has no text.
        ValueError: If the text is not valid JSON (json.JSONDecodeError) or
            decodes to something other than an object.
    """
    if not _GEMINI_READY:
        raise RuntimeError("GOOGLE_API_KEY not configured or SDK not available.")
    import google.generativeai as genai
    model = genai.GenerativeModel(os.getenv("GEMINI_MODEL", MODEL_NAME))
    prompt = build_prompt(email_text, schema, few_shot=few_shot)
    resp = model.generate_content(prompt)
    text = _response_text(resp)
    if not text:
        raise RuntimeError("Empty response from model")
    return _parse_json_object(text)

def extract_structured_llm(email_text, use_few_shot=True):
    """Extract structured fields from an email and enforce the target schema.

    Args:
        email_text: Raw email body.
        use_few_shot: When true, FEW_SHOT exemplars are added to the prompt.

    Returns:
        The extracted dict, which has passed validate_against_schema.

    Raises:
        ValueError: If the model output violates TARGET_SCHEMA; the message
            lists every validation error.
    """
    exemplars = None
    if use_few_shot:
        exemplars = FEW_SHOT
    extracted = call_gemini_for_json(email_text, TARGET_SCHEMA, few_shot=exemplars)
    problems = validate_against_schema(extracted)
    if not problems:
        return extracted
    raise ValueError("Schema validation failed after LLM extraction: " + "; ".join(problems))

print(f"Ready to extract with Gemini model: {MODEL_NAME}")


## Validate an LLM Output

In [None]:
# Demo: extract the first sample email and show the result with its
# validation errors.
if _GEMINI_READY:
    pred = extract_structured_llm(EMAILS[0]["text"])
    # Single quotes inside the replacement field: reusing the f-string's own
    # double quotes is a SyntaxError on Python versions before 3.12.
    print(f"Email content: \n{EMAILS[0]['text']}\n")
    print("LLM extraction result:")
    pretty({"pred": pred, "errors": validate_against_schema(pred)})
else:
    print("Set GOOGLE_API_KEY to run this cell.")

## Offline Evaluation — LLM

In [None]:
# Error tags used by the evaluation, each with a one-line description.
ERROR_TAXONOMY = {
    "missing_field": "A required field is absent.",
    "enum_mismatch": "A value falls outside the permitted enum.",
    "hallucinated_id": "An ID was invented but not present in text or DB.",
    "type_error": "Type mismatch vs. schema.",
    "format_error": "Regex/pattern mismatch.",
}

def classify_validation_errors(errors):
    """Map validation error messages to taxonomy tags.

    Each message receives at most one tag, chosen by the first marker
    substring it contains; messages matching no marker are skipped.

    Args:
        errors: Iterable of error message strings.

    Returns:
        A list of tags in message order, or None when no message was tagged.
    """
    # Order matters: earlier markers win when a message contains several.
    markers = (
        ("Missing required field", "missing_field"),
        ("enum", "enum_mismatch"),
        ("pattern", "format_error"),
        ("must be", "type_error"),
    )
    tags = []
    for message in errors:
        tag = next((label for needle, label in markers if needle in message), None)
        if tag is not None:
            tags.append(tag)
    return tags if tags else None

def _same_value(expected, actual):
    """Compare two field values, ignoring element order when both are lists."""
    if isinstance(expected, list) and isinstance(actual, list):
        try:
            return sorted(expected) == sorted(actual)
        except TypeError:
            # Elements that cannot be ordered: fall back to exact comparison.
            return expected == actual
    return expected == actual


def field_accuracy(pred, gold):
    """Count how many fields of pred agree with gold.

    Every key present in either dict is scored except "rationale_internal".
    A key missing on one side is compared as None. List values (such as
    requested_actions) are compared without regard to order, so the model is
    not penalized for listing the same actions in a different sequence.

    Args:
        pred: Predicted fields; anything that is not a dict counts as empty.
        gold: Expected fields; anything that is not a dict counts as empty.

    Returns:
        A (correct, total) tuple of ints.
    """
    gold = gold if isinstance(gold, dict) else {}
    pred = pred if isinstance(pred, dict) else {}
    keys = (set(gold) | set(pred)) - {"rationale_internal"}
    correct = sum(1 for key in keys if _same_value(gold.get(key), pred.get(key)))
    return correct, len(keys)

def run_eval_llm(dataset):
    """Run the extractor over dataset and score each prediction against gold.

    The raw model output is scored here (via call_gemini_for_json) rather
    than the output of extract_structured_llm, because the latter raises on
    any schema violation and would keep validation errors out of the report.
    A failure on one item is recorded as a row and does not stop the run.

    Args:
        dataset: Iterable of items with "id", "text" and "gold" keys.

    Returns:
        A list of row dicts with keys "id", "field_accuracy", "errors",
        "error_tags", "pred" and "gold". Empty when Gemini is not ready.
    """
    rows = []
    if not _GEMINI_READY:
        print("Set GOOGLE_API_KEY to run evaluation.")
        return rows
    for item in dataset:
        gold = item["gold"]
        try:
            pred = call_gemini_for_json(item["text"], TARGET_SCHEMA, few_shot=FEW_SHOT)
            if not isinstance(pred, dict):
                raise ValueError("model output is not a JSON object")
        except Exception as exc:
            # Evaluation boundary: keep the failure visible in the report and
            # move on. "extraction_failed" is not an ERROR_TAXONOMY entry.
            rows.append({
                "id": item["id"],
                "field_accuracy": 0.0,
                "errors": [f"Extraction failed: {exc}"],
                "error_tags": ["extraction_failed"],
                "pred": None,
                "gold": gold
            })
            continue

        val_errs = validate_against_schema(pred)
        err_tags = classify_validation_errors(val_errs)
        # An ID counts as hallucinated when it appears neither in the email
        # nor in ORDER_DB. Non-string IDs are left to the schema validator.
        order_id = pred.get("order_id")
        if (
            isinstance(order_id, str)
            and order_id
            and order_id not in item["text"]
            and order_id not in ORDER_DB
        ):
            err_tags = (err_tags or []) + ["hallucinated_id"]
        c, t = field_accuracy(pred, gold)
        rows.append({
            "id": item["id"],
            "field_accuracy": c/t if t else 1.0,
            "errors": val_errs,
            "error_tags": err_tags,
            "pred": pred,
            "gold": gold
        })
    return rows

# Evaluate the sample emails, then show the first two rows and the mean
# per-email field accuracy. Nothing runs without a configured API key.
if _GEMINI_READY:
    eval_rows = run_eval_llm(EMAILS)
    if eval_rows:
        pretty(eval_rows[:2])
        avg = sum(row["field_accuracy"] for row in eval_rows) / len(eval_rows)
        print("\nAverage field accuracy:", round(avg, 3))

## Tool Layer with Guardrails

In [None]:
# Append-only record of every tool invocation made in this notebook.
AUDIT_LOG = []

def log(action, **kwargs):
    """Append a timestamped entry to AUDIT_LOG.

    Args:
        action: Name of the action being recorded.
        **kwargs: Extra details stored alongside the action. They are
            deep-copied so later changes to the caller's objects cannot
            alter what was logged.

    The "ts" value is the current UTC time in ISO 8601 form with a "Z" suffix.
    """
    import copy
    from datetime import datetime, timezone

    # datetime.utcnow() is deprecated. Take an aware UTC time and drop tzinfo
    # so isoformat() omits "+00:00" and the "Z" suffix format is unchanged.
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    AUDIT_LOG.append({"ts": stamp, "action": action, **copy.deepcopy(kwargs)})

from datetime import datetime

def get_order(order_id):
    """Look up an order, recording the lookup in the audit log.

    Returns:
        The ORDER_DB record for order_id, or None when there is no such order.
    """
    log("get_order", order_id=order_id)
    if order_id in ORDER_DB:
        return ORDER_DB[order_id]
    return None

def get_policy(topic):
    """Look up a policy page, recording the lookup in the audit log.

    Returns:
        The POLICY_PAGES entry for topic, or None when the topic is unknown.
    """
    log("get_policy", topic=topic)
    if topic in POLICY_PAGES:
        return POLICY_PAGES[topic]
    return None

def create_case(payload):
    """Create a support case from a schema-valid payload.

    The payload is validated first. An invalid payload is logged as
    "create_case_denied" and rejected, so no case is created from it.

    Args:
        payload: Extracted fields dict.

    Returns:
        {"case_id": ..., "status": "created"}.

    Raises:
        ValueError: If payload fails schema validation.
    """
    errs = validate_against_schema(payload)
    if errs:
        log("create_case_denied", reason="schema_validation_failed", errors=errs)
        raise ValueError("Schema validation failed; refusing to create case.")
    # order_id may be present but None or "" (both pass validation), so use
    # "or" instead of a .get() default to avoid IDs like "CASE-None-3".
    order_ref = payload.get("order_id") or "NA"
    case_id = f"CASE-{order_ref}-{len(AUDIT_LOG)}"
    log("create_case", case_id=case_id, payload=payload)
    return {"case_id": case_id, "status": "created"}

def send_email(to, template_id, vars):
    """Record an email draft in the audit log.

    Args:
        to: Recipient address.
        template_id: Identifier of the template to use.
        vars: Template variables.

    Returns:
        {"draft_id": ..., "status": "draft_created"}; the draft ID is derived
        from the audit-log length after this draft has been logged.
    """
    log("send_email_draft", to=to, template_id=template_id, vars=vars)
    draft_number = len(AUDIT_LOG)
    return {"draft_id": f"DRAFT-{draft_number}", "status": "draft_created"}

## Orchestrator — Using LLM Fields

In [None]:
def apply_sla_priority(fields):
    """Raise the priority of delivery issues that request a shipping refund.

    For intent "delivery_issue" with "refund_shipping" among the requested
    actions, the priority becomes "high" unless it is already "high" or
    "urgent"; an "urgent" ticket is never downgraded.

    Args:
        fields: Extracted fields dict; updated in place.

    Returns:
        The same fields dict.
    """
    if fields.get("intent") != "delivery_issue":
        return fields
    wants_refund = "refund_shipping" in (fields.get("requested_actions") or [])
    if wants_refund and fields.get("priority") not in ("high", "urgent"):
        fields["priority"] = "high"
    return fields

def plan_actions(email_text):
    """Extract fields from an email and carry out the resulting next step.

    Steps:
      1. Extract and validate fields with the LLM.
      2. Look up the order (when an ID was extracted) and the refund policy
         (when a shipping refund was requested).
      3. Force next_step to "ask_for_more_info" when the intent is unknown or
         no order ID was extracted.
      4. Apply the SLA priority rule.
      5. For "create_case": create the case and, if the order was found,
         draft a status email. For "ask_for_more_info": draft a request for
         details. Other next steps take no action.

    Args:
        email_text: Raw email body.

    Returns:
        (fields, action_results): the possibly adjusted fields dict and a
        dict of tool results keyed by "case" and/or "email_draft".
    """
    fields = extract_structured_llm(email_text, use_few_shot=True)

    # Both values may be None in a payload that passed validation, and
    # order_id may also be "", so normalize before using them.
    order_id = fields.get("order_id")
    wants_refund = "refund_shipping" in (fields.get("requested_actions") or [])

    order = get_order(order_id) if order_id else None
    policy_refund = get_policy("refund_shipping") if wants_refund else None

    # Without a usable order ID (None or empty) a case cannot be tied to an order.
    if fields.get("intent") == "unknown" or not order_id:
        fields["next_step"] = "ask_for_more_info"

    fields = apply_sla_priority(fields)

    action_results = {}
    next_step = fields.get("next_step")
    if next_step == "create_case":
        action_results["case"] = create_case(fields)
        if order:
            action_results["email_draft"] = send_email(
                to=order["email"],
                template_id="delivery_status_with_refund" if wants_refund else "delivery_status",
                vars={
                    "order_id": order["order_id"],
                    "eta": order.get("eta"),
                    "policy": policy_refund["policy"] if policy_refund else None,
                },
            )
    elif next_step == "ask_for_more_info":
        action_results["email_draft"] = send_email(
            to="customer@example.com",
            template_id="request_more_info",
            vars={"need":"order_id and details"}
        )
    return fields, action_results

# Demo: plan and execute actions for the first sample email.
if not _GEMINI_READY:
    print("Set GOOGLE_API_KEY to test the orchestrator.")
else:
    fields_demo, results_demo = plan_actions(EMAILS[0]["text"])
    pretty({"fields": fields_demo, "results": results_demo})

## Rationale Separation — Keep it Internal

In [None]:
def public_summary(fields):
    """Split extracted fields into a public view and a one-line summary.

    Args:
        fields: Extracted fields dict; must contain "intent" and "next_step".

    Returns:
        (public, summary): a copy of fields without "rationale_internal", and
        a sentence naming the intent, the order ID (or "(missing ID)") and
        the next step.
    """
    public = dict(fields)
    public.pop("rationale_internal", None)
    order_label = public.get('order_id') or '(missing ID)'
    summary = f"We identified a {public['intent']} on order {order_label}; next step: {public['next_step']}."
    return public, summary

# Demo: show the public view of the orchestrator's fields; the internal
# rationale is left out of what gets printed.
if _GEMINI_READY:
    pub, summary = public_summary(fields_demo)
    pretty(dict(public_output=pub, summary_for_user=summary))