# No-Model NL to SQL Dataflow Demo

This notebook demonstrates the full project dataflow without loading any model weights.

What it shows:
1. Prompt and message construction.
2. Constraint extraction from NLQ.
3. SQL extraction from noisy text.
4. Stage-by-stage SQL postprocessing debug.
5. Execution checks (VA / EM / EX) on a tiny local SQLite database.
6. Regeneration-style retry loop using fixed candidate strings.


In [None]:
from __future__ import annotations

import json
from pathlib import Path
from pprint import pprint

import pandas as pd
import sqlalchemy
from sqlalchemy import text

from nl2sql.core.prompting import SYSTEM_INSTRUCTIONS, make_few_shot_messages
from nl2sql.agent.constraint_policy import build_constraints
from nl2sql.core.llm import debug_extract_first_select
from nl2sql.core.postprocess import debug_guarded_postprocess, normalize_sql
from nl2sql.core.query_runner import QueryRunner
from nl2sql.evaluation.eval import execution_accuracy

# Signal that the import cell finished; this cell only imports names and prints this line.
print("Imports ready. No model loaded.")


## 1) Pick a realistic question and show dataset context

We use one NLQ from your benchmark for context, then run a local demo NLQ that can execute on a tiny in-memory SQLite DB.


In [None]:
def _load_test_set() -> list[dict]:
    """Load the benchmark test set from the first candidate location that holds it.

    Two paths, both relative to the current working directory, are tried in
    order: ``data/classicmodels_test_200.json`` and
    ``../data/classicmodels_test_200.json``.

    Returns:
        The parsed JSON content of the first candidate that is a regular file
        (annotated as a list of dicts).

    Raises:
        FileNotFoundError: If neither candidate is a regular file. The message
            lists the resolved paths that were searched.
    """
    candidates = [
        Path("data/classicmodels_test_200.json"),
        Path("../data/classicmodels_test_200.json"),
    ]
    for p in candidates:
        # is_file() rather than exists(): a directory that happens to carry the
        # same name must not abort the search with an error from read_text().
        if p.is_file():
            return json.loads(p.read_text(encoding="utf-8"))
    searched = ", ".join(str(p.resolve()) for p in candidates)
    raise FileNotFoundError(
        f"Could not locate classicmodels_test_200.json (searched: {searched})"
    )


# Load the benchmark file and keep its first record; it is only printed for context.
benchmark = _load_test_set()
benchmark_item = benchmark[0]

# Question / gold-query pair used by the rest of the notebook.
demo_nlq = "List all customer names in France"
demo_gold_sql = "SELECT customerName FROM customers WHERE country = 'France';"

print(f"Benchmark sample NLQ: {benchmark_item['nlq']}")
print(f"Benchmark sample SQL: {benchmark_item['sql']}")
print(f"\nLocal demo NLQ: {demo_nlq}")
print(f"Local demo gold SQL: {demo_gold_sql}")


## 2) Build schema summary and few-shot messages

This is the same message-building flow your real pipeline uses.


In [None]:
# Textual schema description passed to the prompt builder and the constraint policy.
# It lists the two tables that the local demo database also creates.
SCHEMA_SUMMARY_DEMO = "\n".join(
    [
        "Table customers (",
        "  customerNumber INT,",
        "  customerName TEXT,",
        "  contactLastName TEXT,",
        "  country TEXT,",
        "  creditLimit REAL",
        ")",
        "Table orders (",
        "  orderNumber INT,",
        "  customerNumber INT,",
        "  orderDate TEXT,",
        "  status TEXT",
        ")",
    ]
)

# Few-shot (question, SQL) examples handed to the prompt builder.
exemplars = [
    {"nlq": question, "sql": query}
    for question, query in (
        (
            "List all customer names in Germany",
            "SELECT customerName FROM customers WHERE country = 'Germany';",
        ),
        (
            "Show customer names and credit limit for customers in France",
            "SELECT customerName, creditLimit FROM customers WHERE country = 'France';",
        ),
    )
]

messages = make_few_shot_messages(
    schema=SCHEMA_SUMMARY_DEMO,
    exemplars=exemplars,
    nlq=demo_nlq,
)

# Show only the first line of the system prompt, then a one-line preview per message.
first_system_line = SYSTEM_INSTRUCTIONS.splitlines()[0]
print("System prompt starts with:")
print(first_system_line)
print(f"\nMessage count: {len(messages)}")
for position, message in enumerate(messages, start=1):
    # Flatten newlines and cap at 140 characters so each message fits on one line.
    flattened = str(message["content"]).replace("\n", " ")
    preview_text = flattened[:140]
    print(f"[{position}] {message['role']}: {preview_text}")


## 3) Build deterministic constraints from NLQ

This mirrors the project constraint policy before postprocessing.


In [None]:
# Constraints derived from the demo question and the schema text; later cells read them.
constraints = build_constraints(demo_nlq, SCHEMA_SUMMARY_DEMO)

# Subset of constraint keys worth showing; keys missing from `constraints` print as None.
interesting_keys = [
    "agg",
    "needs_group_by",
    "needs_order_by",
    "limit",
    "distinct",
    "explicit_fields",
    "required_output_fields",
    "explicit_projection",
    "required_tables",
    "rule_tags",
]

constraint_summary = dict(zip(interesting_keys, map(constraints.get, interesting_keys)))

print("Constraint summary:")
pprint(constraint_summary)


## 4) Create a tiny local database (no cloud DB needed)

This lets us demonstrate VA/EM/EX with real SQL execution.


In [None]:
# In-memory SQLite engine; the evaluation cell later runs queries against this same engine.
engine = sqlalchemy.create_engine("sqlite+pysqlite:///:memory:", future=True)

# DDL for both demo tables followed by the seed rows for `customers`
# (`orders` is created but left empty).
setup_statements = (
    """
        CREATE TABLE customers (
            customerNumber INTEGER PRIMARY KEY,
            customerName TEXT,
            contactLastName TEXT,
            country TEXT,
            creditLimit REAL
        )
    """,
    """
        CREATE TABLE orders (
            orderNumber INTEGER PRIMARY KEY,
            customerNumber INTEGER,
            orderDate TEXT,
            status TEXT
        )
    """,
    """
        INSERT INTO customers (customerNumber, customerName, contactLastName, country, creditLimit) VALUES
        (103, 'Atelier graphique', 'Schmitt', 'France', 21000.00),
        (112, 'Signal Gift Stores', 'King', 'USA', 71800.00),
        (119, 'La Rochelle Gifts', 'Labrune', 'France', 118200.00),
        (121, 'Baane Mini Imports', 'Petersen', 'Denmark', 81700.00)
    """,
)

# All setup statements run inside a single engine.begin() block.
with engine.begin() as conn:
    for statement in setup_statements:
        conn.execute(text(statement))

preview_query = text(
    "SELECT customerNumber, customerName, country FROM customers ORDER BY customerNumber"
)
with engine.connect() as conn:
    preview = pd.read_sql(preview_query, conn)

print("Local DB preview:")
display(preview)


## 5) Simulate model raw outputs and run extraction/postprocessing debug

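We intentionally include one broken candidate (it selects from `customer`, a table the demo database does not have) and one noisy but recoverable candidate (a query against `customers` wrapped in a code fence, with a stray `SQL:` prefix and surrounding commentary).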


In [None]:
# Candidate 1: plain text answer that selects from `customer`, a table the demo DB does not define.
broken_candidate = """
I think the answer is:
SELECT customerName, customerNumber
FROM customer
WHERE country = 'France'
ORDER BY customerName DESC
LIMIT 5;
"""

# Candidate 2: query against `customers`, wrapped in a code fence with a "SQL:" prefix
# and trailing commentary.
noisy_candidate = """
Here is SQL:
```sql
SQL: SELECT customerName, customerNumber
FROM customers
WHERE country = 'France'
ORDER BY customerName DESC
LIMIT 5
```
The query above should work.
"""

raw_generations = [broken_candidate, noisy_candidate]

for i, raw in enumerate(raw_generations, start=1):
    print(f"\n{'=' * 90}")
    print(f"RAW CANDIDATE {i}")
    print(raw.strip())

    extraction = debug_extract_first_select(raw)
    # Fall back to the raw text when the extractor selected nothing (missing or falsy value).
    sql_to_clean = extraction.get("selected_sql") or raw

    print("\nExtraction candidates:")
    for candidate in extraction.get("candidates", []):
        print(
            f"- accepted= {candidate.get('accepted')} "
            f"reject_reason= {candidate.get('reject_reason')} "
            f"from_target= {candidate.get('from_target')}"
        )
        print(f"  sql: {candidate.get('candidate_sql')}")

    # Explicit fields are forwarded only when the constraints flag an explicit projection.
    explicit_fields = None
    if constraints.get("explicit_projection"):
        explicit_fields = constraints.get("explicit_fields")

    trace = debug_guarded_postprocess(
        sql_to_clean,
        demo_nlq,
        explicit_fields=explicit_fields,
        required_fields=constraints.get("required_output_fields"),
    )

    print("\nPostprocess changed stages:")
    for stage_record in trace["steps"]:
        print(f"- {stage_record['stage']}: changed={stage_record['changed']}")

    print("Final SQL:")
    print(trace["final_sql"])


## 6) Evaluate candidates like a mini regen loop

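- VA: the predicted query executes successfully
- EM: the normalized predicted SQL text equals the normalized gold SQL text
- EX: the predicted query's execution result equals the gold query's execution result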


In [None]:
# Runner used for the VA check. NOTE(review): max_rows is presumably a row cap
# applied by QueryRunner; confirm against its implementation.
qr = QueryRunner(engine, max_rows=50)
attempt_rows: list[dict] = []

for i, raw in enumerate(raw_generations, start=1):
    # Extract and postprocess the candidate, then score it.
    extraction = debug_extract_first_select(raw)
    sql_to_clean = extraction.get("selected_sql") or raw

    explicit_fields = None
    if constraints.get("explicit_projection"):
        explicit_fields = constraints.get("explicit_fields")

    trace = debug_guarded_postprocess(
        sql_to_clean,
        demo_nlq,
        explicit_fields=explicit_fields,
        required_fields=constraints.get("required_output_fields"),
    )
    pred_sql = trace["final_sql"]

    # VA: run the prediction; EM: compare normalized text; EX: compare execution results.
    va_meta = qr.run(pred_sql, capture_df=False)
    em = normalize_sql(pred_sql) == normalize_sql(demo_gold_sql)
    ex, ex_pred_err, ex_gold_err = execution_accuracy(
        engine=engine,
        pred_sql=pred_sql,
        gold_sql=demo_gold_sql,
    )

    is_valid = bool(va_meta.success)
    matches_gold_result = bool(ex)

    attempt_rows.append(
        dict(
            attempt=i,
            pred_sql=pred_sql,
            va=int(is_valid),
            em=int(bool(em)),
            ex=int(matches_gold_result),
            # Prefer the runner's error; otherwise report the prediction error from the EX check.
            error=va_meta.error or ex_pred_err,
            gold_error=ex_gold_err,
        )
    )

    # Stop at the first candidate that both executes and matches the gold result.
    if is_valid and matches_gold_result:
        break

report_df = pd.DataFrame(attempt_rows)
display(report_df)

if not report_df.empty:
    # The last recorded attempt is reported as the selected one, whether or not it passed.
    final_row = report_df.iloc[-1].to_dict()
    print(f"Selected final attempt: {final_row['attempt']}")
    print(f"Selected SQL: {final_row['pred_sql']}")


## 7) Explicit field ordering demo

This shows `enforce_explicit_projection` and `reorder_projection` behavior for NLQs that list fields.


In [None]:
# Question that names its output fields in a specific order.
explicit_nlq = "List contact last name, customer name, and customer number for customers in France"
explicit_constraints = build_constraints(explicit_nlq, SCHEMA_SUMMARY_DEMO)

# Simulated model output: fields in a different order than the question, plus creditLimit,
# which the question does not mention.
explicit_raw = """
SELECT customerName, creditLimit, customerNumber, contactLastName
FROM customers
WHERE country = 'France'
ORDER BY customerName
"""

# Explicit fields are forwarded only when the constraints flag an explicit projection.
requested_fields = None
if explicit_constraints.get("explicit_projection"):
    requested_fields = explicit_constraints.get("explicit_fields")

explicit_post = debug_guarded_postprocess(
    explicit_raw,
    explicit_nlq,
    explicit_fields=requested_fields,
    required_fields=explicit_constraints.get("required_output_fields"),
)

print(f"Explicit fields detected: {explicit_constraints.get('explicit_fields')}")
print("\nChanged stages:")
for stage_record in explicit_post["steps"]:
    # Only stages that modified the SQL are listed.
    if not stage_record["changed"]:
        continue
    print(f"- {stage_record['stage']}")
print("\nFinal SQL:")
print(explicit_post["final_sql"])


## Done

You can now present the whole architecture without loading model weights:
- Prompting API
- Constraint policy
- SQL extraction
- Postprocessing trace
- Execution gate and scoring loop
