# Before vs After LLM Output (Measurable Uplift)

This notebook measures intent-classification quality before and after `open-vernacular-ai-kit` preprocessing.

- Input: mixed Gujarati-English support utterances
- Output: a JSON object with a single `intent` key (e.g. `{"intent": "order_status"}`)
- Metric: intent accuracy uplift (`after - before`)

In [None]:
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import Any

from open_vernacular_ai_kit import render_codemix

# Labeled examples live in a JSONL file: one JSON object per non-blank line.
DATA_PATH = Path("../docs/data/llm_uplift_examples.jsonl").resolve()

# Strip each line first so whitespace-only lines are skipped rather than
# handed to the JSON parser.
raw_lines = DATA_PATH.read_text(encoding="utf-8").splitlines()
rows: list[dict[str, Any]] = [
    json.loads(stripped) for stripped in map(str.strip, raw_lines) if stripped
]

print(f"loaded rows: {len(rows)} from {DATA_PATH}")
# Bare expression: the notebook displays the first two rows as the cell output.
rows[:2]

In [None]:
# Attach the preprocessed ("after") text to every example under "codemix",
# next to the untouched "raw" utterance used for the "before" measurement.
codemix_options = {"language": "gu", "translit_mode": "sentence"}
for row in rows:
    row["codemix"] = render_codemix(row["raw"], **codemix_options)

# Bare expression: the notebook displays the first three enriched rows.
rows[:3]

In [None]:
# Optional: set True to use OpenAI for real model outputs.
# When False, llm_intent() uses the rule-based mock_llm_intent() instead.
USE_OPENAI = False
# Model name passed as `model=` to the OpenAI client in openai_llm_intent().
OPENAI_MODEL = "gpt-4o-mini"

def _safe_json_parse(s: str) -> dict[str, Any]:
    try:
        obj = json.loads(s)
        if isinstance(obj, dict):
            return obj
    except Exception:
        pass
    return {"intent": "unknown"}

def mock_llm_intent(text: str) -> dict[str, Any]:
    """Return a rule-based intent for ``text`` without calling any model.

    Deliberately script-sensitive baseline to show preprocessing impact:
    every rule requires at least one Gujarati-script token, and all rules
    except the greeting one also require an English keyword, matched
    case-insensitively. Rules are checked in order and the first match wins.

    Args:
        text: Utterance to classify. ``None`` or an empty string is treated
            as empty text and yields ``"unknown"``.

    Returns:
        A dict with a single ``"intent"`` key whose value is one of
        ``order_status``, ``payment_issue``, ``return_request``,
        ``delivery_eta``, ``greeting`` or ``unknown``.
    """
    # Normalise once: the Gujarati-token checks below use ``text`` directly,
    # so guarding only the lower-cased copy would still raise TypeError on None.
    text = text or ""
    t = text.lower()
    if "શું" in text and ("order" in t or "status" in t):
        return {"intent": "order_status"}
    if "નથી" in text and "payment" in t:
        return {"intent": "payment_issue"}
    if "હું" in text and "return" in t:
        return {"intent": "return_request"}
    if "આજે" in text and "ક્યારે" in text and "delivery" in t:
        return {"intent": "delivery_eta"}
    if "કેમ" in text and ("તમે" in text or "હું" in text):
        return {"intent": "greeting"}
    return {"intent": "unknown"}

def openai_llm_intent(text: str) -> dict[str, Any]:
    """Classify ``text`` by sending it to an OpenAI model.

    A system message instructs the model to answer with JSON containing only
    an ``intent`` key, and ``text`` is sent as the user message. The reply
    text is passed through ``_safe_json_parse``.

    Args:
        text: Utterance to classify.

    Returns:
        Whatever ``_safe_json_parse`` produces for the model's reply text.
    """
    # Imported inside the function so the rest of the notebook does not need
    # the ``openai`` package unless this function is actually called.
    from openai import OpenAI

    system_prompt = (
        "Classify support intent and return strict JSON only with key intent. "
        "Allowed intents: order_status, payment_issue, return_request, delivery_eta, greeting, unknown."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]

    response = OpenAI().responses.create(model=OPENAI_MODEL, input=messages)
    return _safe_json_parse(response.output_text)

def llm_intent(text: str) -> dict[str, Any]:
    """Classify ``text`` with the backend selected by ``USE_OPENAI``.

    Args:
        text: Utterance to classify.

    Returns:
        The intent dict from ``mock_llm_intent`` when ``USE_OPENAI`` is
        falsy, otherwise from ``openai_llm_intent``.

    Raises:
        RuntimeError: If ``USE_OPENAI`` is truthy but the ``OPENAI_API_KEY``
            environment variable is unset or empty.
    """
    if not USE_OPENAI:
        return mock_llm_intent(text)
    if os.getenv("OPENAI_API_KEY"):
        return openai_llm_intent(text)
    raise RuntimeError("Set OPENAI_API_KEY before USE_OPENAI=True")


In [None]:
def accuracy(expected: list[str], predicted: list[str]) -> float:
    """Return the fraction of positions where ``predicted`` equals ``expected``.

    Args:
        expected: Gold labels; its length is the denominator.
        predicted: Predicted labels, compared index by index against
            ``expected``. Items beyond ``len(expected)`` are never read.

    Returns:
        ``0.0`` when ``expected`` is empty, otherwise matches divided by
        ``len(expected)``.

    Raises:
        IndexError: If ``predicted`` is shorter than ``expected``.
    """
    total = len(expected)
    if total == 0:
        return 0.0
    hits = sum(1 for idx, want in enumerate(expected) if want == predicted[idx])
    return hits / total

# Gold labels, then predictions on the raw text ("before") and on the
# preprocessed codemix text ("after"). A reply without an "intent" key is
# counted as "unknown".
expected = [example["expected_intent"] for example in rows]
before_pred = [
    llm_intent(example["raw"]).get("intent", "unknown") for example in rows
]
after_pred = [
    llm_intent(example["codemix"]).get("intent", "unknown") for example in rows
]

before_acc, after_acc = (
    accuracy(expected, before_pred),
    accuracy(expected, after_pred),
)
# Positive uplift means preprocessing improved accuracy.
uplift = after_acc - before_acc

report = {
    "n_rows": len(rows),
    "mode": "openai" if USE_OPENAI else "mock",
    "before_accuracy": round(before_acc, 4),
    "after_accuracy": round(after_acc, 4),
    "uplift": round(uplift, 4),
}
print(json.dumps(report, indent=2, ensure_ascii=False))

In [None]:
# One line per example: 1-based index, both input variants, the gold label
# and both predictions, so individual hits and misses can be inspected.
for i, row in enumerate(rows):
    fields = (
        f"{i+1:02d}",
        f"raw={row['raw']}",
        f"codemix={row['codemix']}",
        f"exp={expected[i]}",
        f"before={before_pred[i]}",
        f"after={after_pred[i]}",
    )
    print(" | ".join(fields))

## How to run with a real LLM

1. `pip install openai jupyter`
2. `export OPENAI_API_KEY=...`
3. Set `USE_OPENAI = True` in the notebook and re-run all cells.

The notebook then prints the same before/after accuracy report, computed on the same labeled examples but using real model outputs instead of the built-in mock classifier.