# %% [SETUP]
Mermaid renderer + section headers. The two diagrams below (BDD and IBD) will render via the lightweight JS snippet.


In [None]:
# Minimal Mermaid renderer for notebooks
from IPython.display import HTML, display
# HTML/JS payload injected into the notebook page.
# - The <script> appends a tag that loads mermaid.min.js from the jsDelivr CDN; the
#   window._mermaidLoaded flag keeps re-runs of this cell from appending it again.
# - mermaid.initialize(...) is called from the tag's onload handler, i.e. only after
#   the CDN script has been fetched by the browser.
# NOTE(review): the CDN URL carries no version, so the mermaid release in use is not
# pinned — presumably whatever jsDelivr serves by default; confirm and consider pinning.
_mermaid_boot = '''
<script>
if (!window._mermaidLoaded) {
  const s = document.createElement('script');
  s.src = "https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js";
  s.onload = () => { window.mermaid && mermaid.initialize({ startOnLoad: true, securityLevel: 'loose' }); };
  document.head.appendChild(s);
  window._mermaidLoaded = true;
}
</script>
<style>
.mermaid { background: transparent; }
</style>
'''
display(HTML(_mermaid_boot))
# Printed as soon as the HTML has been emitted; the script itself loads asynchronously
# in the browser, so this line does not confirm that mermaid finished loading.
print("Mermaid ready.")


# Block Definition Diagram (BDD)

```mermaid
classDiagram
%% ===================== BLOCK DEFINITION DIAGRAM (BDD) =====================
class Agent {
  +name: string
  +trace_id: string
  +run(prompt): Answer
  -planner: Planner
  -policy: PolicyEngine
}

class Planner {
  +plan(prompt, context): Plan
  +select_tools(intent): ToolList
  -grounding: GroundingService
  -tools: ToolAdapter[*]
}

class ToolAdapter {
  +name: string
  +invoke(args): ToolResult
  +cost_estimate(args): Tokens
}

class GroundingService {
  +retrieve(query): ContextPacket
  +cite(selection): Citation[]
  -memory: MemoryStore
}

class MemoryStore {
  +get_thread_state(trace_id): ThreadState
  +put_thread_state(trace_id, state)
  +get_embeddings(ids): Vectors
}

class PolicyEngine {
  +precheck(prompt, plan): Decision
  +postcheck(answer, citations): Decision
  +allowlist(intent): ToolList
  +denylist(): Pattern[]
}

class Answer {
  +text: string
  +citations: Citation[]
  +cost: Tokens
  +confidence: float
}

class Plan {
  +intent: string
  +steps: Step[*]
  +tool_budget: Tokens
}

%% Relationships
Agent --> Planner : composes
Agent ..> PolicyEngine : uses
Planner ..> GroundingService : uses
Planner --> "0..*" ToolAdapter : selects
GroundingService --> MemoryStore : reads/writes
Agent --> Answer : returns
Planner --> Plan : returns
```

# Internal Block Diagram (IBD)

```mermaid
flowchart LR
%% ===================== INTERNAL BLOCK DIAGRAM (IBD) =====================
%% Agent, Planner, Tool Adapter, Grounding, Memory, Policy

subgraph TURN["Conversation Turn (Internal Wiring)"]
  direction LR

  A["Agent.Core\n(run)"]:::agent
  P["Planner\n(plan/select tools)"]:::planner
  PRE["Policy.Precheck\n(allow/deny, redact)"]:::policy
  POST["Policy.Postcheck\n(leakage, safety, confidence)"]:::policy

  G["Grounding Service\n(hybrid search → citations)"]:::ground
  M["Memory Store\n(thread state, embeddings)"]:::mem

  subgraph TAD["Tool Adapters"]
    direction TB
    T1[HTTP Tool]
    T2[SOAP Tool]
    T3[Calc Tool]
    T4[RAG Retriever Adapter]
  end
end

%% -------- DATA / CONTROL FLOWS --------
A -- "prompt + trace_id" --> PRE
PRE -- "decision + constraints\n(intent allowlist, budgets)" --> P

%% planning & grounding
P -- "query" --> G
G -- "context packet + citations" --> P
G --- M
M -. "get/put state, vectors" .- G

%% tool calls
P -- "invoke(args) within budget" --> TAD
TAD -- "tool result\n(latency, tokens)" --> P

%% answer assembly & safety
P -- "draft answer + citations" --> POST
POST -- "final answer / refuse / redact" --> A

%% ===================== STYLES =====================
classDef agent  fill:#e8f0fe,stroke:#1a73e8,stroke-width:2px,color:#0b468c;
classDef planner fill:#ecfeff,stroke:#06b6d4,stroke-width:2px,color:#134e4a;
classDef policy fill:#fff7ed,stroke:#fb923c,stroke-width:2px,color:#7c2d12;
classDef ground fill:#eef2ff,stroke:#4f46e5,stroke-width:2px,color:#111827;
classDef mem fill:#fef9c3,stroke:#f59e0b,stroke-width:2px,color:#7c2d12;

class A agent
class P planner
class PRE,POST policy
class G ground
class M mem
```


In [None]:
# %% [SETUP-ENV]
import os, getpass
# Fill in the non-secret Azure OpenAI settings, leaving any value that is already
# present in the environment untouched.
os.environ.update({
    var_name: fallback
    for var_name, fallback in {
        'AZURE_OPENAI_ENDPOINT': 'https://4th-openai-resource.openai.azure.com',
        'AZURE_OPENAI_DEPLOYMENT': 'gpt-35-turbo',
        'AZURE_OPENAI_API_VERSION': '2024-10-21',
    }.items()
    if var_name not in os.environ
})
# The API key is never hardcoded: when it is missing or empty, ask for it with a hidden
# prompt. If prompting fails for any reason, fall back to a placeholder value so the
# remaining cells can still run against the stub kernel.
if not os.environ.get('AZURE_OPENAI_API_KEY'):
    try:
        os.environ['AZURE_OPENAI_API_KEY'] = getpass.getpass('Enter AZURE_OPENAI_API_KEY (hidden): ').strip()
    except Exception:
        os.environ['AZURE_OPENAI_API_KEY'] = 'placeholder_key'
print('Azure OpenAI env ready (key is session-only).')


In [None]:
# %% [KERNEL]
import uuid, json, time, random, asyncio
from dataclasses import dataclass

@dataclass
class KernelResult:
    """Value object returned by ``MiniKernel.invoke_prompt``."""
    # Text produced for the prompt.
    text: str
    # Bookkeeping for the call; MiniKernel fills in "tokens_in", "tokens_out" and "citations".
    meta: dict

class MiniKernel:
    """Stand-in for an LLM kernel: echoes a truncated prompt after a short simulated delay."""

    def __init__(self):
        # Deployment name comes from the environment, with the notebook default as fallback.
        self.model = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-35-turbo")

    async def invoke_prompt(self, prompt: str, citations=None):
        """Return a ``KernelResult`` whose text is the model tag plus at most 180 prompt characters.

        ``meta`` carries a rough input token estimate (``len(prompt) // 4``), a fixed
        output token count of 40, and the citations passed in (or an empty list).
        """
        # Simulated latency: between 20 ms and 50 ms.
        simulated_delay = 0.02 + random.random()*0.03
        await asyncio.sleep(simulated_delay)

        ellipsis = "..." if len(prompt) > 180 else ""
        echoed = f"[{self.model}] {prompt[:180]}" + ellipsis
        usage = {"tokens_in": len(prompt)//4, "tokens_out": 40, "citations": citations or []}
        return KernelResult(text=echoed, meta=usage)

# Module-level kernel instance; its model name is read from the environment at construction.
kernel = MiniKernel()
print("Kernel ready:", kernel.model)


In [None]:
# %% [TOOLS]
from typing import Any, Dict
class ToolBase:
    """Base class for tool adapters; subclasses set ``name`` and override ``invoke``."""

    name = "base"

    def invoke(self, **kwargs) -> Dict[str, Any]:
        """Return a stub result that echoes the received keyword arguments."""
        payload: Dict[str, Any] = dict(tool=self.name, args=kwargs)
        payload.update(result="stub", latency_ms=5, tokens=10)
        return payload
class HttpTool(ToolBase):
    """Stubbed HTTP tool: performs no network call and always reports a 200 response."""

    name = "http"

    def invoke(self, method="GET", url="https://example.org", **_ignored):
        """Echo ``method`` and ``url`` with canned status/size/latency/token values.

        Extra keyword arguments are accepted and ignored.
        """
        response = {"tool": self.name, "method": method, "url": url}
        response.update(status=200, bytes=512, latency_ms=12, tokens=5)
        return response
class SoapTool(ToolBase):
    """Stubbed SOAP tool: performs no remote call and always reports status "OK"."""

    name = "soap"

    def invoke(self, action="Ping", **_ignored):
        """Echo ``action`` with canned status/latency/token values.

        Extra keyword arguments are accepted and ignored.
        """
        reply = {"tool": self.name, "action": action}
        reply.update(status="OK", latency_ms=20, tokens=8)
        return reply
class CalcTool(ToolBase):
    """Calculator tool: evaluates an expression string and returns it as a float."""
    name = "calc"
    def invoke(self, expr="1+1", **kw):
        """Evaluate ``expr`` and return a result dict.

        On success the dict holds ``expr`` and ``value`` (a float). On any failure
        (syntax error, unknown name, result not convertible to float, ...) the dict
        holds ``expr`` and ``error`` (the exception message) instead; no exception
        propagates. Extra keyword arguments are accepted and ignored.
        """
        try:
            # SECURITY NOTE(review): eval() with emptied builtins is NOT a sandbox —
            # attribute access on literals can still reach arbitrary objects, and
            # expressions such as huge exponents can exhaust CPU/memory. Only pass
            # trusted expressions until this is replaced by a restricted evaluator.
            val = float(eval(expr, {"__builtins__": {}}, {}))
        except Exception as e:
            # Echo the offending expression so failures are traceable, matching the
            # success payload which also carries "expr".
            return {"tool": self.name, "expr": expr, "error": str(e), "latency_ms": 1, "tokens": 2}
        return {"tool": self.name, "expr": expr, "value": val, "latency_ms": 1, "tokens": 1}
class RagTool(ToolBase):
    """Stubbed retrieval tool: returns canned grounding context and one demo citation."""
    name = "rag"
    def invoke(self, query="", **kw):
        """Return stub context and citations for ``query``.

        The context string embeds at most the first 60 characters of ``query``.
        Extra keyword arguments are accepted and ignored, matching the other tool
        adapters so that a generic caller can pass the same kwargs to any tool.
        """
        cites = [{"doc":"kb://demo", "quote":"sample", "score":0.87}]
        ctx = "This is stubbed grounding context for: " + query[:60]
        return {"tool": self.name, "query": query, "context": ctx, "citations": cites, "latency_ms": 18, "tokens": 20}

class MemoryStore:
    """In-memory store for per-trace thread state plus a stub embedding lookup."""

    def __init__(self):
        # trace_id -> state object, exactly as handed to put_thread_state
        self._threads = {}

    def get_thread_state(self, trace_id):
        """Return the stored state for ``trace_id``, or a fresh ``{"turns": 0}`` if none exists."""
        if trace_id in self._threads:
            return self._threads[trace_id]
        return {"turns": 0}

    def put_thread_state(self, trace_id, state):
        """Store ``state`` under ``trace_id``, replacing any previous value."""
        self._threads[trace_id] = state

    def get_embeddings(self, ids):
        """Return one placeholder 3-dimensional vector per entry of ``ids``."""
        vectors = []
        for _ in ids:
            vectors.append([0.1, 0.1, 0.1])
        return vectors

class PolicyEngine:
    """Stub policy layer: maps intents to allowed tools and approves every request."""

    def allowlist(self, intent):
        """Return the tool names allowed for ``intent``.

        A recognised intent allows only the tool of the same name; anything else
        allows all four tools.
        """
        every_tool = ("rag", "calc", "http", "soap")
        for tool_name in every_tool:
            if intent == tool_name:
                return [tool_name]
        return list(every_tool)

    def denylist(self):
        """Return the denied patterns; the stub denies nothing."""
        return []

    def precheck(self, prompt, plan):
        """Approve the request and attach the allowlist for the plan's intent."""
        allowed_tools = self.allowlist(plan.get("intent", ""))
        return {"ok": True, "constraints": {"allowed": allowed_tools}}

    def postcheck(self, answer, citations):
        """Approve the answer with a fixed confidence of 0.74."""
        return {"ok": True, "reason": None, "confidence": 0.74}

class GroundingService:
    """Retrieval facade that produces context packets and citations via ``RagTool``."""

    def __init__(self, memory: MemoryStore):
        # Kept as an attribute; the methods below do not read it.
        self.memory = memory

    def retrieve(self, query):
        """Run a fresh ``RagTool`` for ``query`` and return its context and citations."""
        rag_result = RagTool().invoke(query=query)
        packet, citations = rag_result["context"], rag_result["citations"]
        return {"packet": packet, "citations": citations}

    def cite(self, selection):
        """Return a single demo citation quoting the first 40 characters of ``selection``."""
        snippet = selection[:40]
        return [{"doc": "kb://demo", "quote": snippet}]

class Planner:
    """Keyword-driven planner: detects an intent, builds a fixed plan, and picks tools."""

    def __init__(self, grounding: GroundingService, tools: Dict[str, ToolBase]):
        self.grounding = grounding
        self.tools = tools

    def _intent(self, prompt: str) -> str:
        """Classify ``prompt`` by case-insensitive substring match; first matching rule wins."""
        lowered = prompt.lower()
        # Checked in this order: calc, then http, then soap.
        keyword_rules = (
            ("calc", ("sum", "calc", "compute")),
            ("http", ("http", "url")),
            ("soap", ("soap", "wsdl")),
        )
        for label, keywords in keyword_rules:
            for keyword in keywords:
                if keyword in lowered:
                    return label
        # Prompts mentioning cite/ground/search and every unmatched prompt both map to "rag".
        return "rag"

    def plan(self, prompt, context=None):
        """Return the plan dict for ``prompt``; ``context`` is accepted but not used."""
        return dict(
            intent=self._intent(prompt),
            steps=["analyze", "maybe_tool", "draft", "review"],
            tool_budget=200,
        )

    def select_tools(self, intent):
        """Return the registered tool objects for ``intent`` (falls back to "rag")."""
        by_intent = {label: [label] for label in ("calc", "http", "soap", "rag")}
        wanted = by_intent.get(intent, ["rag"])
        chosen = []
        for tool_name in wanted:
            if tool_name in self.tools:
                chosen.append(self.tools[tool_name])
        return chosen

# Module-level singletons shared by later cells: the tool registry keyed by tool name,
# the memory store, the grounding service built on that store, the policy engine, and
# the planner wired to the grounding service and the tool registry.
tools = {"http": HttpTool(), "soap": SoapTool(), "calc": CalcTool(), "rag": RagTool()}
memory = MemoryStore()
grounding = GroundingService(memory)
policy = PolicyEngine()
planner = Planner(grounding, tools)
print("Tools loaded:", list(tools.keys()))


In [None]:
# %% [AGENTS]
class Agent:
    """Runs one conversation turn: plan, policy precheck, tool/grounding, kernel, policy postcheck.

    Collaborators are injected rather than read from notebook globals, so the agent
    keeps working regardless of what later cells rebind. ``tools``, ``grounding`` and
    ``memory`` are optional: by default they are taken from the planner
    (``planner.tools``, ``planner.grounding``) and from the grounding service
    (``grounding.memory``).
    """
    def __init__(self, name, kernel, planner, policy, tools=None, grounding=None, memory=None):
        self.name, self.kernel, self.planner, self.policy = name, kernel, planner, policy
        self.tools = planner.tools if tools is None else tools
        self.grounding = planner.grounding if grounding is None else grounding
        self.memory = self.grounding.memory if memory is None else memory

    def available_tools(self):
        """Return the names of the tools this agent can call."""
        return list(self.tools.keys())

    def call(self, tool_name: str, **kwargs):
        """Invoke the named tool with ``kwargs``.

        Raises:
            ValueError: if ``tool_name`` is not registered.
        """
        if tool_name not in self.tools: raise ValueError(f"Tool not found: {tool_name}")
        return self.tools[tool_name].invoke(**kwargs)

    @staticmethod
    def _refusal(reason):
        """Build the result returned when a policy check rejects the turn."""
        return {"answer": f"Refused by policy: {reason}", "citations": [], "confidence": 0.0}

    async def run(self, prompt: str, trace_id=None):
        """Process ``prompt`` and return ``{"answer", "citations", "confidence"}``.

        A new trace id is generated when ``trace_id`` is falsy. If the policy precheck
        or postcheck reports ``ok`` as false, a refusal result is returned instead of
        the kernel's answer. The thread's ``turns`` counter is incremented once the
        kernel has been invoked; other keys already in the thread state are kept.
        """
        import uuid as _uuid
        tid = trace_id or str(_uuid.uuid4())
        state = self.memory.get_thread_state(tid)
        plan = self.planner.plan(prompt, context=state)
        pre = self.policy.precheck(prompt, plan)
        if not pre.get("ok", True):
            return self._refusal(pre.get("reason"))
        allowed = pre["constraints"]["allowed"]
        # Tools proposed by the planner, narrowed to what policy allows.
        use = [t for t in self.planner.select_tools(plan["intent"]) if t.name in allowed]
        citations, draft = [], ""
        if any(t.name == "rag" for t in use):
            ctx = self.grounding.retrieve(prompt)
            citations = ctx["citations"]
            draft = f"Grounded: {ctx['packet']} | Prompt: {prompt}"
        elif use:
            # Demo behaviour: calc always evaluates the fixed expression "2+2"; other
            # tools receive the prompt as a `query` keyword argument.
            r = use[0].invoke(query=prompt) if use[0].name!="calc" else use[0].invoke(expr="2+2")
            draft = f"Tool[{use[0].name}] → {r}"
        else:
            draft = f"LLM-only: {prompt}"
        kr = await self.kernel.invoke_prompt(draft, citations=citations)
        post = self.policy.postcheck(kr.text, citations)
        # Merge instead of overwrite so unrelated keys in the thread state survive.
        self.memory.put_thread_state(tid, {**state, "turns": state.get("turns",0)+1})
        if not post.get("ok", True):
            return self._refusal(post.get("reason"))
        return {"answer": kr.text, "citations": citations, "confidence": post.get("confidence", 0.5)}

# Five agents that differ only by name: every one is built from the same kernel,
# planner and policy objects.
agent_core = Agent("Agent.Core", kernel, planner, policy)
agent_calc = Agent("Agent.Calc", kernel, planner, policy)
agent_http = Agent("Agent.Http", kernel, planner, policy)
agent_soap = Agent("Agent.SOAP", kernel, planner, policy)
agent_rag  = Agent("Agent.RAG",  kernel, planner, policy)
print("Agents ready:", [a.name for a in [agent_core, agent_calc, agent_http, agent_soap, agent_rag]])


In [None]:
# %% [WIRES]
# Routing table: intent label -> key into AGENT_INDEX.
ROUTES = {
    "general": "agent_core",
    "calc": "agent_calc",
    "http": "agent_http",
    "soap": "agent_soap",
    "rag": "agent_rag",
}
# Agent registry: key used by ROUTES -> Agent instance.
AGENT_INDEX = {
    "agent_core": agent_core,
    "agent_calc": agent_calc,
    "agent_http": agent_http,
    "agent_soap": agent_soap,
    "agent_rag": agent_rag,
}

def validate_wiring():
    """Check every route in ROUTES and return a list of human-readable problems.

    A route is reported when its key has no (truthy) entry in AGENT_INDEX, or when
    the resolved agent reports no available tools. An empty list means no problems.
    """
    findings = []
    for route_intent, agent_key in ROUTES.items():
        target = AGENT_INDEX.get(agent_key)
        if target:
            if not target.available_tools():
                findings.append(f"{route_intent} -> {target.name} has no available tools")
        else:
            findings.append(f"{route_intent} -> missing agent key '{agent_key}'")
    return findings

# Summary statistics over the routing table.
total_wires = len(ROUTES)
distinct_agents = len(set(ROUTES.values()))
# Agents registered in AGENT_INDEX that no route points at.
unreferenced = sorted(set(AGENT_INDEX.keys()) - set(ROUTES.values()))
# Reverse view of ROUTES: agent key -> list of intents routed to it.
targets_by_agent = {}
for intent, key in ROUTES.items():
    targets_by_agent.setdefault(key, []).append(intent)

issues = validate_wiring()
print(f"Wires: {total_wires} (distinct agents: {distinct_agents})")
for agent_key, intents in targets_by_agent.items():
    agent_name = AGENT_INDEX[agent_key].name
    print(f"  - {agent_name} ← {len(intents)} route(s): {', '.join(intents)}")
if unreferenced: print(f"Unreferenced agents: {', '.join(unreferenced)}")
# Real newlines (not the two characters backslash + n) so each issue gets its own line.
print("Wiring OK" if not issues else "Wiring issues:\n- " + "\n- ".join(issues))


In [None]:
# %% [DEMO]
# Notebook-safe async demo (no asyncio.run() when a loop exists).
import asyncio, time

# Demo inputs as (intent, prompt text) pairs; the intent is looked up in ROUTES by demo_run.
samples = [
    ("rag",   "Ground this answer with citations about hybrid search."),
    ("calc",  "Please compute the sum 2+2."),
    ("http",  "Do an HTTP call to example.org"),
    ("soap",  "Call a SOAP action for demo"),
    ("general","Just chat and cite if needed."),
]

async def demo_run():
    """Run every entry of ``samples`` through its routed agent and collect the results.

    For each sample the routed agent first makes one direct tool call (none for the
    "general" intent), then runs the full agent turn. Returns a dict with the total
    wall-clock time in milliseconds ("elapsed_ms") and one record per sample ("runs").
    """
    # Fixed arguments for the direct tool call of each intent; "rag" is handled
    # separately because its argument is the sample text itself.
    direct_calls = {
        "calc": ("calc", {"expr": "6*7"}),
        "http": ("http", {"method": "GET", "url": "https://example.org"}),
        "soap": ("soap", {"action": "Ping"}),
    }
    started = time.time()
    runs = []
    for intent, text in samples:
        agent = AGENT_INDEX[ROUTES[intent]]

        if intent == "rag":
            tool_result = agent.call("rag", query=text)
        elif intent in direct_calls:
            tool_name, tool_kwargs = direct_calls[intent]
            tool_result = agent.call(tool_name, **tool_kwargs)
        else:
            tool_result = None

        reply = await agent.run(text)
        answer = reply["answer"]
        # Keep the printed answer short: at most 220 characters plus an ellipsis.
        preview = answer[:220] + ("..." if len(answer)>220 else "")
        runs.append({
            "intent": intent,
            "agent": agent.name,
            "tool_result": tool_result,
            "llm_result": preview,
            "confidence": reply["confidence"],
        })
    return {"elapsed_ms": int((time.time() - started)*1000), "runs": runs}

def _run_blocking(coro):
    """Run ``coro`` to completion and return its result, with or without an active event loop.

    Without a running loop (plain script) this is simply ``asyncio.run``. When a loop
    is already running in this thread (e.g. inside a notebook kernel), ``asyncio.run``
    cannot be used here, so the coroutine is executed on a fresh event loop in a
    short-lived worker thread. This avoids top-level ``await`` — a compile-time
    SyntaxError outside IPython that no ``try/except`` in the same cell can catch —
    and needs no third-party packages.
    """
    from concurrent.futures import ThreadPoolExecutor
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(coro)
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()

result = _run_blocking(demo_run())

print("Elapsed (ms):", result["elapsed_ms"])
for r in result["runs"]:
    # One blank line, then three lines per run: routing, direct tool result, agent answer.
    print(f"""\nIntent: {r['intent']} -> Agent: {r['agent']}
Tool: {r['tool_result']}
LLM:  {r['llm_result']} (conf={r['confidence']:.2f})""".rstrip())
