# Risk / Control Heatmap — Agents Notebook
Generated: **2025-11-09T12:29:17Z**


```mermaid
flowchart LR 
  subgraph GRID["Heatmap (Risk severity by area)"]
    direction TB
    H0[Areas →]
    H1[PVA / APIM]
    H2["Orchestrator<br/>(SK / LangGraph)"]
    H3[Tools / RAG]
    H4["Data Plane<br/>(AOAI / Search / Storage)"]
    H5[Ops / CI-CD]
    L0[Latency]
    L1(( )):::med
    L2(( )):::high
    L3(( )):::high
    L4(( )):::med
    L5(( )):::med
    G0[Grounding Accuracy]
    G1(( )):::low
    G2(( )):::med
    G3(( )):::crit
    G4(( )):::med
    G5(( )):::low
    C0[Cost]
    C1(( )):::low
    C2(( )):::med
    C3(( )):::high
    C4(( )):::high
    C5(( )):::med
    R0[Data Residency]
    R1(( )):::low
    R2(( )):::med
    R3(( )):::med
    R4(( )):::crit
    R5(( )):::med
  end
  H0 --- H1 --- H2 --- H3 --- H4 --- H5
  L0 --- L1 --- L2 --- L3 --- L4 --- L5
  G0 --- G1 --- G2 --- G3 --- G4 --- G5
  C0 --- C1 --- C2 --- C3 --- C4 --- C5
  R0 --- R1 --- R2 --- R3 --- R4 --- R5
  subgraph CTRL["Key Controls (by risk)"]
    direction TB
    CLAT["Latency Controls:<br/>• APIM caching / retries<br/>• Concurrent graph tuning<br/>• p95 SLO with canary gates"]
    CGRA["Grounding Controls:<br/>• Hybrid search + filters<br/>• Citation coverage thresholds<br/>• Index drift detection / reindex"]
    CCST["Cost Controls:<br/>• Token meters & caps<br/>• Cheaper model fallback<br/>• Per-intent $/turn budgets"]
    CDRS["Residency Controls:<br/>• Private Endpoints (PE)<br/>• Regional pinning & DLP<br/>• RBAC + data tagging"]
  end
  L2 -.-> CLAT
  L3 -.-> CLAT
  G3 -.-> CGRA
  C3 -.-> CCST
  C4 -.-> CCST
  R4 -.-> CDRS
  subgraph LEGEND["Legend"]
    direction LR
    LG1[Low]:::low
    LG2[Med]:::med
    LG3[High]:::high
    LG4[Crit]:::crit
  end
  classDef low  fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px,color:#1b5e20;
  classDef med  fill:#fff8e1,stroke:#f9a825,stroke-width:2px,color:#7a4f01;
  classDef high fill:#ffebee,stroke:#c62828,stroke-width:2px,color:#7f1d1d;
  classDef crit fill:#f3e8ff,stroke:#6a1b9a,stroke-width:2px,color:#4a148c;
  class GRID,CTRL,LEGEND low
```


## SETUP


In [None]:
# %% [SETUP]
# Install/upgrade semantic-kernel into the interpreter that runs this session.
# Invoking pip through ``sys.executable -m pip`` targets the active
# environment; a bare shell ``pip`` may belong to a different Python, and a
# shell escape reports nothing back when the install fails.
import subprocess
import sys

try:
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-q", "-U", "semantic-kernel"]
    )
except subprocess.CalledProcessError as exc:
    # Best-effort: a previously installed copy may still be importable, so
    # report the failure instead of aborting the session.
    print("Install warning: pip exited with status", exc.returncode)
else:
    print("Installed: semantic-kernel")


## SETUP-ENV


In [None]:
# %% [SETUP-ENV]
import os, getpass
# Non-secret connection settings. ``setdefault`` only fills in a value when
# the variable is not already present, so pre-set values are left untouched.
for _env_name, _env_default in (
    ('AZURE_OPENAI_ENDPOINT', 'https://4th-openai-resource.openai.azure.com'),
    ('AZURE_OPENAI_DEPLOYMENT', 'gpt-35-turbo'),
    ('AZURE_OPENAI_API_VERSION', '2024-10-21'),
):
    os.environ.setdefault(_env_name, _env_default)

# The API key is not hard-coded: when it is absent (or empty) it is read from
# a hidden prompt and stored only in this process's environment.
_api_key = os.environ.get('AZURE_OPENAI_API_KEY')
if not _api_key:
    os.environ['AZURE_OPENAI_API_KEY'] = getpass.getpass('Enter AZURE_OPENAI_API_KEY (hidden): ').strip()
print('Azure OpenAI env ready (key is session-only).')


## KERNEL


In [None]:
# %% [KERNEL]
import os
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
# Create the kernel up front so the name ``kernel`` exists even when the
# Azure OpenAI service cannot be registered below.
kernel = Kernel()
try:
    # Connection settings are read from environment variables.
    _azure_settings = dict(
        service_id="azure",
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    )
    service = AzureChatCompletion(**_azure_settings)
    kernel.add_service(service)
except Exception as err:
    # Deliberately non-fatal: report the problem and keep the session usable.
    print("Kernel setup warning:", err)
else:
    print("Kernel ready (Azure OpenAI).")


## TOOLS


In [None]:
# %% [TOOLS]
def _stub_result(label, options):
    """Format the placeholder reply shared by every stub tool."""
    return f"stub:{label} {options}"


def tool_apim_caching(**kwargs):
    """Stub for the APIM caching/retry control; echoes the received options."""
    return _stub_result("APIM caching/retry", kwargs)


def tool_concurrent_graph(**kwargs):
    """Stub for concurrent graph tuning; echoes the received options."""
    return _stub_result("Concurrent graph tuning", kwargs)


def tool_grounded_retriever(**kwargs):
    """Stub for hybrid search with filters; echoes the received options."""
    return _stub_result("Hybrid search + filters", kwargs)


def tool_citation_threshold(**kwargs):
    """Stub for the citation coverage check; echoes the received options."""
    return _stub_result("Citation coverage check", kwargs)


def tool_token_meter(**kwargs):
    """Stub for token metering and caps; echoes the received options."""
    return _stub_result("Token meter/caps", kwargs)


def tool_cheaper_model(**kwargs):
    """Stub for the cheaper-model fallback; echoes the received options."""
    return _stub_result("Cheaper model fallback", kwargs)


def tool_private_endpoints(**kwargs):
    """Stub for Private Endpoints usage; echoes the received options."""
    return _stub_result("Private Endpoints usage", kwargs)


def tool_regional_pinning(**kwargs):
    """Stub for regional pinning / DLP; echoes the received options."""
    return _stub_result("Regional pinning / DLP", kwargs)

# Registry mapping a short tool name to the stub function that implements it.
# Agents list these names as their skills, and Agent.call looks tools up here.
TOOLS = {
    "apim_caching": tool_apim_caching,
    "concurrent_graph": tool_concurrent_graph,
    "grounded_retriever": tool_grounded_retriever,
    "citation_threshold": tool_citation_threshold,
    "token_meter": tool_token_meter,
    "cheaper_model": tool_cheaper_model,
    "private_endpoints": tool_private_endpoints,
    "regional_pinning": tool_regional_pinning,
}
# Show the registered tool names as a quick sanity check.
print("Tools:", list(TOOLS.keys()))


## AGENTS


In [None]:
# %% [AGENTS]
class Agent:
    """Demo agent: a named role with a system message and a list of tool names.

    Attributes:
        kernel: The kernel object handed in at construction (stored, not used
            by the methods of this class).
        name: Display name of the agent.
        system_message: Short description of the agent's responsibility.
        skills: Tool names associated with this agent.
    """

    def __init__(self, kernel, name, system_message, skills):
        self.kernel = kernel
        self.name = name
        self.system_message = system_message
        # A falsy ``skills`` (None or an empty list) becomes a new empty list.
        self.skills = skills if skills else []

    async def run(self, user_text: str) -> str:
        """Return ``user_text`` prefixed with a ``[demo:<name>]`` tag."""
        tag = f"[demo:{self.name}]"
        return f"{tag} {user_text}"

    def available_tools(self):
        """Return the skills, in order, that are registered in ``TOOLS``."""
        return [
            skill
            for skill in self.skills
            if skill in TOOLS
        ]

    def call(self, tool_name: str, **kwargs):
        """Invoke the registered tool ``tool_name`` with ``kwargs``.

        The lookup goes through the global ``TOOLS`` registry only; it does
        not check ``tool_name`` against ``self.skills``.

        Raises:
            ValueError: If no tool is registered under ``tool_name``.
        """
        tool = TOOLS.get(tool_name)
        if tool:
            return tool(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

# One agent per area, each given the names of the tools it is associated with.
agent_pva_apim = Agent(
    kernel,
    name="PVA / APIM",
    system_message="Ingress latency and correlation via PVA/APIM.",
    skills=["apim_caching"],
)
agent_orchestrator = Agent(
    kernel,
    name="Orchestrator (SK/LangGraph)",
    system_message="Planning, concurrency, retries.",
    skills=["concurrent_graph", "token_meter", "cheaper_model"],
)
agent_tools_rag = Agent(
    kernel,
    name="Tools / RAG",
    system_message="Grounding quality and citations.",
    skills=["grounded_retriever", "citation_threshold"],
)
agent_data_plane = Agent(
    kernel,
    name="Data Plane (AOAI/Search/Storage)",
    system_message="Residency via PE and pinning.",
    skills=["private_endpoints", "regional_pinning"],
)
agent_ops = Agent(
    kernel,
    name="Ops / CI-CD",
    system_message="SLOs and cost posture.",
    skills=["token_meter", "cheaper_model"],
)

# List the agent names in creation order as a quick sanity check.
_roster = (agent_pva_apim, agent_orchestrator, agent_tools_rag, agent_data_plane, agent_ops)
print("Agents:", [member.name for member in _roster])


## WIRES


In [None]:
# %% [WIRES]
# Routing table: risk category -> key of the agent that handles it.
# Every value is expected to be a key of AGENT_INDEX (validate_wiring checks this).
ROUTES = {
    "Latency": "pva_apim",
    "Grounding Accuracy": "tools_rag",
    "Cost": "ops",
    "Data Residency": "data_plane",
}
# Lookup table: agent key -> Agent instance.
# "orchestrator" is registered here but is not the target of any route above.
AGENT_INDEX = {
    "pva_apim": agent_pva_apim,
    "orchestrator": agent_orchestrator,
    "tools_rag": agent_tools_rag,
    "data_plane": agent_data_plane,
    "ops": agent_ops,
}
def validate_wiring():
    """Check that every route resolves to an agent with at least one usable tool.

    Returns:
        A list of problem descriptions, one per faulty route; empty when
        every route in ``ROUTES`` is sound.
    """
    findings = []
    for risk, key in ROUTES.items():
        target = AGENT_INDEX.get(key)
        if target:
            # The agent exists; it still needs at least one registered tool.
            if not target.available_tools():
                findings.append(f"{risk} -> {target.name} has no available tools")
        else:
            findings.append(f"{risk} -> missing agent key '{key}'")
    return findings

# Summary statistics over the routing table.
total_wires = len(ROUTES)
distinct_agents = len(set(ROUTES.values()))
# Agents that are registered but never targeted by a route.
unreferenced_agents = sorted(set(AGENT_INDEX) - set(ROUTES.values()))
# Invert the routing table: agent key -> list of risks routed to it.
targets_by_agent = {}
for risk, key in ROUTES.items():
    targets_by_agent.setdefault(key, []).append(risk)

issues = validate_wiring()
print(f"Wires: {total_wires} (distinct agents: {distinct_agents})")
for agent_key, risks in targets_by_agent.items():
    # A route may name an agent key that is not registered. Use a placeholder
    # label in that case so the summary (and the issue list printed below)
    # is still produced instead of stopping on a KeyError.
    routed_agent = AGENT_INDEX.get(agent_key)
    agent_name = routed_agent.name if routed_agent else f"<missing agent '{agent_key}'>"
    print(f"  - {agent_name} ← {len(risks)} risk(s): {', '.join(risks)}")
if unreferenced_agents:
    print(f"Unreferenced agents: {', '.join(unreferenced_agents)}")
print("Wiring OK" if not issues else "Wiring issues:\n- " + "\n- ".join(issues))


## DEMO


In [None]:
# %% [DEMO]
import asyncio, time
async def demo_run():
    """Route four sample risk reports to their agents and collect the results.

    For each sample the risk is resolved through ``ROUTES`` and
    ``AGENT_INDEX``, the agent's demo tool call (if one is defined for that
    agent key) is made, and the agent's ``run`` coroutine is awaited.

    Returns:
        A tuple ``(elapsed_ms, outputs)`` where ``elapsed_ms`` is the whole
        run's duration in integer milliseconds and ``outputs`` is a list of
        ``(risk, agent_name, tool_output, note)`` tuples in sample order.
        ``tool_output`` is ``None`` when no demo call is defined for the agent.
    """
    # perf_counter is monotonic, so the interval cannot be distorted by
    # wall-clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    samples = [
        ("Latency", "p95 latency is spiking in APIM."),
        ("Grounding Accuracy", "Ensure citations cover all key claims."),
        ("Cost", "Cap cost to $0.01 per turn for FAQ."),
        ("Data Residency", "Restrict data plane to region US only via PE."),
    ]
    # Demo tool invocation per agent key: (tool name, keyword arguments).
    demo_calls = {
        "pva_apim": ("apim_caching", {"policy": "retry-3", "cache_ttl": "30s"}),
        "tools_rag": ("grounded_retriever", {"k": 5, "filter": "tenant:acme"}),
        "ops": ("token_meter", {"cap_tokens": 2000}),
        "data_plane": ("private_endpoints", {"services": ["aoai", "search", "storage"]}),
    }
    outputs = []
    for risk, text in samples:
        key = ROUTES[risk]
        agent = AGENT_INDEX[key]
        tool_out = None
        if key in demo_calls:
            tool_name, tool_kwargs = demo_calls[key]
            tool_out = agent.call(tool_name, **tool_kwargs)
        note = await agent.run(text)
        outputs.append((risk, agent.name, tool_out, note))
    elapsed_ms = int((time.perf_counter() - started) * 1000)
    return elapsed_ms, outputs

# Run the demo whether or not an event loop is already active in this thread.
# A top-level ``await`` is avoided on purpose: outside IPython it is a
# compile-time SyntaxError, which no runtime ``except`` clause can catch.
from concurrent.futures import ThreadPoolExecutor

try:
    asyncio.get_running_loop()
except RuntimeError:
    # No loop in this thread (plain script): asyncio.run can own one.
    elapsed_ms, outputs = asyncio.run(demo_run())
else:
    # A loop is already running here (e.g. a notebook kernel). asyncio.run
    # refuses to nest inside it, so drive the coroutine on a fresh loop in a
    # worker thread and wait for its result.
    with ThreadPoolExecutor(max_workers=1) as _pool:
        elapsed_ms, outputs = _pool.submit(asyncio.run, demo_run()).result()

print("Elapsed (ms):", elapsed_ms)
for risk, agent_name, tool, note in outputs:
    print(f"Risk: {risk} → Agent: {agent_name}\n  Tool: {tool}\n  Note: {note}\n")
