In [1]:
#!pip install google-adk google-genai pandas
#!pip install nest_asyncio
# Disable the internal deprecated print in Runner


import builtins

# Re-running this cell must not wrap the wrapper: when print is already
# patched, recover the genuine print stored on the wrapper instead of
# capturing the patch itself.
_real_print = getattr(builtins.print, "_wrapped_print", builtins.print)


def patched_print(*args, **kwargs):
    """Replacement for ``print`` that drops one specific deprecation notice.

    A call is suppressed when its first positional argument is a string
    containing "Deprecated. Please migrate"; every other call is forwarded
    unchanged to the real ``print`` and its return value is passed back.

    NOTE(review): the recorded cell outputs further down still show this
    notice, so it is presumably not emitted through print() -- verify whether
    this patch is needed at all.
    """
    if args and isinstance(args[0], str) and "Deprecated. Please migrate" in args[0]:
        return None
    return _real_print(*args, **kwargs)


# Remember the genuine print on the wrapper so a later re-run can find it.
patched_print._wrapped_print = _real_print
builtins.print = patched_print
import os
import json
from pathlib import Path
from typing import Dict, Any

import pandas as pd

from google.genai.types import Content, Part
from google.genai import types

# ADK core components
from google.adk.agents import LlmAgent
from google.adk.models.google_llm import Gemini
import google.adk.runners as adk_runners
from google.adk.runners import Runner


from google.adk.sessions import InMemorySessionService
from google.adk.memory import InMemoryMemoryService

# Tools
from google.adk.tools import FunctionTool
from google.adk.tools.tool_context import ToolContext


In [2]:
# 1) API key from config.py (kept out of the notebook itself)
try:
    from config import GOOGLE_API_KEY
except ImportError as exc:
    # ImportError is raised both when config.py is absent and when it exists
    # but does not define GOOGLE_API_KEY, so the message covers both cases.
    raise RuntimeError(
        "Missing config.py (or it does not define GOOGLE_API_KEY). "
        "Create a file named config.py with:\n"
        "GOOGLE_API_KEY = \"your_key_here\""
    ) from exc
else:
    # Only the import is guarded; publish the key through the environment.
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# 2) Filesystem layout -- every path is relative to BASE_DIR
BASE_DIR = Path(".")
KB_DIR = BASE_DIR.joinpath("knowledge_base")
DATA_DIR = BASE_DIR.joinpath("data")

# One plain-text knowledge-base file per support category
KB_IT_FILE = KB_DIR.joinpath("kb_it.txt")
KB_HR_FILE = KB_DIR.joinpath("kb_hr.txt")
KB_SOFTWARE_FILE = KB_DIR.joinpath("kb_software.txt")

# Input tickets
TICKETS_FILE = DATA_DIR.joinpath("tickets.json")

# 3) Identifiers passed to the ADK runner and session calls
APP_NAME = "ai_enterprise_support_desk"
USER_ID = "demo_user"

# 4) Gemini model name and the HTTP retry options handed to each Gemini model
# Alternative model: "gemini-2.0-flash"
GEMINI_MODEL = "gemini-2.5-flash"
retry_config = types.HttpRetryOptions(
    # Retry only on rate limiting (429) and server-side errors (500/503/504).
    http_status_codes=[429, 500, 503, 504],
    initial_delay=1,
    exp_base=7,
    attempts=5,
)


In [3]:
# Lower-case category name -> knowledge-base file searched for that category
CATEGORY_TO_FILE = dict(
    it=KB_IT_FILE,
    hr=KB_HR_FILE,
    software=KB_SOFTWARE_FILE,
)


def search_knowledge_base(category: str, query: str) -> dict:
    """
    Simple keyword search in the knowledge base.

    The file for ``category`` (looked up case-insensitively in
    CATEGORY_TO_FILE) is scanned line by line:

    1. Lines containing the whole query as a case-insensitive substring win.
    2. If no line contains the whole query, the query is split on whitespace
       and lines containing at least one keyword are returned, lines with
       more distinct keyword hits first (file order breaks ties). Keywords
       of one or two characters are ignored unless the query has no longer
       word, because they match inside unrelated words.

    Args:
        category: Category name such as "IT", "HR" or "Software".
        query: Free-text keyword or short phrase.

    Returns:
        ``{"results": [...]}`` with at most 10 matching lines, or a single
        explanatory message when the category is unknown, the file is
        missing, the query is empty, or nothing matches.
    """
    category = (category or "").lower().strip()

    if category not in CATEGORY_TO_FILE:
        return {"results": [f"Unknown category: {category}"]}

    path = CATEGORY_TO_FILE[category]
    if not path.exists():
        return {"results": [f"KB file not found for category: {category}"]}

    q = (query or "").lower().strip()
    if not q:
        # "" is a substring of every line, so an empty query would otherwise
        # return the first lines of the file as if they were matches.
        return {"results": ["Empty query."]}

    lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()

    # Pass 1: whole-phrase match.
    matches = [ln for ln in lines if q in ln.lower()]

    # Pass 2: per-keyword match, ranked by number of distinct keywords hit.
    if not matches:
        words = list(dict.fromkeys(q.split()))
        keywords = [w for w in words if len(w) > 2] or words
        scored = []
        for ln in lines:
            lowered = ln.lower()
            hits = sum(1 for w in keywords if w in lowered)
            if hits:
                scored.append((hits, ln))
        # list.sort is stable, so equally ranked lines keep file order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        matches = [ln for _, ln in scored]

    if not matches:
        matches = ["No matches found."]

    return {"results": matches[:10]}  # cap results


# Wrap the search function as an ADK tool so it can be listed in an agent's `tools`.
kb_tool = FunctionTool(search_knowledge_base)


In [4]:
# Session + memory
# Module-level service instances; every Runner built in this notebook is given
# these same two objects.
# NOTE(review): presumably purely in-process state that does not survive a
# kernel restart -- confirm against the ADK documentation.
session_service = InMemorySessionService()
memory_service = InMemoryMemoryService()

# 1) Classifier agent -- assigns a ticket to IT, HR or Software and replies in raw JSON
classifier_agent = LlmAgent(
    model=Gemini(retry_options=retry_config, model=GEMINI_MODEL),
    name="classifier_agent",
    instruction=(
        # Output-format guard rails
        "You MUST output VALID RAW JSON ONLY.\n"
        "NO markdown, NO code fences, NO ``` AT ALL.\n"
        "No text before or after JSON. JSON ONLY.\n\n"
        # Task
        "Classify the support ticket into exactly one category:\n"
        "- IT\n- HR\n- Software\n\n"
        # Reply schema
        "Return JSON EXACTLY in this format:\n"
        "{\n"
        '  "category": "IT" | "HR" | "Software",\n'
        '  "reasoning": "short explanation",\n'
        '  "trace_id": "unique short identifier"\n'
        "}"
    ),
)

# 2) Retrieval agent -- the only agent given the knowledge-base search tool
retrieval_agent = LlmAgent(
    model=Gemini(retry_options=retry_config, model=GEMINI_MODEL),
    name="retrieval_agent",
    tools=[kb_tool],
    instruction=(
        # Grounding rules: snippets may only come from the tool
        "You ONLY return JSON. No text before or after JSON.\n"
        "You MUST call the tool search_knowledge_base EXACTLY once.\n"
        "Never hallucinate snippets yourself.\n"
        "The only allowed source of snippets is the tool.\n\n"
        # What the prompt will contain
        "INPUT:\n"
        "- Ticket text\n- Category (IT, HR, Software)\n\n"
        # Steps and reply schema
        "YOUR JOB:\n"
        "1. Extract a short keyword from the ticket.\n"
        "2. Call search_knowledge_base(category, query).\n"
        "3. Return ONLY this JSON:\n"
        "{\n"
        '  "category": "IT" | "HR" | "Software",\n'
        '  "query": "keyword used",\n'
        '  "snippets": ["snippet1", "snippet2", "..."]\n'
        "}"
    ),
)

# 3) Response agent -- writes the employee-facing answer as raw JSON
response_agent = LlmAgent(
    model=Gemini(retry_options=retry_config, model=GEMINI_MODEL),
    name="response_agent",
    instruction=(
        # Output-format guard rails
        "ABSOLUTE RULES:\n"
        "- NEVER output ```.\n"
        "- NEVER output markdown.\n"
        "- NEVER output code fences of any kind.\n"
        "- Output MUST be RAW JSON ONLY.\n\n"
        "You ALWAYS reply with valid JSON ONLY.\n"
        "NO text before or after the JSON.\n\n"
        # Reply schema
        "Return JSON EXACTLY in this format:\n"
        "{\n"
        '  "answer": "final helpful message to the employee",\n'
        '  "confidence": 0.0,\n'
        '  "category": "IT" | "HR" | "Software"\n'
        "}"
    ),
)

# 4) Supervisor agent -- judges a proposed answer: approve, retry retrieval, or escalate
supervisor_agent = LlmAgent(
    model=Gemini(retry_options=retry_config, model=GEMINI_MODEL),
    name="supervisor_agent",
    instruction=(
        "You are a strict JSON-only supervisor agent.\n\n"
        # Allowed verdicts
        "You must decide:\n"
        "- APPROVE\n- RETRY_RETRIEVAL\n- ESCALATE\n\n"
        # Reply schema
        "Return JSON ONLY in this format:\n"
        "{\n"
        '  "decision": "APPROVE" | "RETRY_RETRIEVAL" | "ESCALATE",\n'
        '  "reason": "short explanation"\n'
        "}"
    ),
)


In [5]:
def parse_json_stripping_fences(raw: str):
    """
    Parse JSON that may be wrapped in ``` or ```json fences.

    Two strategies are tried in order:

    1. If the text starts with a fence, drop the opening fence line and a
       trailing fence line, then parse what is left (plain text is parsed
       as-is).
    2. If that fails, scan the original text for a "{" from which a complete
       JSON object can be decoded and return that object. This recovers
       replies such as a single-line fenced object or JSON surrounded by
       prose. Only objects are recovered this way, to avoid picking up
       bracketed fragments of ordinary prose.

    Args:
        raw: Text produced by a model.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no JSON could be decoded; the error from the
            first strategy is the one raised.
    """
    txt = raw.strip()
    candidate = txt

    if txt.startswith("```"):
        lines = txt.splitlines()

        # Remove first fence line like ``` or ```json
        if lines and lines[0].startswith("```"):
            lines = lines[1:]

        # Remove last fence line if it's ``` (tolerating indentation)
        if lines and lines[-1].strip().startswith("```"):
            lines = lines[:-1]

        candidate = "\n".join(lines).strip()

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as first_error:
        # raw_decode parses one JSON value starting at an index and ignores
        # whatever follows it, which is what makes prose/fence residue harmless.
        decoder = json.JSONDecoder()
        for start, char in enumerate(txt):
            if char != "{":
                continue
            try:
                value, _end = decoder.raw_decode(txt, start)
            except json.JSONDecodeError:
                continue
            return value
        raise first_error


# The synchronous wrapper below drives coroutines with
# loop.run_until_complete(); nest_asyncio is applied so that call can be made
# from a notebook cell.
# NOTE(review): presumably required because Jupyter already runs an event
# loop -- confirm against the nest_asyncio documentation.
import nest_asyncio
import asyncio
nest_asyncio.apply()

async def _run_agent_once_async(agent, user_message, session_id):
    """Send one user message to ``agent`` and return the most useful reply text.

    A fresh Runner is built around the shared session and memory services,
    the message is run in the given session, and text is collected from the
    streamed events.

    Args:
        agent: The agent to run; its ``name`` is used to pick out its events.
        user_message: Text sent as the user turn.
        session_id: Id of the session the run takes place in.

    Returns:
        The latest collected text that contains both "{" and "}" (i.e. looks
        like JSON); otherwise the last collected text; otherwise "".
    """
    content = Content(role="user", parts=[Part(text=user_message)])

    runner = Runner(
        app_name=APP_NAME,
        agent=agent,
        session_service=session_service,
        memory_service=memory_service,
    )

    collected_texts = []

    async for event in runner.run_async(
        user_id=USER_ID,
        session_id=session_id,
        new_message=content,
    ):
        # Capture LLM text. Content, parts and text can each be absent or
        # None (e.g. a part that carries no text), so every level is guarded;
        # a None in the list would make the "{" scan below raise TypeError.
        if getattr(event, "author", None) == agent.name:
            event_content = getattr(event, "content", None)
            for part in getattr(event_content, "parts", None) or []:
                text = getattr(part, "text", None)
                if text:
                    collected_texts.append(text)

        # Capture tool results
        # NOTE(review): this relies on events exposing `type` and `output`
        # attributes; confirm ADK events actually carry them, otherwise this
        # branch never fires.
        if getattr(event, "type", None) == "tool_result":
            output = getattr(event, "output", None)
            if output:
                collected_texts.append(str(output))

    # Prefer latest JSON-looking text
    for text in reversed(collected_texts):
        if "{" in text and "}" in text:
            return text

    return collected_texts[-1] if collected_texts else ""


def run_agent_once(agent, user_message, session_id):
    """Blocking wrapper: run one agent turn to completion and return its text.

    Drives ``_run_agent_once_async`` on the current event loop so the
    pipeline can be written as ordinary synchronous code in a notebook.
    """
    event_loop = asyncio.get_event_loop()
    pending_reply = _run_agent_once_async(agent, user_message, session_id)
    return event_loop.run_until_complete(pending_reply)


def ensure_session(session_id: str):
    """Create the session for ``session_id`` unless it already exists.

    Uses the awaitable ``get_session`` / ``create_session`` methods of the
    session service, driven on the current event loop in the same way as
    ``run_agent_once``. The ``*_sync`` variants used previously are
    presumably the source of the "Deprecated. Please migrate to the async
    method." notice visible in the recorded cell output.

    Args:
        session_id: Id of the session to look up or create under
            APP_NAME / USER_ID.

    Returns:
        None.
    """

    async def _get_or_create():
        existing = await session_service.get_session(
            app_name=APP_NAME,
            user_id=USER_ID,
            session_id=session_id,
        )

        if existing is None:
            await session_service.create_session(
                app_name=APP_NAME,
                user_id=USER_ID,
                session_id=session_id,
            )

    asyncio.get_event_loop().run_until_complete(_get_or_create())


In [6]:
def process_ticket(ticket_text: str, ticket_id: str | None = None) -> Dict[str, Any]:
    """
    Run the full multi-agent pipeline on a single ticket.
    """
    session_id = ticket_id or "ticket_" + str(abs(hash(ticket_text)) % 10_000_000)

    # Ensure base session exists
    ensure_session(session_id)

   
    # 1) Classifier
    
    classifier_prompt = (
        "Classify the support ticket.\n"
        "Respond with JSON only.\n\n"
        f"TICKET:\n{ticket_text}\n"
    )

    classifier_raw = run_agent_once(classifier_agent, classifier_prompt, session_id)

    def parse_classifier(raw: str):
        try:
            return parse_json_stripping_fences(raw)
        except json.JSONDecodeError:
            return None

    classifier_json = parse_classifier(classifier_raw)

    # Retry ONCE with the SAME session_id (no '_retry')
    if classifier_json is None:
        classifier_raw_retry = run_agent_once(
            classifier_agent,
            classifier_prompt,
            session_id,   # <<< FIX: no "_retry"
        )
        classifier_json = parse_classifier(classifier_raw_retry)

    # If still invalid JSON â†’ assign UNKNOWN
    if classifier_json is None:
        classifier_json = {
            "category": "UNKNOWN",
            "reasoning": "Classifier returned invalid JSON twice.",
            "trace_id": f"classifier_parse_error_{session_id}",
        }

    category = classifier_json.get("category", "UNKNOWN")

    # 2) Retrieval
    
    retrieval_prompt = (
        "Retrieve grounded documentation.\n"
        "You MUST use the search_knowledge_base tool.\n"
        "Return JSON only.\n\n"
        f"CATEGORY: {category}\n"
        f"TICKET:\n{ticket_text}\n"
    )

    retrieval_raw = run_agent_once(retrieval_agent, retrieval_prompt, session_id)

    try:
        retrieval_json = parse_json_stripping_fences(retrieval_raw)
    except json.JSONDecodeError:
        retrieval_json = {
            "category": category,
            "query": "",
            "snippets": [
                f"Failed to parse retrieval JSON. Raw: {retrieval_raw[:180]}"
            ],
        }

    snippets = (
        retrieval_json.get("snippets")
        or retrieval_json.get("results")
        or []
    )

 
    # 3) Response
    
    response_prompt = (
        "Generate a final answer for the employee.\n"
        "Return JSON only.\n\n"
        f"TICKET:\n{ticket_text}\n\n"
        f"CATEGORY: {category}\n\n"
        "DOCUMENTATION SNIPPETS:\n"
        + "\n".join(f"- {s}" for s in snippets)
        + "\n"
    )

    response_raw = run_agent_once(response_agent, response_prompt, session_id)

    try:
        response_json = parse_json_stripping_fences(response_raw)
    except json.JSONDecodeError:
        response_json = {
            "answer": response_raw,
            "confidence": 0.4,
            "category": category,
        }

    response_answer = response_json.get("answer") or ""
    response_conf = response_json.get("confidence") or 0.0

    
    # 4) Supervisor

    supervisor_prompt = (
        "Evaluate the proposed answer with JSON only.\n\n"
        f"TICKET:\n{ticket_text}\n\n"
        f"ANSWER:\n{response_answer}\n\n"
        f"CONFIDENCE: {response_conf}\n\n"
        "SNIPPETS:\n"
        + "\n".join(f"- {s}" for s in snippets)
        + "\n"
    )

    supervisor_raw = run_agent_once(supervisor_agent, supervisor_prompt, session_id)

    try:
        supervisor_json = parse_json_stripping_fences(supervisor_raw)
    except json.JSONDecodeError:
        supervisor_json = {
            "decision": "APPROVE",
            "reason": f"Failed to parse supervisor JSON. Raw: {supervisor_raw[:180]}",
        }

    decision = (supervisor_json.get("decision", "APPROVE") or "APPROVE").upper()

    
    # 5) Optional second pass
   
    if decision == "RETRY_RETRIEVAL":
        # New retrieval
        retrieval_prompt_2 = (
            "RETRY: You must retrieve better snippets.\n"
            "Call search_knowledge_base again.\n"
            "Return JSON only.\n\n"
            f"CATEGORY: {category}\n"
            f"TICKET:\n{ticket_text}\n"
        )

        retrieval_raw_2 = run_agent_once(retrieval_agent, retrieval_prompt_2, session_id)

        try:
            retrieval_json_2 = parse_json_stripping_fences(retrieval_raw_2)
            snippets_2 = (
                retrieval_json_2.get("snippets")
                or retrieval_json_2.get("results")
                or []
            )
        except json.JSONDecodeError:
            snippets_2 = snippets

        # New response
        response_prompt_2 = (
            "SECOND ATTEMPT at final answer.\n"
            "Return JSON only.\n\n"
            f"TICKET:\n{ticket_text}\n\n"
            f"CATEGORY: {category}\n\n"
            "SNIPPETS:\n"
            + "\n".join(f"- {s}" for s in snippets_2)
            + "\n"
        )

        response_raw_2 = run_agent_once(response_agent, response_prompt_2, session_id)

        try:
            response_json_2 = parse_json_stripping_fences(response_raw_2)
            response_json = response_json_2
            snippets = snippets_2
        except Exception:
            pass

        # Final supervisor check
        supervisor_prompt_2 = (
            "FINAL evaluation.\n"
            "Return JSON only.\n\n"
            f"TICKET:\n{ticket_text}\n\n"
            f"ANSWER:\n{response_json.get('answer')}\n\n"
            f"CONFIDENCE: {response_json.get('confidence')}\n\n"
            "SNIPPETS:\n"
            + "\n".join(f"- {s}" for s in snippets)
            + "\n"
        )

        supervisor_raw_2 = run_agent_once(supervisor_agent, supervisor_prompt_2, session_id)

        try:
            supervisor_json = parse_json_stripping_fences(supervisor_raw_2)
            decision = supervisor_json.get("decision", decision)
        except Exception:
            pass

        decision = (decision or "ESCALATE").upper()

    
    # Final output
   
    return {
        "ticket_id": str(ticket_id) if ticket_id is not None else "",
        "text": ticket_text,
        "category_pred": category,
        "classifier_json": classifier_json,
        "retrieval_json": retrieval_json,
        "response_json": response_json,
        "supervisor_json": supervisor_json,
        "final_decision": decision,
    }



In [7]:
# Load tickets.json into a DataFrame; fail early with a clear error when the
# file is missing or has no 'text' column.
if not TICKETS_FILE.exists():
    raise FileNotFoundError(f"tickets.json not found at: {TICKETS_FILE}")

tickets_df = pd.read_json(TICKETS_FILE)

if "text" not in tickets_df.columns:
    raise ValueError("tickets.json must contain a 'text' column with ticket text.")

# Smoke test: run the pipeline on one ticket, selected by row position
TICKET_INDEX = 2  # change as needed

row = tickets_df.iloc[TICKET_INDEX]
ticket_text = row["text"]
# Use the row's "id" when present, otherwise a placeholder built from the position.
ticket_id = row.get("id", f"ticket_{TICKET_INDEX}")

single_result = process_ticket(ticket_text, ticket_id=str(ticket_id))

# Carry the ground-truth label along when the dataset provides one.
if "true_category" in tickets_df.columns:
    single_result["true_category"] = row["true_category"]

# Last expression of the cell: shown as a one-row table.
pd.DataFrame([single_result])


Deprecated. Please migrate to the async method.
Deprecated. Please migrate to the async method.
Event from an unknown agent: classifier_agent, event id: caeb8387-23f5-4ec4-9502-6fc84dd2b54c
Event from an unknown agent: retrieval_agent, event id: 1289fd7a-443a-4ef1-9a62-4b0939061d11
Event from an unknown agent: retrieval_agent, event id: b087a08e-2f67-4bc6-bf22-1774ea365097
Event from an unknown agent: retrieval_agent, event id: d3b82953-fcdb-4a7b-a460-0e5448a3c972
Event from an unknown agent: classifier_agent, event id: caeb8387-23f5-4ec4-9502-6fc84dd2b54c
Event from an unknown agent: response_agent, event id: 38c72d13-ba24-4235-b44e-e7e4a5b4faf2
Event from an unknown agent: retrieval_agent, event id: 1289fd7a-443a-4ef1-9a62-4b0939061d11
Event from an unknown agent: retrieval_agent, event id: b087a08e-2f67-4bc6-bf22-1774ea365097
Event from an unknown agent: retrieval_agent, event id: d3b82953-fcdb-4a7b-a460-0e5448a3c972
Event from an unknown agent: classifier_agent, event id: caeb8387-

Unnamed: 0,ticket_id,text,category_pred,classifier_json,retrieval_json,response_json,supervisor_json,final_decision
0,3,I want to change my benefits.,HR,"{'category': 'HR', 'reasoning': 'Changing bene...","{'category': 'HR', 'query': 'change benefits',...",{'answer': 'I couldn't find specific documenta...,"{'decision': 'APPROVE', 'reason': 'The retriev...",APPROVE


In [8]:
# Run the pipeline over every ticket and collect one result dict per ticket.
all_results = []

for idx, row in tickets_df.iterrows():
    t_text = row["text"]
    # Use the row's "id" when present, otherwise the DataFrame index label.
    t_id = row.get("id", str(idx))

    res = process_ticket(t_text, ticket_id=str(t_id))

    # Carry the ground-truth label along when the dataset provides one.
    if "true_category" in tickets_df.columns:
        res["true_category"] = row["true_category"]

    all_results.append(res)

# Build the frame once from the collected records, then preview the first rows.
results_df = pd.DataFrame(all_results)
results_df.head()


Deprecated. Please migrate to the async method.
Deprecated. Please migrate to the async method.
Event from an unknown agent: classifier_agent, event id: 37d2917e-ad70-4d74-8e65-72d46814b3e5
Event from an unknown agent: retrieval_agent, event id: fefe35d4-34f3-43b1-b7ac-a34b13a5ac7b
Event from an unknown agent: retrieval_agent, event id: d7787788-4a29-4887-a30b-6656619adffa
Event from an unknown agent: retrieval_agent, event id: ff227ade-4bf2-4906-9ee1-48bdefc64631
Event from an unknown agent: classifier_agent, event id: 37d2917e-ad70-4d74-8e65-72d46814b3e5
Event from an unknown agent: response_agent, event id: 89361291-5bd9-4b6c-98b4-0ef6f94fa9d4
Event from an unknown agent: retrieval_agent, event id: fefe35d4-34f3-43b1-b7ac-a34b13a5ac7b
Event from an unknown agent: retrieval_agent, event id: d7787788-4a29-4887-a30b-6656619adffa
Event from an unknown agent: retrieval_agent, event id: ff227ade-4bf2-4906-9ee1-48bdefc64631
Event from an unknown agent: classifier_agent, event id: 37d2917e-

Unnamed: 0,ticket_id,text,category_pred,classifier_json,retrieval_json,response_json,supervisor_json,final_decision
0,1,My VPN stopped connecting after update. Error ...,IT,"{'category': 'IT', 'reasoning': 'The ticket de...","{'category': 'IT', 'query': 'VPN error 720', '...",{'answer': 'I'm sorry to hear your VPN stopped...,"{'decision': 'APPROVE', 'reason': 'The respons...",APPROVE
1,2,How do I request access to Tableau?,Software,"{'category': 'Software', 'reasoning': 'The use...","{'category': 'Software', 'query': 'Tableau acc...","{'answer': 'I apologize, but I couldn't find s...","{'decision': 'APPROVE', 'reason': 'The system ...",APPROVE
2,3,I want to change my benefits.,HR,"{'category': 'HR', 'reasoning': 'The request e...","{'category': 'HR', 'query': 'change benefits',...",{'answer': 'I couldn't find specific documenta...,"{'decision': 'APPROVE', 'reason': 'Despite no ...",APPROVE
3,4,My ID badge isn't scanning.,IT,"{'category': 'IT', 'reasoning': 'The issue inv...","{'category': 'IT', 'query': 'ID badge scanning...","{'answer': 'I apologize, but I couldn't find a...","{'decision': 'APPROVE', 'reason': 'The respons...",APPROVE


In [9]:
# 1) Automation vs escalation: raw counts of final decisions
metrics = {
    "total_tickets": len(results_df),
    "approved": int(results_df["final_decision"].eq("APPROVE").sum()),
    "escalated": int(results_df["final_decision"].eq("ESCALATE").sum()),
}

# Tickets whose reported supervisor verdict asks for another retrieval
if "supervisor_json" in results_df.columns:
    metrics["retry_requested"] = int(
        sum(d.get("decision") == "RETRY_RETRIEVAL" for d in results_df["supervisor_json"])
    )
else:
    metrics["retry_requested"] = 0

# Rates; max(..., 1) avoids dividing by zero when there are no tickets
metrics["automation_rate"] = metrics["approved"] / max(metrics["total_tickets"], 1)
metrics["escalation_rate"] = metrics["escalated"] / max(metrics["total_tickets"], 1)

# 2) Classification accuracy (only when ground truth is available)
if "true_category" in results_df.columns:
    metrics["classification_accuracy"] = float(
        results_df["category_pred"].eq(results_df["true_category"]).mean()
    )


# 3) Average model confidence
def _conf(row):
    """Return the confidence stored in a row's response_json as a float (0.0 if unusable)."""
    try:
        reported = row["response_json"].get("confidence", 0.0)
        return float(reported)
    except Exception:
        return 0.0


# Adds a "confidence" column to results_df in place.
results_df["confidence"] = results_df.apply(_conf, axis=1)
metrics["avg_confidence"] = float(results_df["confidence"].mean())

metrics


{'total_tickets': 4,
 'approved': 4,
 'escalated': 0,
 'retry_requested': 0,
 'automation_rate': 1.0,
 'escalation_rate': 0.0,
 'avg_confidence': 0.75}

In [10]:
# Show up to five approved tickets. The "confidence" column must already have
# been added to results_df before this cell runs.
approved_df = results_df[results_df["final_decision"] == "APPROVE"].head(5)
display(approved_df[["ticket_id", "text", "category_pred", "confidence"]])


# Show up to five escalated tickets (an empty table when nothing was escalated).
escalated_df = results_df[results_df["final_decision"] == "ESCALATE"].head(5)
display(escalated_df[["ticket_id", "text", "category_pred", "confidence"]])


Unnamed: 0,ticket_id,text,category_pred,confidence
0,1,My VPN stopped connecting after update. Error ...,IT,0.7
1,2,How do I request access to Tableau?,Software,1.0
2,3,I want to change my benefits.,HR,0.9
3,4,My ID badge isn't scanning.,IT,0.4


Unnamed: 0,ticket_id,text,category_pred,confidence
