# Assembled Notebook — Indexing Lifecycle Agents
_Generated 2025-11-07T21:37:08.618064Z_

> Agents and tools synthesized from your indexing lifecycle diagram (not embedded).

In [None]:
# %% [SETUP]
!pip install -U semantic-kernel
!pip -q uninstall -y pydrive2

In [None]:
# %% [SETUP-ENV]
import os, getpass
# Fill in Azure OpenAI settings only when the environment does not already
# define them (setdefault leaves existing values untouched).
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://4th-openai-resource.openai.azure.com")
os.environ.setdefault("AZURE_OPENAI_DEPLOYMENT", "gpt-35-turbo")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-10-21")

# Prompt for the key (input hidden) when it is absent or empty; it is stored
# only in this process's environment.
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass(
        "Enter AZURE_OPENAI_API_KEY (hidden): "
    ).strip()

print("Azure OpenAI env ready (key is session-only).")

In [None]:
# %% [KERNEL]
import os
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

# Build the shared kernel and register one Azure OpenAI chat service on it.
kernel = Kernel()

# All connection settings come from the environment populated in the SETUP-ENV
# cell. api_version is passed explicitly so this cell honours
# AZURE_OPENAI_API_VERSION the same way the DEMO cell's registration does.
service = AzureChatCompletion(
    service_id='azure',
    api_key=os.getenv('AZURE_OPENAI_API_KEY'),
    deployment_name=os.getenv('AZURE_OPENAI_DEPLOYMENT'),
    endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
    api_version=os.getenv('AZURE_OPENAI_API_VERSION'),
)
kernel.add_service(service)
print('Kernel ready (Azure OpenAI)')

In [None]:
# %% [TOOLS]

def tool_queue_push(**kwargs):
    """Stub for the queue-push tool (enqueue a work item with priority and scope).

    Nothing is enqueued: returns the tag ``stub:queue_push`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:queue_push {kwargs}"

def tool_queue_pop(**kwargs):
    """Stub for the queue-pop tool (pop or peek a work item for a worker).

    No queue is read: returns the tag ``stub:queue_pop`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:queue_pop {kwargs}"

def tool_crawl(**kwargs):
    """Stub for the crawl tool (fetch content from a source URI).

    Nothing is fetched: returns the tag ``stub:crawl`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:crawl {kwargs}"

def tool_parse(**kwargs):
    """Stub for the parse tool (turn raw bytes into text plus metadata).

    Nothing is parsed: returns the tag ``stub:parse`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:parse {kwargs}"

def tool_diff(**kwargs):
    """Stub for the diff tool (compare etag / last-modified / hash for changes).

    Nothing is compared: returns the tag ``stub:diff`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:diff {kwargs}"

def tool_chunk(**kwargs):
    """Stub for the chunk tool (split text into ordered, overlapping chunks).

    Nothing is split: returns the tag ``stub:chunk`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:chunk {kwargs}"

def tool_embed(**kwargs):
    """Stub for the embed tool (generate embeddings for chunks).

    No model is called: returns the tag ``stub:embed`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:embed {kwargs}"

def tool_upsert_index(**kwargs):
    """Stub for the index-upsert tool (write vector + BM25 docs to AI Search).

    No index is written: returns the tag ``stub:upsert_index`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:upsert_index {kwargs}"

def tool_catalog(**kwargs):
    """Stub for the catalog tool (maintain the document-to-chunks mapping and states).

    No catalog is updated: returns the tag ``stub:catalog`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:catalog {kwargs}"

def tool_tombstone(**kwargs):
    """Stub for the tombstone tool (mark stale chunks and propagate to the index).

    Nothing is marked: returns the tag ``stub:tombstone`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:tombstone {kwargs}"

def tool_stats(**kwargs):
    """Stub for the stats tool (distribution stats and cosine shift vs. baseline).

    Nothing is computed: returns the tag ``stub:stats`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:stats {kwargs}"

def tool_drift_check(**kwargs):
    """Stub for the drift-check tool (compare stats against thresholds).

    Nothing is checked: returns the tag ``stub:drift_check`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:drift_check {kwargs}"

def tool_trace_log(**kwargs):
    """Stub for the trace-log tool (append a telemetry event).

    Nothing is logged: returns the tag ``stub:trace_log`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:trace_log {kwargs}"

def tool_alert(**kwargs):
    """Stub for the alert tool (raise an alert for failures, drift, or quota).

    No alert is sent: returns the tag ``stub:alert`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:alert {kwargs}"

def tool_audit(**kwargs):
    """Stub for the audit tool (append a who/when/what audit entry).

    Nothing is recorded: returns the tag ``stub:audit`` followed by the ``str``
    form of the keyword arguments.
    """
    return f"stub:audit {kwargs}"

def tool_blob_store(**kwargs):
    """Stub for the blob-store tool (put/get raw and text artifacts).

    No storage is touched: returns the tag ``stub:blob_store`` followed by the
    ``str`` form of the keyword arguments.
    """
    return f"stub:blob_store {kwargs}"


# Registry mapping each tool's name to its callable; the agents' call() and
# available_tools() methods look tools up here by name.
TOOLS = {
    "tool_queue_push": tool_queue_push,
    "tool_queue_pop": tool_queue_pop,
    "tool_crawl": tool_crawl,
    "tool_parse": tool_parse,
    "tool_diff": tool_diff,
    "tool_chunk": tool_chunk,
    "tool_embed": tool_embed,
    "tool_upsert_index": tool_upsert_index,
    "tool_catalog": tool_catalog,
    "tool_tombstone": tool_tombstone,
    "tool_stats": tool_stats,
    "tool_drift_check": tool_drift_check,
    "tool_trace_log": tool_trace_log,
    "tool_alert": tool_alert,
    "tool_audit": tool_audit,
    "tool_blob_store": tool_blob_store,
}
print("Tools:", list(TOOLS))

In [None]:
# %% [AGENTS]

class Agent_planner:
    """Ingest Planner agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Ingest Planner"
        self.system_message = "You are the Ingest Planner capability agent for indexing lifecycle."
        self.skills = ["tool_queue_push", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Ingest Planner stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_queue_mgr:
    """Work Queue Manager agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Work Queue Manager"
        self.system_message = "You are the Work Queue Manager capability agent for indexing lifecycle."
        self.skills = ["tool_queue_pop", "tool_queue_push", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Work Queue Manager stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_crawler:
    """Crawler agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Crawler"
        self.system_message = "You are the Crawler capability agent for indexing lifecycle."
        self.skills = ["tool_crawl", "tool_blob_store", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Crawler stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_parser:
    """Parser/Extractor agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Parser/Extractor"
        self.system_message = "You are the Parser/Extractor capability agent for indexing lifecycle."
        self.skills = ["tool_parse", "tool_blob_store", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Parser/Extractor stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_differ:
    """Delta Detector agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Delta Detector"
        self.system_message = "You are the Delta Detector capability agent for indexing lifecycle."
        self.skills = ["tool_diff", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Delta Detector stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_chunker:
    """Chunker agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Chunker"
        self.system_message = "You are the Chunker capability agent for indexing lifecycle."
        self.skills = ["tool_chunk", "tool_catalog", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Chunker stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_embedder:
    """Embedder agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Embedder"
        self.system_message = "You are the Embedder capability agent for indexing lifecycle."
        self.skills = ["tool_embed", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Embedder stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_indexer:
    """Indexer agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Indexer"
        self.system_message = "You are the Indexer capability agent for indexing lifecycle."
        self.skills = ["tool_upsert_index", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Indexer stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_cataloger:
    """Catalog Updater agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Catalog Updater"
        self.system_message = "You are the Catalog Updater capability agent for indexing lifecycle."
        self.skills = ["tool_catalog", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Catalog Updater stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_tombstoner:
    """Tombstoner agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Tombstoner"
        self.system_message = "You are the Tombstoner capability agent for indexing lifecycle."
        self.skills = ["tool_tombstone", "tool_catalog", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Tombstoner stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_driftmon:
    """Drift Monitor agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Drift Monitor"
        self.system_message = "You are the Drift Monitor capability agent for indexing lifecycle."
        self.skills = ["tool_stats", "tool_drift_check", "tool_queue_push", "tool_trace_log"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Drift Monitor stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")

class Agent_observer:
    """Observability Agent: a kernel handle, a system prompt, and a list of tool names.

    ``skills`` is what ``available_tools`` advertises; ``call`` itself resolves
    any name registered in TOOLS, not only those listed in ``skills``.
    """

    def __init__(self, kernel):
        self.name = "Observability Agent"
        self.system_message = "You are the Observability Agent capability agent for indexing lifecycle."
        self.skills = ["tool_trace_log", "tool_alert", "tool_audit"]
        self.kernel = kernel

    async def run(self, user_text: str) -> str:
        """Prompt the kernel with the system message plus ``user_text``.

        Returns the reply as a string. Any exception is caught and reported in
        the returned string instead of being raised.
        """
        try:
            prompt = self.system_message + "\n\nUser: " + user_text
            reply = await self.kernel.invoke_prompt(prompt)
            return str(reply)
        except Exception as err:
            return f"[Observability Agent stub] Adjust SK call. Error: {err}"

    def available_tools(self):
        """Return the names in ``skills`` that are registered in TOOLS, in order."""
        return list(filter(lambda skill: skill in TOOLS, self.skills))

    def call(self, tool_name: str, **kwargs):
        """Invoke the TOOLS entry named ``tool_name`` with ``kwargs``.

        Raises:
            ValueError: if TOOLS has no usable entry under ``tool_name``.
        """
        handler = TOOLS.get(tool_name)
        if handler:
            return handler(**kwargs)
        raise ValueError(f"Tool not found: {tool_name}")


# Instances

# One module-level instance per agent class, all sharing the same kernel.
# The ``agent_`` prefix matters: the DEMO cell finds agents by that prefix.
agent_planner = Agent_planner(kernel)
agent_queue_mgr = Agent_queue_mgr(kernel)
agent_crawler = Agent_crawler(kernel)
agent_parser = Agent_parser(kernel)
agent_differ = Agent_differ(kernel)
agent_chunker = Agent_chunker(kernel)
agent_embedder = Agent_embedder(kernel)
agent_indexer = Agent_indexer(kernel)
agent_cataloger = Agent_cataloger(kernel)
agent_tombstoner = Agent_tombstoner(kernel)
agent_driftmon = Agent_driftmon(kernel)
agent_observer = Agent_observer(kernel)

print(
    "Agents:",
    [
        "agent_planner",
        "agent_queue_mgr",
        "agent_crawler",
        "agent_parser",
        "agent_differ",
        "agent_chunker",
        "agent_embedder",
        "agent_indexer",
        "agent_cataloger",
        "agent_tombstoner",
        "agent_driftmon",
        "agent_observer",
    ],
)

In [None]:
# %% [WIRES]
# Agent display name -> {"tools": [tool names]}; each list repeats the
# ``skills`` list of the agent class carrying that display name.
WIRES = {
    "Ingest Planner": {"tools": ["tool_queue_push", "tool_trace_log"]},
    "Work Queue Manager": {"tools": ["tool_queue_pop", "tool_queue_push", "tool_trace_log"]},
    "Crawler": {"tools": ["tool_crawl", "tool_blob_store", "tool_trace_log"]},
    "Parser/Extractor": {"tools": ["tool_parse", "tool_blob_store", "tool_trace_log"]},
    "Delta Detector": {"tools": ["tool_diff", "tool_trace_log"]},
    "Chunker": {"tools": ["tool_chunk", "tool_catalog", "tool_trace_log"]},
    "Embedder": {"tools": ["tool_embed", "tool_trace_log"]},
    "Indexer": {"tools": ["tool_upsert_index", "tool_trace_log"]},
    "Catalog Updater": {"tools": ["tool_catalog", "tool_trace_log"]},
    "Tombstoner": {"tools": ["tool_tombstone", "tool_catalog", "tool_trace_log"]},
    "Drift Monitor": {
        "tools": ["tool_stats", "tool_drift_check", "tool_queue_push", "tool_trace_log"],
    },
    "Observability Agent": {"tools": ["tool_trace_log", "tool_alert", "tool_audit"]},
}
print("Wiring entries:", len(WIRES))

In [None]:

# %% [DEMO]
import os, getpass, types, asyncio
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

# Same environment defaults as the SETUP-ENV cell, repeated so this cell can
# run on its own; setdefault never overrides a value that is already set.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://4th-openai-resource.openai.azure.com")
os.environ.setdefault("AZURE_OPENAI_DEPLOYMENT", "gpt-35-turbo")
os.environ.setdefault("AZURE_OPENAI_API_VERSION", "2024-10-21")
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    os.environ["AZURE_OPENAI_API_KEY"] = getpass.getpass(
        "Enter AZURE_OPENAI_API_KEY (hidden): "
    ).strip()

# Reuse the kernel created by an earlier cell; create one only if none exists.
if "kernel" not in globals():
    kernel = Kernel()

# Best-effort removal of a previously registered "azure" service before
# registering it again below; a failure here (presumably because nothing is
# registered yet) is deliberately ignored.
try:
    kernel.remove_service("azure")
except Exception:
    pass

kernel.add_service(
    AzureChatCompletion(
        service_id="azure",
        endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    )
)

async def _run_with_azure(self, user_text: str):
    """Replacement ``run`` that sends the prompt through the "azure" service.

    Intended to be bound onto an agent instance. Builds the prompt from the
    agent's ``system_message`` (empty string when missing or falsy) followed by
    the user text, invokes the agent's kernel, and returns the result as a
    string. Unlike the agents' built-in ``run``, exceptions are not caught.
    """
    system_message = getattr(self, "system_message", "") or ""
    # Real newlines separate the system message from the user turn, matching
    # the agents' own run(); the previous "\\n\\nUser: " literal sent the
    # characters backslash-n to the model instead.
    prompt = system_message + "\n\nUser: " + str(user_text)
    # NOTE(review): confirm the installed semantic-kernel version accepts
    # service_id as a keyword argument to invoke_prompt.
    result = await self.kernel.invoke_prompt(prompt, service_id="azure")
    return str(result)

# Point every module-level ``agent_*`` object at the shared kernel and swap in
# the Azure-routed run(). Patching stays best-effort, but failures are now
# reported instead of being silently discarded.
patched = []
skipped = []
# Iterate over a snapshot: the loop variables are themselves module globals, so
# iterating globals() directly would mutate the dict during iteration.
for name, obj in list(globals().items()):
    if not name.startswith("agent_"):
        continue
    try:
        obj.kernel = kernel
        obj.run = types.MethodType(_run_with_azure, obj)
    except Exception as err:
        skipped.append(f"{name} ({err})")
    else:
        patched.append(name)
print("Patched run() for:", patched if patched else "(none)")
if skipped:
    print("Skipped (could not patch):", skipped)

async def demo():
    """Smoke-test four agents, then send one prompt through the planner.

    For the planner, chunker, embedder and indexer (looked up in module globals
    so a missing agent does not raise NameError), print the agent's name, its
    available tools, and the result of calling the first tool with a sample
    keyword argument. Finally ask the planner for a one-sentence summary; any
    failure of that call is printed rather than raised.
    """
    planner = globals().get("agent_planner")
    chunker = globals().get("agent_chunker")
    embedder = globals().get("agent_embedder")
    indexer = globals().get("agent_indexer")

    for label, agent in [("planner", planner), ("chunker", chunker), ("embedder", embedder), ("indexer", indexer)]:
        print(f"Agent ({label}):", getattr(agent, "name", "(missing)"))
        if not hasattr(agent, "available_tools"):
            # Missing agent (None) or an object without the tool API.
            continue
        tools = agent.available_tools()
        print(f"Tools ({label}):", tools)
        if tools:
            t0 = tools[0]
            print(f"Call {t0} ->", agent.call(t0, example="value"))
    print("LLM demo:")
    try:
        # If the planner is missing this raises AttributeError, which is
        # reported by the handler below like any other failure.
        out = await planner.run("In one sentence, outline the indexing lifecycle from crawl to upsert.")
        print(out)
    except Exception as e:
        print("[demo] invoke failed:", e)

# Top-level await: valid in a Jupyter/IPython cell, but a SyntaxError in a
# plain Python script (there, asyncio.run(demo()) would be needed instead).
await demo()
