diff --git a/.env.template b/.env.template index 3717937..dc6567c 100644 --- a/.env.template +++ b/.env.template @@ -9,6 +9,14 @@ COSMOS_DB_DATABASE=ai_memory COSMOS_DB_CONTAINER=memories COSMOS_DB_COUNTERS_CONTAINER=counter COSMOS_DB_LEASE_CONTAINER=leases +# Throughput mode for all required Cosmos DB containers created by the toolkit +# (memories, counter, and lease). +# - serverless: default. The toolkit does not send container RU/s settings. +# Use this only with a Cosmos DB account configured for serverless. +# - autoscale: the toolkit provisions all required containers with autoscale +# throughput using COSMOS_DB_AUTOSCALE_MAX_RU as the max RU/s cap. +# Default max RU/s is 1000. +COSMOS_DB_THROUGHPUT_MODE=serverless COSMOS_DB_AUTOSCALE_MAX_RU=1000 # ---- Change Feed Thresholds (set to 0 to disable) ---- diff --git a/Docs/README.md b/Docs/README.md index b4e4046..4332550 100644 --- a/Docs/README.md +++ b/Docs/README.md @@ -6,9 +6,9 @@ This folder contains the main project documentation for Agent Memory Toolkit. | Document | Purpose | |----------|---------| -| [concepts.md](concepts.md) | Explains the core memory model, including memory types (turn, summary, fact, user summary), threads, roles, the processing pipeline, and automatic change feed processing. | -| [local_testing.md](local_testing.md) | Covers local setup, environment configuration, RBAC, Cosmos provisioning, running the toolkit and Azure Functions locally, and testing change feed auto-processing. | -| [azure_testing.md](azure_testing.md) | Covers Azure deployment, cloud configuration, required services, change feed settings, and validation steps for running the toolkit in Azure. | +| [concepts.md](concepts.md) | Explains the core memory model, including memory types (turn, summary, fact, user summary), threads, roles, the processing pipeline, automatic change feed processing, and shared Cosmos throughput configuration. | +| [local_testing.md](local_testing.md) | Covers local setup, environment configuration, RBAC, Cosmos provisioning, running the toolkit and Azure Functions locally, and testing change feed auto-processing with serverless or autoscale container provisioning. | +| [azure_testing.md](azure_testing.md) | Covers Azure deployment, cloud configuration, required services, change feed settings, throughput mode configuration, and validation steps for running the toolkit in Azure. | | [design_patterns.md](design_patterns.md) | Shows when and how to call CRUD operations, summarization, fact extraction, and memory retrieval in chat and multi-agent applications, including automatic processing via the change feed. | ## Recommended Reading Order diff --git a/Docs/azure_testing.md b/Docs/azure_testing.md index ac8bad4..826b8b3 100644 --- a/Docs/azure_testing.md +++ b/Docs/azure_testing.md @@ -1,6 +1,6 @@ # Deploying and Testing Agent Memory Toolkit in Azure -This guide covers the minimum Azure resources, deployment steps, and validation order for running the toolkit in Azure. +This guide covers the minimum Azure resources, deployment steps, throughput settings, and validation order for running the toolkit in Azure. --- @@ -71,7 +71,7 @@ az cosmosdb create \ --resource-group ``` -The toolkit can create the database and container later via `create_memory_store()`. +The toolkit can create the database and required containers later via `create_memory_store()`. --- @@ -104,6 +104,9 @@ az functionapp config appsettings set \ COSMOS_DB_ENDPOINT="https://.documents.azure.com:443/" \ COSMOS_DB_DATABASE="ai_memory" \ COSMOS_DB_CONTAINER="memories" \ + COSMOS_DB_COUNTERS_CONTAINER="counter" \ + COSMOS_DB_LEASE_CONTAINER="leases" \ + COSMOS_DB_THROUGHPUT_MODE="serverless" \ COSMOS_DB_AUTOSCALE_MAX_RU="1000" \ AI_FOUNDRY_ENDPOINT="https://.openai.azure.com/" \ EMBEDDING_MODEL="text-embedding-3-large" \ @@ -111,6 +114,8 @@ az functionapp config appsettings set \ LLM_MODEL="gpt-5-mini" ``` +`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the `memories`, `counter`, and `leases` containers without specifying RU/s. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all required containers. + ### Change feed settings (optional) To enable automatic processing via the change feed trigger, add these settings: @@ -122,6 +127,9 @@ az functionapp config appsettings set \ --settings \ COSMOS_DB__accountEndpoint="https://.documents.azure.com:443/" \ COSMOS_DB_COUNTERS_CONTAINER="counter" \ + COSMOS_DB_LEASE_CONTAINER="leases" \ + COSMOS_DB_THROUGHPUT_MODE="serverless" \ + COSMOS_DB_AUTOSCALE_MAX_RU="1000" \ THREAD_SUMMARY_EVERY_N="5" \ FACT_EXTRACTION_EVERY_N="3" \ USER_SUMMARY_EVERY_N="10" @@ -129,7 +137,7 @@ az functionapp config appsettings set \ Set any threshold to `"0"` to disable that processing type. -The `leases` container is created automatically by the Azure Functions runtime. +The `leases` container is provisioned by `create_memory_store()` alongside the `memories` and `counter` containers, so the Function App should be configured to use that existing lease container. If you use function-key auth for the HTTP trigger, keep the key for the client as `ADF_KEY`. @@ -161,6 +169,9 @@ Update `.env` to point at Azure instead of localhost: COSMOS_DB_ENDPOINT=https://.documents.azure.com:443/ COSMOS_DB_DATABASE=ai_memory COSMOS_DB_CONTAINER=memories +COSMOS_DB_COUNTERS_CONTAINER=counter +COSMOS_DB_LEASE_CONTAINER=leases +COSMOS_DB_THROUGHPUT_MODE=serverless COSMOS_DB_AUTOSCALE_MAX_RU=1000 AI_FOUNDRY_ENDPOINT=https://.openai.azure.com/ @@ -192,6 +203,10 @@ memory = AgentMemory( cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"), cosmos_database=os.getenv("COSMOS_DB_DATABASE"), cosmos_container=os.getenv("COSMOS_DB_CONTAINER"), + cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"), + cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"), + cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"), + cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")), ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"), embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"), adf_endpoint=os.getenv("ADF_ENDPOINT"), @@ -218,6 +233,10 @@ memory = AsyncAgentMemory( cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"), cosmos_database=os.getenv("COSMOS_DB_DATABASE"), cosmos_container=os.getenv("COSMOS_DB_CONTAINER"), + cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"), + cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"), + cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"), + cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")), ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"), embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"), adf_endpoint=os.getenv("ADF_ENDPOINT"), @@ -235,7 +254,7 @@ await memory.connect_cosmos( await memory.create_memory_store() ``` -This provisions the hierarchical partition key (`user_id`, `thread_id`), vector index, full-text index, and autoscale throughput. +This provisions the `memories`, `counter`, and `leases` containers. `serverless` is the default throughput mode; if you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the shared `COSMOS_DB_AUTOSCALE_MAX_RU` value is applied to all three containers. --- diff --git a/Docs/concepts.md b/Docs/concepts.md index 5de9f10..abcff4f 100644 --- a/Docs/concepts.md +++ b/Docs/concepts.md @@ -154,7 +154,16 @@ Set any value to `0` to disable that processing type. For example, setting `THRE |-----------|---------------|---------| | `memories` | `/user_id`, `/thread_id` (hierarchical) | Existing memory store | | `counter` | `/user_id`, `/thread_id` (hierarchical) | Message count tracking for automatic processing | -| `leases` | `/id` | Auto-created by the trigger for change feed checkpointing | +| `leases` | `/id` | Change feed checkpointing container created by `create_memory_store()` | + +### Throughput configuration + +The toolkit provisions all required Cosmos containers under one shared throughput mode: + +- `serverless` is the default. The toolkit creates the `memories`, `counter`, and `leases` containers without specifying RU/s. +- `autoscale` applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all three containers. + +This keeps the change feed dependencies aligned with the main memory store instead of letting the Functions trigger create the lease container independently. ### Push vs. pull diff --git a/Docs/local_testing.md b/Docs/local_testing.md index 24a5722..33ef95e 100644 --- a/Docs/local_testing.md +++ b/Docs/local_testing.md @@ -72,6 +72,9 @@ Minimum `.env` values: COSMOS_DB_ENDPOINT=https://.documents.azure.com:443/ COSMOS_DB_DATABASE=ai_memory COSMOS_DB_CONTAINER=memories +COSMOS_DB_COUNTERS_CONTAINER=counter +COSMOS_DB_LEASE_CONTAINER=leases +COSMOS_DB_THROUGHPUT_MODE=serverless COSMOS_DB_AUTOSCALE_MAX_RU=1000 AI_FOUNDRY_ENDPOINT=https://.services.ai.azure.com/ @@ -85,6 +88,8 @@ ADF_KEY= The Functions runtime uses `azure_functions/local.settings.json`, not `.env`, so mirror the same values there. +`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the required Cosmos containers without specifying RU/s. If you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the toolkit provisions the memories, counter, and lease containers with the shared max RU/s value from `COSMOS_DB_AUTOSCALE_MAX_RU`. + ### Change feed settings (optional) In `azure_functions/local.settings.json`, add these to enable automatic processing: @@ -92,6 +97,9 @@ In `azure_functions/local.settings.json`, add these to enable automatic processi ```json "COSMOS_DB__accountEndpoint": "https://.documents.azure.com:443/", "COSMOS_DB_COUNTERS_CONTAINER": "counter", +"COSMOS_DB_LEASE_CONTAINER": "leases", +"COSMOS_DB_THROUGHPUT_MODE": "serverless", +"COSMOS_DB_AUTOSCALE_MAX_RU": "1000", "THREAD_SUMMARY_EVERY_N": "5", "FACT_EXTRACTION_EVERY_N": "3", "USER_SUMMARY_EVERY_N": "10" @@ -153,6 +161,10 @@ memory = AgentMemory( cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"), cosmos_database=os.getenv("COSMOS_DB_DATABASE"), cosmos_container=os.getenv("COSMOS_DB_CONTAINER"), + cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"), + cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"), + cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"), + cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")), ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"), embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"), adf_endpoint=os.getenv("ADF_ENDPOINT", "http://localhost:7071/api"), @@ -192,6 +204,10 @@ memory = AsyncAgentMemory( cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"), cosmos_database=os.getenv("COSMOS_DB_DATABASE"), cosmos_container=os.getenv("COSMOS_DB_CONTAINER"), + cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"), + cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"), + cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"), + cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")), ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"), embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"), adf_endpoint=os.getenv("ADF_ENDPOINT", "http://localhost:7071/api"), @@ -217,7 +233,7 @@ for r in results: await memory.close() ``` -`create_memory_store()` creates the database/container and configures the hierarchical partition key (`user_id`, `thread_id`), vector index, full-text index, and autoscale throughput. +`create_memory_store()` creates the database and required containers, configures the hierarchical partition key (`user_id`, `thread_id`) for memories and counters, uses `/id` for the lease container, and applies either serverless or autoscale throughput based on `COSMOS_DB_THROUGHPUT_MODE`. --- diff --git a/README.md b/README.md index 3403d61..5bffe00 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,10 @@ memory = CosmosMemoryClient( cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"), cosmos_database=os.getenv("COSMOS_DB_DATABASE"), cosmos_container=os.getenv("COSMOS_DB_CONTAINER"), + cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"), + cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"), + cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"), + cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")), ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"), embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"), adf_endpoint=os.getenv("ADF_ENDPOINT", "http://localhost:7071/api"), @@ -141,7 +145,9 @@ memory = CosmosMemoryClient( use_default_credential=True, cosmos_credential=DefaultAzureCredential(), ) -# Constructor auto-creates the database and container if they don't exist. +# Constructor auto-creates the database and required containers if they don't exist. +# `serverless` is the default throughput mode. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` +# to provision memories, counter, and lease containers with a shared autoscale RU cap. # Add directly to Cosmos thread_id = str(uuid.uuid4()) @@ -187,7 +193,7 @@ summary = memory.get_user_summary(user_id="user-001") | **Azure OpenAI / AI Foundry** | Embedding model + chat model for summarization / fact extraction | | **Azure Functions** | Durable Functions orchestrator and activity functions | -Automatic change feed processing stores lightweight counter documents in a dedicated `counter` container and also uses a `leases` container (auto-created). See [concepts.md](Docs/concepts.md#automatic-processing-change-feed) for details. +Automatic change feed processing stores lightweight counter documents in a dedicated `counter` container and also uses a `leases` container that is provisioned by `create_memory_store()`. Throughput defaults to `serverless`; set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to the memories, counter, and lease containers. See [concepts.md](Docs/concepts.md#automatic-processing-change-feed) for details. All services use **Entra ID** auth via `DefaultAzureCredential`. diff --git a/Samples/Demo.ipynb b/Samples/Demo.ipynb index 9ab5b2c..ea8f5e3 100644 --- a/Samples/Demo.ipynb +++ b/Samples/Demo.ipynb @@ -7,18 +7,37 @@ "source": [ "# Agent Memory Toolkit – Demo\n", "\n", + "\n", + "\n", "This notebook walks through the **Agent Memory Toolkit** library using the synchronous `CosmosMemoryClient` class:\n", "\n", + "\n", + "\n", "1. **Setup** – Install dependencies and load environment variables\n", + "\n", "2. **Local memory operations** – `add_local`, `get_local`, `update_local`, `delete_local`\n", + "\n", "3. **Cosmos DB operations** – `add_cosmos`, `get_memories`\n", + "\n", "4. **Azure Durable Function – Thread Summary** – `generate_thread_summary()` with `CosmosMemoryClient`\n", + "\n", "5. **Azure Durable Function – Extract Facts** – `extract_facts()` with `CosmosMemoryClient`\n", + "\n", "6. **Azure Durable Function – User Summary** – `generate_user_summary()` and `get_user_summary()` with `CosmosMemoryClient`\n", + "\n", "7. **Vector search** – `search_cosmos()` with `CosmosMemoryClient`\n", + "\n", "8. **Automatic processing (Change Feed)** – Write turns and let the change feed trigger process them automatically\n", "\n", - "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`. To run against a deployed Function App instead, update `ADF_ENDPOINT` in your `.env` file.\n", + "\n", + "\n", + "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`.\n", + "\n", + ">\n", + "\n", + "> **Cosmos provisioning:** `serverless` is the default throughput mode. If you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the toolkit applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to the `memories`, `counter`, and `leases` containers created by `create_memory_store()`.\n", + "\n", + "\n", "\n", "> For the **async** API (`AsyncCosmosMemoryClient`), see [Demo_async.ipynb](Demo_async.ipynb)." ] @@ -46,21 +65,44 @@ "outputs": [], "source": [ "import os, json\n", + "\n", "from dotenv import load_dotenv\n", + "\n", "from azure.identity import DefaultAzureCredential\n", "\n", + "\n", + "\n", "# Add parent directory to path so we can import the package easily\n", + "\n", "import sys\n", + "\n", "sys.path.insert(0, os.path.abspath(\"..\"))\n", "\n", + "\n", + "\n", "from agent_memory_toolkit import CosmosMemoryClient\n", "\n", + "\n", + "\n", "# Load environment variables from .env in the repo root\n", + "\n", "load_dotenv(os.path.join(\"..\", \".env\"))\n", "\n", + "\n", + "\n", "print(\"COSMOS_DB_ENDPOINT:\", os.getenv(\"COSMOS_DB_ENDPOINT\"))\n", + "\n", "print(\"COSMOS_DB_DATABASE:\", os.getenv(\"COSMOS_DB_DATABASE\"))\n", - "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))" + "\n", + "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))\n", + "\n", + "print(\"COSMOS_DB_COUNTERS_CONTAINER:\", os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"))\n", + "\n", + "print(\"COSMOS_DB_LEASE_CONTAINER:\", os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"))\n", + "\n", + "print(\"COSMOS_DB_THROUGHPUT_MODE:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n", + "\n", + "print(\"COSMOS_DB_AUTOSCALE_MAX_RU:\", os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\"))" ] }, { @@ -76,19 +118,43 @@ "outputs": [], "source": [ "# Create a CosmosMemoryClient instance\n", + "\n", "memory = CosmosMemoryClient(\n", + "\n", " cosmos_endpoint=os.getenv(\"COSMOS_DB_ENDPOINT\"),\n", + "\n", " cosmos_database=os.getenv(\"COSMOS_DB_DATABASE\"),\n", + "\n", " cosmos_container=os.getenv(\"COSMOS_DB_CONTAINER\"),\n", + "\n", + " cosmos_counter_container=os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"),\n", + "\n", + " cosmos_lease_container=os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"),\n", + "\n", + " cosmos_throughput_mode=os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"),\n", + "\n", + " cosmos_autoscale_max_ru=int(os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\")),\n", + "\n", " ai_foundry_endpoint=os.getenv(\"AI_FOUNDRY_ENDPOINT\"),\n", + "\n", " embedding_model=os.getenv(\"EMBEDDING_MODEL\", \"text-embedding-3-large\"),\n", + "\n", " adf_endpoint=os.getenv(\"ADF_ENDPOINT\", \"http://localhost:7071/api\"),\n", + "\n", " adf_key=os.getenv(\"ADF_KEY\", \"\"),\n", + "\n", " use_default_credential=True,\n", + "\n", " cosmos_credential=DefaultAzureCredential()\n", + "\n", ")\n", "\n", + "\n", + "\n", "print(\"CosmosMemoryClient instance created\")\n", + "\n", + "print(\"Throughput mode:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n", + "\n", "print(\"Local memory store:\", memory.local_memory)" ] }, @@ -812,15 +878,29 @@ "source": [ "### 8. Automatic Processing (Change Feed)\n", "\n", + "\n", + "\n", "The toolkit includes a Cosmos DB change feed trigger that automatically fires thread summaries, fact extraction, and user summaries when configurable message count thresholds are crossed.\n", "\n", + "\n", + "\n", "**Prerequisites:**\n", + "\n", "- The Azure Functions host must be running (`func start`)\n", + "\n", "- `local.settings.json` must include change feed settings:\n", + "\n", " - `COSMOS_DB__accountEndpoint` pointing to your Cosmos account\n", + "\n", " - `COSMOS_DB_COUNTERS_CONTAINER` set to `\"counter\"`\n", + "\n", + " - `COSMOS_DB_LEASE_CONTAINER` set to `\"leases\"`\n", + "\n", " - At least one threshold > 0 (e.g. `THREAD_SUMMARY_EVERY_N=3`)\n", - "- A `counter` container must exist in the same database\n", + "\n", + "- `create_memory_store()` must have provisioned the `counter` and `leases` containers in the same database\n", + "\n", + "\n", "\n", "The cells below write enough turns to cross the threshold, then poll for auto-generated derived memories." ] diff --git a/Samples/Demo_async.ipynb b/Samples/Demo_async.ipynb index 2d40494..fc0d286 100644 --- a/Samples/Demo_async.ipynb +++ b/Samples/Demo_async.ipynb @@ -7,18 +7,37 @@ "source": [ "# Agent Memory Toolkit – Async Demo\n", "\n", + "\n", + "\n", "This notebook demonstrates the **async** API of the Agent Memory Toolkit via `AsyncCosmosMemoryClient`:\n", "\n", + "\n", + "\n", "1. [**Setup**](#1-setup) – Install dependencies and load environment variables\n", + "\n", "2. [**Local memory operations**](#2-local-memory-operations) – `add_local`, `get_local`, `update_local`, `delete_local`\n", + "\n", "3. [**Cosmos DB operations**](#3-cosmos-db-operations) – `add_cosmos`, `get_memories`\n", + "\n", "4. [**Summarization**](#4-summarization) – `generate_thread_summary()` with `AsyncCosmosMemoryClient`\n", + "\n", "5. [**Fact Extraction**](#5-fact-extraction) – `extract_facts()` with `AsyncCosmosMemoryClient`\n", + "\n", "6. [**User Summary**](#6-user-summary) – `generate_user_summary()` and `get_user_summary()` with `AsyncCosmosMemoryClient`\n", + "\n", "7. [**Vector search**](#7-vector-search-with-`search_cosmos`) – `search_cosmos()` with `AsyncCosmosMemoryClient`\n", + "\n", "8. [**Automatic processing (Change Feed)**](#8-automatic-processing-change-feed) – Write turns and let the change feed trigger process them automatically\n", "\n", - "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`. To run against a deployed Function App instead, update `ADF_ENDPOINT` in your `.env` file.\n", + "\n", + "\n", + "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`.\n", + "\n", + ">\n", + "\n", + "> **Cosmos provisioning:** `serverless` is the default throughput mode. If you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the toolkit applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to the `memories`, `counter`, and `leases` containers created by `create_memory_store()`.\n", + "\n", + "\n", "\n", "> For the **sync** API (`CosmosMemoryClient`), see [Demo.ipynb](Demo.ipynb)." ] @@ -46,18 +65,38 @@ "outputs": [], "source": [ "import os, json, sys\n", + "\n", "from pathlib import Path\n", + "\n", "sys.path.insert(0, os.path.abspath(\"..\"))\n", "\n", + "\n", + "\n", "from dotenv import load_dotenv\n", + "\n", "from agent_memory_toolkit.aio import AsyncCosmosMemoryClient\n", "\n", + "\n", + "\n", "# Load environment variables from .env in the repo root\n", + "\n", "load_dotenv(os.path.join(\"..\", \".env\"))\n", "\n", + "\n", + "\n", "print(\"COSMOS_DB_ENDPOINT:\", os.getenv(\"COSMOS_DB_ENDPOINT\"))\n", + "\n", "print(\"COSMOS_DB_DATABASE:\", os.getenv(\"COSMOS_DB_DATABASE\"))\n", - "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))" + "\n", + "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))\n", + "\n", + "print(\"COSMOS_DB_COUNTERS_CONTAINER:\", os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"))\n", + "\n", + "print(\"COSMOS_DB_LEASE_CONTAINER:\", os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"))\n", + "\n", + "print(\"COSMOS_DB_THROUGHPUT_MODE:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n", + "\n", + "print(\"COSMOS_DB_AUTOSCALE_MAX_RU:\", os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\"))" ] }, { @@ -74,20 +113,46 @@ "source": [ "from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential\n", "\n", + "\n", + "\n", "# Create an AsyncCosmosMemoryClient instance\n", + "\n", "memory = AsyncCosmosMemoryClient(\n", + "\n", " cosmos_endpoint=os.getenv(\"COSMOS_DB_ENDPOINT\"),\n", + "\n", " cosmos_database=os.getenv(\"COSMOS_DB_DATABASE\"),\n", + "\n", " cosmos_container=os.getenv(\"COSMOS_DB_CONTAINER\"),\n", + "\n", + " cosmos_counter_container=os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"),\n", + "\n", + " cosmos_lease_container=os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"),\n", + "\n", + " cosmos_throughput_mode=os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"),\n", + "\n", + " cosmos_autoscale_max_ru=int(os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\")),\n", + "\n", " ai_foundry_endpoint=os.getenv(\"AI_FOUNDRY_ENDPOINT\"),\n", + "\n", " embedding_model=os.getenv(\"EMBEDDING_MODEL\", \"text-embedding-3-large\"),\n", + "\n", " adf_endpoint=os.getenv(\"ADF_ENDPOINT\", \"http://localhost:7071/api\"),\n", + "\n", " adf_key=os.getenv(\"ADF_KEY\", \"\"),\n", + "\n", " use_default_credential=True,\n", + "\n", " cosmos_credential=AsyncDefaultAzureCredential(),\n", + "\n", ")\n", "\n", + "\n", + "\n", "print(\"AsyncCosmosMemoryClient instance created\")\n", + "\n", + "print(\"Throughput mode:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n", + "\n", "print(\"Local memory store:\", memory.local_memory)" ] }, @@ -732,15 +797,29 @@ "source": [ "### 8. Automatic Processing (Change Feed)\n", "\n", + "\n", + "\n", "The toolkit includes a Cosmos DB change feed trigger that automatically fires thread summaries, fact extraction, and user summaries when configurable message count thresholds are crossed.\n", "\n", + "\n", + "\n", "**Prerequisites:**\n", + "\n", "- The Azure Functions host must be running (`func start`)\n", + "\n", "- `local.settings.json` must include change feed settings:\n", - " - `COSMOS_DB__accountEndpoint` pointing to your Cosmos account\\n\n", - " - `COSMOS_DB_COUNTERS_CONTAINER` set to `\"counters\"`\n", + "\n", + " - `COSMOS_DB__accountEndpoint` pointing to your Cosmos account\n", + "\n", + " - `COSMOS_DB_COUNTERS_CONTAINER` set to `\"counter\"`\n", + "\n", + " - `COSMOS_DB_LEASE_CONTAINER` set to `\"leases\"`\n", + "\n", " - At least one threshold > 0 (e.g. `THREAD_SUMMARY_EVERY_N=3`)\n", - "- A `counters` container must exist in the same database (partition key: `/user_id`)\n", + "\n", + "- `create_memory_store()` must have provisioned the `counter` and `leases` containers in the same database\n", + "\n", + "\n", "\n", "The cells below write enough turns to cross the threshold, then poll for auto-generated derived memories." ] diff --git a/agent_memory_toolkit/_utils.py b/agent_memory_toolkit/_utils.py index 50a30f9..e9acb4d 100644 --- a/agent_memory_toolkit/_utils.py +++ b/agent_memory_toolkit/_utils.py @@ -69,6 +69,94 @@ def _resolve_embedding_dimensions(val: Optional[int]) -> Optional[int]: return parsed if parsed else None +def _resolve_cosmos_throughput_mode(val: Optional[str]) -> str: + """Resolve throughput mode from explicit value or env var. + + Allowed values are ``serverless`` and ``autoscale``. + """ + raw = (val if val is not None else os.environ.get("COSMOS_DB_THROUGHPUT_MODE") or "serverless").strip().lower() + + if raw not in {"serverless", "autoscale"}: + raise ConfigurationError( + message=( + f"Invalid configuration for cosmos_throughput_mode: expected 'serverless' or 'autoscale', got '{raw}'" + ), + parameter="cosmos_throughput_mode", + ) + return raw + + +def _resolve_cosmos_autoscale_max_ru(val: Optional[int]) -> int: + """Resolve autoscale max RU from explicit value or env var.""" + if val is not None: + if not isinstance(val, int) or isinstance(val, bool) or val <= 0: + raise ConfigurationError( + message=f"Invalid configuration for cosmos_autoscale_max_ru: expected a positive integer, got '{val}'", + parameter="cosmos_autoscale_max_ru", + ) + return val + raw = (os.environ.get("COSMOS_DB_AUTOSCALE_MAX_RU") or "1000").strip() + try: + parsed = int(raw) + except ValueError as exc: + raise ConfigurationError( + message=(f"Invalid configuration for cosmos_autoscale_max_ru: expected an integer, got '{raw}'"), + parameter="cosmos_autoscale_max_ru", + ) from exc + if parsed <= 0: + raise ConfigurationError( + message=(f"Invalid configuration for cosmos_autoscale_max_ru: expected a positive integer, got '{raw}'"), + parameter="cosmos_autoscale_max_ru", + ) + return parsed + + +def _resolve_cosmos_provisioning_autoscale_max_ru( + *, + throughput_mode: str, + autoscale_max_ru: Optional[int], +) -> Optional[int]: + """Resolve autoscale max RU only when autoscale throughput is enabled.""" + if throughput_mode != "autoscale": + return None + return _resolve_cosmos_autoscale_max_ru(autoscale_max_ru) + + +def _cosmos_container_offer_throughput( + *, + throughput_mode: str, + autoscale_max_ru: Optional[int], + throughput_properties_cls: Any, +) -> Any: + """Return ``None`` for serverless mode or a throughput properties instance for autoscale mode.""" + if throughput_mode == "serverless": + return None + if autoscale_max_ru is None: + raise ConfigurationError( + message=("Invalid configuration for cosmos_autoscale_max_ru: autoscale mode requires a positive integer"), + parameter="cosmos_autoscale_max_ru", + ) + return throughput_properties_cls(auto_scale_max_throughput=autoscale_max_ru) + + +def _build_container_kwargs( + *, + container_id: str, + partition_key: Any, + offer_throughput: Optional[Any], + **extras: Any, +) -> dict[str, Any]: + """Build kwargs for ``create_container_if_not_exists`` with optional throughput.""" + kwargs: dict[str, Any] = { + "id": container_id, + "partition_key": partition_key, + **extras, + } + if offer_throughput is not None: + kwargs["offer_throughput"] = offer_throughput + return kwargs + + # --------------------------------------------------------------------------- # Connection / query helpers (shared by sync & async Cosmos clients) # --------------------------------------------------------------------------- diff --git a/agent_memory_toolkit/aio/cosmos_memory_client.py b/agent_memory_toolkit/aio/cosmos_memory_client.py index 7e975d6..3c571b7 100644 --- a/agent_memory_toolkit/aio/cosmos_memory_client.py +++ b/agent_memory_toolkit/aio/cosmos_memory_client.py @@ -19,9 +19,13 @@ from agent_memory_toolkit._utils import ( VALID_ROLES, VALID_TYPES, + _build_container_kwargs, _build_memory_query_builder, _container_policies, + _cosmos_container_offer_throughput, _make_memory, + _resolve_cosmos_provisioning_autoscale_max_ru, + _resolve_cosmos_throughput_mode, _resolve_embedding_dimensions, _validate_connection, _validate_hybrid_search, @@ -62,6 +66,10 @@ def __init__( cosmos_credential: Optional[Any] = None, cosmos_database: Optional[str] = None, cosmos_container: Optional[str] = None, + cosmos_counter_container: Optional[str] = None, + cosmos_lease_container: Optional[str] = None, + cosmos_throughput_mode: Optional[str] = None, + cosmos_autoscale_max_ru: Optional[int] = None, ai_foundry_endpoint: Optional[str] = None, ai_foundry_credential: Optional[Any] = None, ai_foundry_api_key: Optional[str] = None, @@ -79,6 +87,14 @@ def __init__( self._cosmos_credential = cosmos_credential self._cosmos_database = cosmos_database or "ai_memory" self._cosmos_container = cosmos_container or "memories" + self._cosmos_counter_container = cosmos_counter_container or "counter" + self._cosmos_lease_container = cosmos_lease_container or "leases" + self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(cosmos_throughput_mode) + self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(cosmos_throughput_mode) + self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru( + throughput_mode=self._cosmos_throughput_mode, + autoscale_max_ru=cosmos_autoscale_max_ru, + ) self._ai_foundry_endpoint = ai_foundry_endpoint self._ai_foundry_credential = ai_foundry_credential @@ -326,15 +342,16 @@ async def create_memory_store( self, database: Optional[str] = None, container: Optional[str] = None, - counter_container: str = "counter", + counter_container: Optional[str] = None, + lease_container: Optional[str] = None, endpoint: Optional[str] = None, credential: Optional[Any] = None, embedding_dimensions: Optional[int] = None, embedding_data_type: Optional[str] = None, distance_function: Optional[str] = None, full_text_language: Optional[str] = None, - autoscale_max_ru: int = 1000, - counter_autoscale_max_ru: int = 1000, + throughput_mode: Optional[str] = None, + autoscale_max_ru: Optional[int] = None, ) -> None: """Create the Cosmos DB database and container for memories (async). @@ -346,16 +363,26 @@ async def create_memory_store( * Hierarchical partition key ``[/user_id, /thread_id]`` * ``quantizedFlat`` vector index on ``/embedding`` * Full-text index on ``/content`` - * Autoscale throughput (max RU from *autoscale_max_ru*) + * Throughput behavior controlled by *throughput_mode* - A separate counter container is also provisioned with the same - partition key and autoscale throughput capped by - *counter_autoscale_max_ru*. + Separate counter and lease containers are also provisioned. + In ``serverless`` mode no RU/s throughput is specified. + In ``autoscale`` mode all required containers use the same + autoscale max RU from *autoscale_max_ru*. """ self._cosmos_endpoint = endpoint or self._cosmos_endpoint self._cosmos_credential = credential or self._cosmos_credential self._cosmos_database = database or self._cosmos_database self._cosmos_container = container or self._cosmos_container + self._cosmos_counter_container = counter_container or self._cosmos_counter_container + self._cosmos_lease_container = lease_container or self._cosmos_lease_container + self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode( + throughput_mode if throughput_mode is not None else self._cosmos_throughput_mode + ) + self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru( + throughput_mode=self._cosmos_throughput_mode, + autoscale_max_ru=(autoscale_max_ru if autoscale_max_ru is not None else self._cosmos_autoscale_max_ru), + ) _validate_connection( self._cosmos_endpoint, @@ -373,29 +400,44 @@ async def create_memory_store( db = await client.create_database_if_not_exists(id=self._cosmos_database) partition_key = PartitionKey(path=["/user_id", "/thread_id"], kind="MultiHash") + lease_partition_key = PartitionKey(path="/id") vec_policy, idx_policy, ft_policy = _container_policies( embedding_dimensions=embedding_dimensions or self._embedding_dimensions or 1536, embedding_data_type=embedding_data_type or "float32", distance_function=distance_function or "cosine", full_text_language=full_text_language or "en-US", ) + offer_throughput = _cosmos_container_offer_throughput( + throughput_mode=self._cosmos_throughput_mode, + autoscale_max_ru=self._cosmos_autoscale_max_ru, + throughput_properties_cls=ThroughputProperties, + ) container_handle = await db.create_container_if_not_exists( - id=self._cosmos_container, - partition_key=partition_key, - indexing_policy=idx_policy, - vector_embedding_policy=vec_policy, - full_text_policy=ft_policy, - offer_throughput=ThroughputProperties( - auto_scale_max_throughput=autoscale_max_ru, - ), + **_build_container_kwargs( + container_id=self._cosmos_container, + partition_key=partition_key, + offer_throughput=offer_throughput, + indexing_policy=idx_policy, + vector_embedding_policy=vec_policy, + full_text_policy=ft_policy, + ) ) + await db.create_container_if_not_exists( - id=counter_container, - partition_key=partition_key, - offer_throughput=ThroughputProperties( - auto_scale_max_throughput=counter_autoscale_max_ru, - ), + **_build_container_kwargs( + container_id=self._cosmos_counter_container, + partition_key=partition_key, + offer_throughput=offer_throughput, + ) + ) + + await db.create_container_if_not_exists( + **_build_container_kwargs( + container_id=self._cosmos_lease_container, + partition_key=lease_partition_key, + offer_throughput=offer_throughput, + ) ) self._cosmos_client = client self._container_client = container_handle @@ -403,10 +445,11 @@ async def create_memory_store( raise CosmosOperationError(f"Failed to create memory store (async): {exc}") from exc logger.info( - "Async created memory store %s/%s with counter container %s", + "Async created memory store %s/%s with counter container %s and lease container %s", self._cosmos_database, self._cosmos_container, - counter_container, + self._cosmos_counter_container, + self._cosmos_lease_container, ) async def _require_cosmos(self) -> None: diff --git a/agent_memory_toolkit/cosmos_memory_client.py b/agent_memory_toolkit/cosmos_memory_client.py index 037a7f8..a845444 100644 --- a/agent_memory_toolkit/cosmos_memory_client.py +++ b/agent_memory_toolkit/cosmos_memory_client.py @@ -15,9 +15,13 @@ from ._utils import ( VALID_ROLES, VALID_TYPES, + _build_container_kwargs, _build_memory_query_builder, _container_policies, + _cosmos_container_offer_throughput, _make_memory, + _resolve_cosmos_provisioning_autoscale_max_ru, + _resolve_cosmos_throughput_mode, _resolve_embedding_dimensions, _validate_connection, _validate_hybrid_search, @@ -76,6 +80,10 @@ def __init__( cosmos_credential: Optional[Any] = None, cosmos_database: Optional[str] = None, cosmos_container: Optional[str] = None, + cosmos_counter_container: Optional[str] = None, + cosmos_lease_container: Optional[str] = None, + cosmos_throughput_mode: Optional[str] = None, + cosmos_autoscale_max_ru: Optional[int] = None, ai_foundry_endpoint: Optional[str] = None, ai_foundry_credential: Optional[Any] = None, ai_foundry_api_key: Optional[str] = None, @@ -93,6 +101,13 @@ def __init__( self._cosmos_credential = cosmos_credential self._cosmos_database = cosmos_database or "ai_memory" self._cosmos_container = cosmos_container or "memories" + self._cosmos_counter_container = cosmos_counter_container or "counter" + self._cosmos_lease_container = cosmos_lease_container or "leases" + self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(cosmos_throughput_mode) + self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru( + throughput_mode=self._cosmos_throughput_mode, + autoscale_max_ru=cosmos_autoscale_max_ru, + ) self._ai_foundry_endpoint = ai_foundry_endpoint self._ai_foundry_credential = ai_foundry_credential @@ -336,15 +351,16 @@ def create_memory_store( self, database: Optional[str] = None, container: Optional[str] = None, - counter_container: str = "counter", + counter_container: Optional[str] = None, + lease_container: Optional[str] = None, endpoint: Optional[str] = None, credential: Optional[Any] = None, embedding_dimensions: Optional[int] = None, embedding_data_type: Optional[str] = None, distance_function: Optional[str] = None, full_text_language: Optional[str] = None, - autoscale_max_ru: int = 1000, - counter_autoscale_max_ru: int = 1000, + throughput_mode: Optional[str] = None, + autoscale_max_ru: Optional[int] = None, ) -> None: """Create the Cosmos DB database and container for memories. @@ -356,16 +372,26 @@ def create_memory_store( * Hierarchical partition key ``[/user_id, /thread_id]`` * ``quantizedFlat`` vector index on ``/embedding`` * Full-text index on ``/content`` - * Autoscale throughput (max RU from *autoscale_max_ru*) + * Throughput behavior controlled by *throughput_mode* - A separate counter container is also provisioned with the same - partition key and autoscale throughput capped by - *counter_autoscale_max_ru*. + Separate counter and lease containers are also provisioned. + In ``serverless`` mode no RU/s throughput is specified. + In ``autoscale`` mode all required containers use the same + autoscale max RU from *autoscale_max_ru*. """ self._cosmos_endpoint = endpoint or self._cosmos_endpoint self._cosmos_credential = credential or self._cosmos_credential self._cosmos_database = database or self._cosmos_database self._cosmos_container = container or self._cosmos_container + self._cosmos_counter_container = counter_container or self._cosmos_counter_container + self._cosmos_lease_container = lease_container or self._cosmos_lease_container + self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode( + throughput_mode if throughput_mode is not None else self._cosmos_throughput_mode + ) + self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru( + throughput_mode=self._cosmos_throughput_mode, + autoscale_max_ru=(autoscale_max_ru if autoscale_max_ru is not None else self._cosmos_autoscale_max_ru), + ) _validate_connection( self._cosmos_endpoint, @@ -382,29 +408,44 @@ def create_memory_store( db = client.create_database_if_not_exists(id=self._cosmos_database) partition_key = PartitionKey(path=["/user_id", "/thread_id"], kind="MultiHash") + lease_partition_key = PartitionKey(path="/id") vec_policy, idx_policy, ft_policy = _container_policies( embedding_dimensions=embedding_dimensions or self._embedding_dimensions or 1536, embedding_data_type=embedding_data_type or "float32", distance_function=distance_function or "cosine", full_text_language=full_text_language or "en-US", ) + offer_throughput = _cosmos_container_offer_throughput( + throughput_mode=self._cosmos_throughput_mode, + autoscale_max_ru=self._cosmos_autoscale_max_ru, + throughput_properties_cls=ThroughputProperties, + ) container_handle = db.create_container_if_not_exists( - id=self._cosmos_container, - partition_key=partition_key, - indexing_policy=idx_policy, - vector_embedding_policy=vec_policy, - full_text_policy=ft_policy, - offer_throughput=ThroughputProperties( - auto_scale_max_throughput=autoscale_max_ru, - ), + **_build_container_kwargs( + container_id=self._cosmos_container, + partition_key=partition_key, + offer_throughput=offer_throughput, + indexing_policy=idx_policy, + vector_embedding_policy=vec_policy, + full_text_policy=ft_policy, + ) ) + db.create_container_if_not_exists( - id=counter_container, - partition_key=partition_key, - offer_throughput=ThroughputProperties( - auto_scale_max_throughput=counter_autoscale_max_ru, - ), + **_build_container_kwargs( + container_id=self._cosmos_counter_container, + partition_key=partition_key, + offer_throughput=offer_throughput, + ) + ) + + db.create_container_if_not_exists( + **_build_container_kwargs( + container_id=self._cosmos_lease_container, + partition_key=lease_partition_key, + offer_throughput=offer_throughput, + ) ) self._cosmos_client = client self._container_client = container_handle @@ -412,10 +453,11 @@ def create_memory_store( raise CosmosOperationError(f"Failed to create memory store: {exc}") from exc logger.info( - "Created memory store %s/%s with counter container %s", + "Created memory store %s/%s with counter container %s and lease container %s", self._cosmos_database, self._cosmos_container, - counter_container, + self._cosmos_counter_container, + self._cosmos_lease_container, ) def _require_cosmos(self) -> None: diff --git a/azure_functions/local.settings.json.template b/azure_functions/local.settings.json.template index e9a7ccf..bebc1da 100644 --- a/azure_functions/local.settings.json.template +++ b/azure_functions/local.settings.json.template @@ -10,6 +10,8 @@ "COSMOS_DB_CONTAINER": "memories", "COSMOS_DB_LEASE_CONTAINER": "leases", "COSMOS_DB_COUNTERS_CONTAINER": "counter", + "COSMOS_DB_THROUGHPUT_MODE": "serverless", + "COSMOS_DB_AUTOSCALE_MAX_RU": "1000", "THREAD_SUMMARY_EVERY_N": "0", "FACT_EXTRACTION_EVERY_N": "0", diff --git a/tests/unit/aio/test_cosmos_memory_client.py b/tests/unit/aio/test_cosmos_memory_client.py index 9a9112f..da738b0 100644 --- a/tests/unit/aio/test_cosmos_memory_client.py +++ b/tests/unit/aio/test_cosmos_memory_client.py @@ -10,6 +10,7 @@ from agent_memory_toolkit.aio.cosmos_memory_client import AsyncCosmosMemoryClient from agent_memory_toolkit.exceptions import ( + ConfigurationError, CosmosNotConnectedError, MemoryNotFoundError, ValidationError, @@ -240,12 +241,13 @@ async def test_create_memory_store_with_counter_container(self): mock_db = AsyncMock() mock_memories_container = MagicMock() mock_counter_container = MagicMock() + mock_lease_container = MagicMock() mock_throughput_cls = MagicMock(side_effect=lambda **kwargs: type("Throughput", (), kwargs)()) mock_cosmos_cls.return_value = mock_client mock_client.create_database_if_not_exists = AsyncMock(return_value=mock_db) mock_db.create_container_if_not_exists = AsyncMock( - side_effect=[mock_memories_container, mock_counter_container] + side_effect=[mock_memories_container, mock_counter_container, mock_lease_container] ) with patch.dict( @@ -262,21 +264,70 @@ async def test_create_memory_store_with_counter_container(self): endpoint="https://fake.documents.azure.com:443/", credential="fake-key", embedding_dimensions=256, - counter_autoscale_max_ru=1000, + throughput_mode="autoscale", + autoscale_max_ru=1000, ) mock_client.create_database_if_not_exists.assert_awaited_once() memories_call = mock_db.create_container_if_not_exists.await_args_list[0] counter_call = mock_db.create_container_if_not_exists.await_args_list[1] + lease_call = mock_db.create_container_if_not_exists.await_args_list[2] vec_policy = memories_call.kwargs["vector_embedding_policy"] assert vec_policy["vectorEmbeddings"][0]["dimensions"] == 256 ft_policy = memories_call.kwargs["full_text_policy"] assert ft_policy["defaultLanguage"] == "en-US" assert counter_call.kwargs["id"] == "counter" assert counter_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000 + assert lease_call.kwargs["id"] == "leases" + assert lease_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000 assert "vector_embedding_policy" not in counter_call.kwargs assert mem._container_client is mock_memories_container + async def test_create_memory_store_defaults_to_serverless(self): + mem = _make_client(cosmos_throughput_mode="serverless") + mock_cosmos_cls = MagicMock() + mock_client = MagicMock() + mock_db = AsyncMock() + mock_memories_container = MagicMock() + mock_counter_container = MagicMock() + mock_lease_container = MagicMock() + + mock_cosmos_cls.return_value = mock_client + mock_client.create_database_if_not_exists = AsyncMock(return_value=mock_db) + mock_db.create_container_if_not_exists = AsyncMock( + side_effect=[mock_memories_container, mock_counter_container, mock_lease_container] + ) + + with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False): + with patch.dict( + "sys.modules", + { + "azure.cosmos.aio": MagicMock(CosmosClient=mock_cosmos_cls), + "azure.cosmos": MagicMock( + PartitionKey=MagicMock(), + ThroughputProperties=MagicMock(), + ), + }, + ): + await mem.create_memory_store( + endpoint="https://fake.documents.azure.com:443/", + credential="fake-key", + throughput_mode="serverless", + ) + + for call in mock_db.create_container_if_not_exists.await_args_list: + assert "offer_throughput" not in call.kwargs + + def test_constructor_ignores_invalid_autoscale_env_in_serverless_mode(self): + with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False): + mem = _make_client(cosmos_throughput_mode="serverless") + + assert mem._cosmos_autoscale_max_ru is None + + def test_constructor_rejects_invalid_throughput_mode(self): + with pytest.raises(ConfigurationError, match="expected 'serverless' or 'autoscale'"): + _make_client(cosmos_throughput_mode="invalid") + class TestRequireCosmos: async def test_require_cosmos_before_connect(self): diff --git a/tests/unit/test_cosmos_memory_client.py b/tests/unit/test_cosmos_memory_client.py index f3ca806..9771b34 100644 --- a/tests/unit/test_cosmos_memory_client.py +++ b/tests/unit/test_cosmos_memory_client.py @@ -10,6 +10,7 @@ from agent_memory_toolkit.cosmos_memory_client import CosmosMemoryClient from agent_memory_toolkit.exceptions import ( + ConfigurationError, CosmosNotConnectedError, MemoryNotFoundError, ValidationError, @@ -212,11 +213,13 @@ def test_auto_creates_store_when_endpoint_provided(self): mock_db = MagicMock() mock_memories_container = MagicMock() mock_counter_container = MagicMock() + mock_lease_container = MagicMock() mock_cosmos_cls.return_value = mock_client mock_client.create_database_if_not_exists.return_value = mock_db mock_db.create_container_if_not_exists.side_effect = [ mock_memories_container, mock_counter_container, + mock_lease_container, ] with patch.dict( @@ -236,7 +239,7 @@ def test_auto_creates_store_when_endpoint_provided(self): assert mem._container_client is mock_memories_container mock_client.create_database_if_not_exists.assert_called_once() - assert mock_db.create_container_if_not_exists.call_count == 2 + assert mock_db.create_container_if_not_exists.call_count == 3 class TestRequireCosmos: @@ -254,12 +257,14 @@ def test_create_memory_store_with_custom_dimensions(self): mock_db = MagicMock() mock_memories_container = MagicMock() mock_counter_container = MagicMock() + mock_lease_container = MagicMock() mock_throughput_cls = MagicMock(side_effect=lambda **kwargs: type("Throughput", (), kwargs)()) mock_cosmos_cls.return_value = mock_client mock_client.create_database_if_not_exists.return_value = mock_db mock_db.create_container_if_not_exists.side_effect = [ mock_memories_container, mock_counter_container, + mock_lease_container, ] # Start local-only, then create store explicitly @@ -279,21 +284,72 @@ def test_create_memory_store_with_custom_dimensions(self): endpoint="https://fake.documents.azure.com:443/", credential="fake-key", embedding_dimensions=256, - counter_autoscale_max_ru=1000, + throughput_mode="autoscale", + autoscale_max_ru=1000, ) mock_client.create_database_if_not_exists.assert_called_once() memories_call = mock_db.create_container_if_not_exists.call_args_list[0] counter_call = mock_db.create_container_if_not_exists.call_args_list[1] + lease_call = mock_db.create_container_if_not_exists.call_args_list[2] vec_policy = memories_call.kwargs["vector_embedding_policy"] assert vec_policy["vectorEmbeddings"][0]["dimensions"] == 256 ft_policy = memories_call.kwargs["full_text_policy"] assert ft_policy["defaultLanguage"] == "en-US" assert counter_call.kwargs["id"] == "counter" assert counter_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000 + assert lease_call.kwargs["id"] == "leases" + assert lease_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000 assert "vector_embedding_policy" not in counter_call.kwargs assert mem._container_client is mock_memories_container + def test_create_memory_store_defaults_to_serverless(self): + mock_cosmos_cls = MagicMock() + mock_client = MagicMock() + mock_db = MagicMock() + mock_memories_container = MagicMock() + mock_counter_container = MagicMock() + mock_lease_container = MagicMock() + mock_cosmos_cls.return_value = mock_client + mock_client.create_database_if_not_exists.return_value = mock_db + mock_db.create_container_if_not_exists.side_effect = [ + mock_memories_container, + mock_counter_container, + mock_lease_container, + ] + + mem = _make_client(cosmos_throughput_mode="serverless") + + with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False): + with patch.dict( + "sys.modules", + { + "azure.cosmos": MagicMock( + CosmosClient=mock_cosmos_cls, + PartitionKey=MagicMock(), + ThroughputProperties=MagicMock(), + ), + }, + ): + mem.create_memory_store( + endpoint="https://fake.documents.azure.com:443/", + credential="fake-key", + throughput_mode="serverless", + ) + + for call in mock_db.create_container_if_not_exists.call_args_list: + assert "offer_throughput" not in call.kwargs + + def test_constructor_ignores_invalid_autoscale_env_in_serverless_mode(self): + with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False): + mem = _make_client(cosmos_throughput_mode="serverless") + + assert mem._cosmos_autoscale_max_ru is None + + def test_constructor_rejects_invalid_throughput_mode(self): + with pytest.raises(ConfigurationError, match="expected 'serverless' or 'autoscale'"): + _make_client(cosmos_throughput_mode="invalid") + # =================================================================== # Cosmos CRUD (mock _container_client) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 0000000..a4f41a8 --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,35 @@ +"""Unit tests for shared helpers in agent_memory_toolkit._utils.""" + +from agent_memory_toolkit._utils import _build_container_kwargs + + +def test_build_container_kwargs_includes_required_fields_and_extras(): + partition_key = object() + throughput = object() + + kwargs = _build_container_kwargs( + container_id="memories", + partition_key=partition_key, + offer_throughput=throughput, + indexing_policy={"includedPaths": [{"path": "/*"}]}, + full_text_policy={"defaultLanguage": "en-US"}, + ) + + assert kwargs["id"] == "memories" + assert kwargs["partition_key"] is partition_key + assert kwargs["offer_throughput"] is throughput + assert kwargs["indexing_policy"] == {"includedPaths": [{"path": "/*"}]} + assert kwargs["full_text_policy"] == {"defaultLanguage": "en-US"} + + +def test_build_container_kwargs_omits_offer_throughput_when_none(): + kwargs = _build_container_kwargs( + container_id="leases", + partition_key="/id", + offer_throughput=None, + ) + + assert kwargs == { + "id": "leases", + "partition_key": "/id", + }