diff --git a/.env.template b/.env.template
index 3717937..dc6567c 100644
--- a/.env.template
+++ b/.env.template
@@ -9,6 +9,14 @@ COSMOS_DB_DATABASE=ai_memory
 COSMOS_DB_CONTAINER=memories
 COSMOS_DB_COUNTERS_CONTAINER=counter
 COSMOS_DB_LEASE_CONTAINER=leases
+# Throughput mode for all required Cosmos DB containers created by the toolkit
+# (memories, counter, and lease).
+# - serverless: default. The toolkit does not send container RU/s settings.
+#   Use this only with a Cosmos DB account configured for serverless.
+# - autoscale: the toolkit provisions all required containers with autoscale
+#   throughput using COSMOS_DB_AUTOSCALE_MAX_RU as the max RU/s cap.
+#   Default max RU/s is 1000.
+COSMOS_DB_THROUGHPUT_MODE=serverless
 COSMOS_DB_AUTOSCALE_MAX_RU=1000
 
 # ---- Change Feed Thresholds (set to 0 to disable) ----
diff --git a/Docs/README.md b/Docs/README.md
index b4e4046..4332550 100644
--- a/Docs/README.md
+++ b/Docs/README.md
@@ -6,9 +6,9 @@ This folder contains the main project documentation for Agent Memory Toolkit.
 
 | Document | Purpose |
 |----------|---------|
-| [concepts.md](concepts.md) | Explains the core memory model, including memory types (turn, summary, fact, user summary), threads, roles, the processing pipeline, and automatic change feed processing. |
-| [local_testing.md](local_testing.md) | Covers local setup, environment configuration, RBAC, Cosmos provisioning, running the toolkit and Azure Functions locally, and testing change feed auto-processing. |
-| [azure_testing.md](azure_testing.md) | Covers Azure deployment, cloud configuration, required services, change feed settings, and validation steps for running the toolkit in Azure. |
+| [concepts.md](concepts.md) | Explains the core memory model, including memory types (turn, summary, fact, user summary), threads, roles, the processing pipeline, automatic change feed processing, and shared Cosmos throughput configuration. |
+| [local_testing.md](local_testing.md) | Covers local setup, environment configuration, RBAC, Cosmos provisioning, running the toolkit and Azure Functions locally, and testing change feed auto-processing with serverless or autoscale container provisioning. |
+| [azure_testing.md](azure_testing.md) | Covers Azure deployment, cloud configuration, required services, change feed settings, throughput mode configuration, and validation steps for running the toolkit in Azure. |
 | [design_patterns.md](design_patterns.md) | Shows when and how to call CRUD operations, summarization, fact extraction, and memory retrieval in chat and multi-agent applications, including automatic processing via the change feed. |
 
 ## Recommended Reading Order
diff --git a/Docs/azure_testing.md b/Docs/azure_testing.md
index ac8bad4..826b8b3 100644
--- a/Docs/azure_testing.md
+++ b/Docs/azure_testing.md
@@ -1,6 +1,6 @@
 # Deploying and Testing Agent Memory Toolkit in Azure
 
-This guide covers the minimum Azure resources, deployment steps, and validation order for running the toolkit in Azure.
+This guide covers the minimum Azure resources, deployment steps, throughput settings, and validation order for running the toolkit in Azure.
 
 ---
 
@@ -71,7 +71,7 @@ az cosmosdb create \
   --resource-group <resource-group>
 ```
 
-The toolkit can create the database and container later via `create_memory_store()`.
+The toolkit can create the database and required containers later via `create_memory_store()`.
 
 ---
 
@@ -104,6 +104,9 @@ az functionapp config appsettings set \
     COSMOS_DB_ENDPOINT="https://<cosmos-account-name>.documents.azure.com:443/" \
     COSMOS_DB_DATABASE="ai_memory" \
     COSMOS_DB_CONTAINER="memories" \
+    COSMOS_DB_COUNTERS_CONTAINER="counter" \
+    COSMOS_DB_LEASE_CONTAINER="leases" \
+    COSMOS_DB_THROUGHPUT_MODE="serverless" \
     COSMOS_DB_AUTOSCALE_MAX_RU="1000" \
     AI_FOUNDRY_ENDPOINT="https://<openai-account-name>.openai.azure.com/" \
     EMBEDDING_MODEL="text-embedding-3-large" \
@@ -111,6 +114,8 @@ az functionapp config appsettings set \
     LLM_MODEL="gpt-5-mini"
 ```
 
+`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the `memories`, `counter`, and `leases` containers without specifying RU/s. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all required containers.
+
 ### Change feed settings (optional)
 
 To enable automatic processing via the change feed trigger, add these settings:
@@ -122,6 +127,9 @@ az functionapp config appsettings set \
   --settings \
     COSMOS_DB__accountEndpoint="https://<cosmos-account-name>.documents.azure.com:443/" \
     COSMOS_DB_COUNTERS_CONTAINER="counter" \
+    COSMOS_DB_LEASE_CONTAINER="leases" \
+    COSMOS_DB_THROUGHPUT_MODE="serverless" \
+    COSMOS_DB_AUTOSCALE_MAX_RU="1000" \
     THREAD_SUMMARY_EVERY_N="5" \
     FACT_EXTRACTION_EVERY_N="3" \
     USER_SUMMARY_EVERY_N="10"
@@ -129,7 +137,7 @@ az functionapp config appsettings set \
 
 Set any threshold to `"0"` to disable that processing type.
 
-The `leases` container is created automatically by the Azure Functions runtime.
+The `leases` container is provisioned by `create_memory_store()` alongside the `memories` and `counter` containers, so the Function App should be configured to use that existing lease container.
 
 If you use function-key auth for the HTTP trigger, keep the key for the client as `ADF_KEY`.
 
@@ -161,6 +169,9 @@ Update `.env` to point at Azure instead of localhost:
 COSMOS_DB_ENDPOINT=https://<cosmos-account-name>.documents.azure.com:443/
 COSMOS_DB_DATABASE=ai_memory
 COSMOS_DB_CONTAINER=memories
+COSMOS_DB_COUNTERS_CONTAINER=counter
+COSMOS_DB_LEASE_CONTAINER=leases
+COSMOS_DB_THROUGHPUT_MODE=serverless
 COSMOS_DB_AUTOSCALE_MAX_RU=1000
 
 AI_FOUNDRY_ENDPOINT=https://<openai-account-name>.openai.azure.com/
@@ -192,6 +203,10 @@ memory = AgentMemory(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
     cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
+    cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
+    cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
+    cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")),
     ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"),
     embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"),
     adf_endpoint=os.getenv("ADF_ENDPOINT"),
@@ -218,6 +233,10 @@ memory = AsyncAgentMemory(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
     cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
+    cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
+    cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
+    cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")),
     ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"),
     embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"),
     adf_endpoint=os.getenv("ADF_ENDPOINT"),
@@ -235,7 +254,7 @@ await memory.connect_cosmos(
 await memory.create_memory_store()
 ```
 
-This provisions the hierarchical partition key (`user_id`, `thread_id`), vector index, full-text index, and autoscale throughput.
+This provisions the `memories`, `counter`, and `leases` containers. `serverless` is the default throughput mode; if you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the shared `COSMOS_DB_AUTOSCALE_MAX_RU` value is applied to all three containers.
 
 ---
 
diff --git a/Docs/concepts.md b/Docs/concepts.md
index 5de9f10..abcff4f 100644
--- a/Docs/concepts.md
+++ b/Docs/concepts.md
@@ -154,7 +154,16 @@ Set any value to `0` to disable that processing type. For example, setting `THRE
 |-----------|---------------|---------|
 | `memories` | `/user_id`, `/thread_id` (hierarchical) | Existing memory store |
 | `counter` | `/user_id`, `/thread_id` (hierarchical) | Message count tracking for automatic processing |
-| `leases` | `/id` | Auto-created by the trigger for change feed checkpointing |
+| `leases` | `/id` | Change feed checkpointing container created by `create_memory_store()` |
+
+### Throughput configuration
+
+The toolkit provisions all required Cosmos containers under one shared throughput mode:
+
+- `serverless` is the default. The toolkit creates the `memories`, `counter`, and `leases` containers without specifying RU/s.
+- `autoscale` applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to all three containers.
+
+This keeps the change feed dependencies aligned with the main memory store instead of letting the Functions trigger create the lease container independently.
 
 ### Push vs. pull
 
diff --git a/Docs/local_testing.md b/Docs/local_testing.md
index 24a5722..33ef95e 100644
--- a/Docs/local_testing.md
+++ b/Docs/local_testing.md
@@ -72,6 +72,9 @@ Minimum `.env` values:
 COSMOS_DB_ENDPOINT=https://<your-account>.documents.azure.com:443/
 COSMOS_DB_DATABASE=ai_memory
 COSMOS_DB_CONTAINER=memories
+COSMOS_DB_COUNTERS_CONTAINER=counter
+COSMOS_DB_LEASE_CONTAINER=leases
+COSMOS_DB_THROUGHPUT_MODE=serverless
 COSMOS_DB_AUTOSCALE_MAX_RU=1000
 
 AI_FOUNDRY_ENDPOINT=https://<your-project>.services.ai.azure.com/
@@ -85,6 +88,8 @@ ADF_KEY=
 
 The Functions runtime uses `azure_functions/local.settings.json`, not `.env`, so mirror the same values there.
 
+`COSMOS_DB_THROUGHPUT_MODE=serverless` is the default and creates the required Cosmos containers without specifying RU/s. If you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the toolkit provisions the memories, counter, and lease containers with the shared max RU/s value from `COSMOS_DB_AUTOSCALE_MAX_RU`.
+
 ### Change feed settings (optional)
 
 In `azure_functions/local.settings.json`, add these to enable automatic processing:
@@ -92,6 +97,9 @@ In `azure_functions/local.settings.json`, add these to enable automatic processi
 ```json
 "COSMOS_DB__accountEndpoint": "https://<your-account>.documents.azure.com:443/",
 "COSMOS_DB_COUNTERS_CONTAINER": "counter",
+"COSMOS_DB_LEASE_CONTAINER": "leases",
+"COSMOS_DB_THROUGHPUT_MODE": "serverless",
+"COSMOS_DB_AUTOSCALE_MAX_RU": "1000",
 "THREAD_SUMMARY_EVERY_N": "5",
 "FACT_EXTRACTION_EVERY_N": "3",
 "USER_SUMMARY_EVERY_N": "10"
@@ -153,6 +161,10 @@ memory = AgentMemory(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
     cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
+    cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
+    cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
+    cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")),
     ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"),
     embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"),
     adf_endpoint=os.getenv("ADF_ENDPOINT", "http://localhost:7071/api"),
@@ -192,6 +204,10 @@ memory = AsyncAgentMemory(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
     cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
+    cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
+    cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
+    cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")),
     ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"),
     embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"),
     adf_endpoint=os.getenv("ADF_ENDPOINT", "http://localhost:7071/api"),
@@ -217,7 +233,7 @@ for r in results:
 await memory.close()
 ```
 
-`create_memory_store()` creates the database/container and configures the hierarchical partition key (`user_id`, `thread_id`), vector index, full-text index, and autoscale throughput.
+`create_memory_store()` creates the database and required containers, configures the hierarchical partition key (`user_id`, `thread_id`) for memories and counters, uses `/id` for the lease container, and applies either serverless or autoscale throughput based on `COSMOS_DB_THROUGHPUT_MODE`.
 
 ---
 
diff --git a/README.md b/README.md
index 3403d61..5bffe00 100644
--- a/README.md
+++ b/README.md
@@ -134,6 +134,10 @@ memory = CosmosMemoryClient(
     cosmos_endpoint=os.getenv("COSMOS_DB_ENDPOINT"),
     cosmos_database=os.getenv("COSMOS_DB_DATABASE"),
     cosmos_container=os.getenv("COSMOS_DB_CONTAINER"),
+    cosmos_counter_container=os.getenv("COSMOS_DB_COUNTERS_CONTAINER", "counter"),
+    cosmos_lease_container=os.getenv("COSMOS_DB_LEASE_CONTAINER", "leases"),
+    cosmos_throughput_mode=os.getenv("COSMOS_DB_THROUGHPUT_MODE", "serverless"),
+    cosmos_autoscale_max_ru=int(os.getenv("COSMOS_DB_AUTOSCALE_MAX_RU", "1000")),
     ai_foundry_endpoint=os.getenv("AI_FOUNDRY_ENDPOINT"),
     embedding_model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-large"),
     adf_endpoint=os.getenv("ADF_ENDPOINT", "http://localhost:7071/api"),
@@ -141,7 +145,9 @@ memory = CosmosMemoryClient(
     use_default_credential=True,
     cosmos_credential=DefaultAzureCredential(),
 )
-# Constructor auto-creates the database and container if they don't exist.
+# Constructor auto-creates the database and required containers if they don't exist.
+# `serverless` is the default throughput mode. Set `COSMOS_DB_THROUGHPUT_MODE=autoscale`
+# to provision memories, counter, and lease containers with a shared autoscale RU cap.
 
 # Add directly to Cosmos
 thread_id = str(uuid.uuid4())
@@ -187,7 +193,7 @@ summary = memory.get_user_summary(user_id="user-001")
 | **Azure OpenAI / AI Foundry** | Embedding model + chat model for summarization / fact extraction |
 | **Azure Functions** | Durable Functions orchestrator and activity functions |
 
-Automatic change feed processing stores lightweight counter documents in a dedicated `counter` container and also uses a `leases` container (auto-created). See [concepts.md](Docs/concepts.md#automatic-processing-change-feed) for details.
+Automatic change feed processing stores lightweight counter documents in a dedicated `counter` container and also uses a `leases` container that is provisioned by `create_memory_store()`. Throughput defaults to `serverless`; set `COSMOS_DB_THROUGHPUT_MODE=autoscale` to apply the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to the memories, counter, and lease containers. See [concepts.md](Docs/concepts.md#automatic-processing-change-feed) for details.
 
 All services use **Entra ID** auth via `DefaultAzureCredential`.
 
diff --git a/Samples/Demo.ipynb b/Samples/Demo.ipynb
index 9ab5b2c..ea8f5e3 100644
--- a/Samples/Demo.ipynb
+++ b/Samples/Demo.ipynb
@@ -7,18 +7,37 @@
    "source": [
     "# Agent Memory Toolkit – Demo\n",
     "\n",
+    "\n",
+    "\n",
     "This notebook walks through the **Agent Memory Toolkit** library using the synchronous `CosmosMemoryClient` class:\n",
     "\n",
+    "\n",
+    "\n",
     "1. **Setup** – Install dependencies and load environment variables\n",
+    "\n",
     "2. **Local memory operations** – `add_local`, `get_local`, `update_local`, `delete_local`\n",
+    "\n",
     "3. **Cosmos DB operations** – `add_cosmos`, `get_memories`\n",
+    "\n",
     "4. **Azure Durable Function – Thread Summary** – `generate_thread_summary()` with `CosmosMemoryClient`\n",
+    "\n",
     "5. **Azure Durable Function – Extract Facts** – `extract_facts()` with `CosmosMemoryClient`\n",
+    "\n",
     "6. **Azure Durable Function – User Summary** – `generate_user_summary()` and `get_user_summary()` with `CosmosMemoryClient`\n",
+    "\n",
     "7. **Vector search** – `search_cosmos()` with `CosmosMemoryClient`\n",
+    "\n",
     "8. **Automatic processing (Change Feed)** – Write turns and let the change feed trigger process them automatically\n",
     "\n",
-    "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`. To run against a deployed Function App instead, update `ADF_ENDPOINT` in your `.env` file.\n",
+    "\n",
+    "\n",
+    "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`.\n",
+    "\n",
+    ">\n",
+    "\n",
+    "> **Cosmos provisioning:** `serverless` is the default throughput mode. If you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the toolkit applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to the `memories`, `counter`, and `leases` containers created by `create_memory_store()`.\n",
+    "\n",
+    "\n",
     "\n",
     "> For the **async** API (`AsyncCosmosMemoryClient`), see [Demo_async.ipynb](Demo_async.ipynb)."
    ]
@@ -46,21 +65,44 @@
    "outputs": [],
    "source": [
     "import os, json\n",
+    "\n",
     "from dotenv import load_dotenv\n",
+    "\n",
     "from azure.identity import DefaultAzureCredential\n",
     "\n",
+    "\n",
+    "\n",
     "# Add parent directory to path so we can import the package easily\n",
+    "\n",
     "import sys\n",
+    "\n",
     "sys.path.insert(0, os.path.abspath(\"..\"))\n",
     "\n",
+    "\n",
+    "\n",
     "from agent_memory_toolkit import CosmosMemoryClient\n",
     "\n",
+    "\n",
+    "\n",
     "# Load environment variables from .env in the repo root\n",
+    "\n",
     "load_dotenv(os.path.join(\"..\", \".env\"))\n",
     "\n",
+    "\n",
+    "\n",
     "print(\"COSMOS_DB_ENDPOINT:\", os.getenv(\"COSMOS_DB_ENDPOINT\"))\n",
+    "\n",
     "print(\"COSMOS_DB_DATABASE:\", os.getenv(\"COSMOS_DB_DATABASE\"))\n",
-    "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))"
+    "\n",
+    "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_COUNTERS_CONTAINER:\", os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_LEASE_CONTAINER:\", os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_THROUGHPUT_MODE:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_AUTOSCALE_MAX_RU:\", os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\"))"
    ]
   },
   {
@@ -76,19 +118,43 @@
    "outputs": [],
    "source": [
     "# Create a CosmosMemoryClient instance\n",
+    "\n",
     "memory = CosmosMemoryClient(\n",
+    "\n",
     "    cosmos_endpoint=os.getenv(\"COSMOS_DB_ENDPOINT\"),\n",
+    "\n",
     "    cosmos_database=os.getenv(\"COSMOS_DB_DATABASE\"),\n",
+    "\n",
     "    cosmos_container=os.getenv(\"COSMOS_DB_CONTAINER\"),\n",
+    "\n",
+    "    cosmos_counter_container=os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"),\n",
+    "\n",
+    "    cosmos_lease_container=os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"),\n",
+    "\n",
+    "    cosmos_throughput_mode=os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"),\n",
+    "\n",
+    "    cosmos_autoscale_max_ru=int(os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\")),\n",
+    "\n",
     "    ai_foundry_endpoint=os.getenv(\"AI_FOUNDRY_ENDPOINT\"),\n",
+    "\n",
     "    embedding_model=os.getenv(\"EMBEDDING_MODEL\", \"text-embedding-3-large\"),\n",
+    "\n",
     "    adf_endpoint=os.getenv(\"ADF_ENDPOINT\", \"http://localhost:7071/api\"),\n",
+    "\n",
     "    adf_key=os.getenv(\"ADF_KEY\", \"\"),\n",
+    "\n",
     "    use_default_credential=True,\n",
+    "\n",
     "    cosmos_credential=DefaultAzureCredential()\n",
+    "\n",
     ")\n",
     "\n",
+    "\n",
+    "\n",
     "print(\"CosmosMemoryClient instance created\")\n",
+    "\n",
+    "print(\"Throughput mode:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n",
+    "\n",
     "print(\"Local memory store:\", memory.local_memory)"
    ]
   },
@@ -812,15 +878,29 @@
    "source": [
     "### 8. Automatic Processing (Change Feed)\n",
     "\n",
+    "\n",
+    "\n",
     "The toolkit includes a Cosmos DB change feed trigger that automatically fires thread summaries, fact extraction, and user summaries when configurable message count thresholds are crossed.\n",
     "\n",
+    "\n",
+    "\n",
     "**Prerequisites:**\n",
+    "\n",
     "- The Azure Functions host must be running (`func start`)\n",
+    "\n",
     "- `local.settings.json` must include change feed settings:\n",
+    "\n",
     "  - `COSMOS_DB__accountEndpoint` pointing to your Cosmos account\n",
+    "\n",
     "  - `COSMOS_DB_COUNTERS_CONTAINER` set to `\"counter\"`\n",
+    "\n",
+    "  - `COSMOS_DB_LEASE_CONTAINER` set to `\"leases\"`\n",
+    "\n",
     "  - At least one threshold > 0 (e.g. `THREAD_SUMMARY_EVERY_N=3`)\n",
-    "- A `counter` container must exist in the same database\n",
+    "\n",
+    "- `create_memory_store()` must have provisioned the `counter` and `leases` containers in the same database\n",
+    "\n",
+    "\n",
     "\n",
     "The cells below write enough turns to cross the threshold, then poll for auto-generated derived memories."
    ]
diff --git a/Samples/Demo_async.ipynb b/Samples/Demo_async.ipynb
index 2d40494..fc0d286 100644
--- a/Samples/Demo_async.ipynb
+++ b/Samples/Demo_async.ipynb
@@ -7,18 +7,37 @@
    "source": [
     "# Agent Memory Toolkit – Async Demo\n",
     "\n",
+    "\n",
+    "\n",
     "This notebook demonstrates the **async** API of the Agent Memory Toolkit via `AsyncCosmosMemoryClient`:\n",
     "\n",
+    "\n",
+    "\n",
     "1. [**Setup**](#1-setup) – Install dependencies and load environment variables\n",
+    "\n",
     "2. [**Local memory operations**](#2-local-memory-operations) – `add_local`, `get_local`, `update_local`, `delete_local`\n",
+    "\n",
     "3. [**Cosmos DB operations**](#3-cosmos-db-operations) – `add_cosmos`, `get_memories`\n",
+    "\n",
     "4. [**Summarization**](#4-summarization) – `generate_thread_summary()` with `AsyncCosmosMemoryClient`\n",
+    "\n",
     "5. [**Fact Extraction**](#5-fact-extraction) – `extract_facts()` with `AsyncCosmosMemoryClient`\n",
+    "\n",
     "6. [**User Summary**](#6-user-summary) – `generate_user_summary()` and `get_user_summary()` with `AsyncCosmosMemoryClient`\n",
+    "\n",
     "7. [**Vector search**](#7-vector-search-with-`search_cosmos`) – `search_cosmos()` with `AsyncCosmosMemoryClient`\n",
+    "\n",
     "8. [**Automatic processing (Change Feed)**](#8-automatic-processing-change-feed) – Write turns and let the change feed trigger process them automatically\n",
     "\n",
-    "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`. To run against a deployed Function App instead, update `ADF_ENDPOINT` in your `.env` file.\n",
+    "\n",
+    "\n",
+    "> **Local hosting:** Sections 4–7 require the Azure Durable Functions host running locally via `func start` (see [local_testing.md](../Docs/local_testing.md) for setup). Section 8 additionally requires the change feed settings configured in `local.settings.json`.\n",
+    "\n",
+    ">\n",
+    "\n",
+    "> **Cosmos provisioning:** `serverless` is the default throughput mode. If you set `COSMOS_DB_THROUGHPUT_MODE=autoscale`, the toolkit applies the shared `COSMOS_DB_AUTOSCALE_MAX_RU` cap to the `memories`, `counter`, and `leases` containers created by `create_memory_store()`.\n",
+    "\n",
+    "\n",
     "\n",
     "> For the **sync** API (`CosmosMemoryClient`), see [Demo.ipynb](Demo.ipynb)."
    ]
@@ -46,18 +65,38 @@
    "outputs": [],
    "source": [
     "import os, json, sys\n",
+    "\n",
     "from pathlib import Path\n",
+    "\n",
     "sys.path.insert(0, os.path.abspath(\"..\"))\n",
     "\n",
+    "\n",
+    "\n",
     "from dotenv import load_dotenv\n",
+    "\n",
     "from agent_memory_toolkit.aio import AsyncCosmosMemoryClient\n",
     "\n",
+    "\n",
+    "\n",
     "# Load environment variables from .env in the repo root\n",
+    "\n",
     "load_dotenv(os.path.join(\"..\", \".env\"))\n",
     "\n",
+    "\n",
+    "\n",
     "print(\"COSMOS_DB_ENDPOINT:\", os.getenv(\"COSMOS_DB_ENDPOINT\"))\n",
+    "\n",
     "print(\"COSMOS_DB_DATABASE:\", os.getenv(\"COSMOS_DB_DATABASE\"))\n",
-    "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))"
+    "\n",
+    "print(\"COSMOS_DB_CONTAINER:\", os.getenv(\"COSMOS_DB_CONTAINER\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_COUNTERS_CONTAINER:\", os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_LEASE_CONTAINER:\", os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_THROUGHPUT_MODE:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n",
+    "\n",
+    "print(\"COSMOS_DB_AUTOSCALE_MAX_RU:\", os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\"))"
    ]
   },
   {
@@ -74,20 +113,46 @@
    "source": [
     "from azure.identity.aio import DefaultAzureCredential as AsyncDefaultAzureCredential\n",
     "\n",
+    "\n",
+    "\n",
     "# Create an AsyncCosmosMemoryClient instance\n",
+    "\n",
     "memory = AsyncCosmosMemoryClient(\n",
+    "\n",
     "    cosmos_endpoint=os.getenv(\"COSMOS_DB_ENDPOINT\"),\n",
+    "\n",
     "    cosmos_database=os.getenv(\"COSMOS_DB_DATABASE\"),\n",
+    "\n",
     "    cosmos_container=os.getenv(\"COSMOS_DB_CONTAINER\"),\n",
+    "\n",
+    "    cosmos_counter_container=os.getenv(\"COSMOS_DB_COUNTERS_CONTAINER\", \"counter\"),\n",
+    "\n",
+    "    cosmos_lease_container=os.getenv(\"COSMOS_DB_LEASE_CONTAINER\", \"leases\"),\n",
+    "\n",
+    "    cosmos_throughput_mode=os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"),\n",
+    "\n",
+    "    cosmos_autoscale_max_ru=int(os.getenv(\"COSMOS_DB_AUTOSCALE_MAX_RU\", \"1000\")),\n",
+    "\n",
     "    ai_foundry_endpoint=os.getenv(\"AI_FOUNDRY_ENDPOINT\"),\n",
+    "\n",
     "    embedding_model=os.getenv(\"EMBEDDING_MODEL\", \"text-embedding-3-large\"),\n",
+    "\n",
     "    adf_endpoint=os.getenv(\"ADF_ENDPOINT\", \"http://localhost:7071/api\"),\n",
+    "\n",
     "    adf_key=os.getenv(\"ADF_KEY\", \"\"),\n",
+    "\n",
     "    use_default_credential=True,\n",
+    "\n",
     "    cosmos_credential=AsyncDefaultAzureCredential(),\n",
+    "\n",
     ")\n",
     "\n",
+    "\n",
+    "\n",
     "print(\"AsyncCosmosMemoryClient instance created\")\n",
+    "\n",
+    "print(\"Throughput mode:\", os.getenv(\"COSMOS_DB_THROUGHPUT_MODE\", \"serverless\"))\n",
+    "\n",
     "print(\"Local memory store:\", memory.local_memory)"
    ]
   },
@@ -732,15 +797,29 @@
    "source": [
     "### 8. Automatic Processing (Change Feed)\n",
     "\n",
+    "\n",
+    "\n",
     "The toolkit includes a Cosmos DB change feed trigger that automatically fires thread summaries, fact extraction, and user summaries when configurable message count thresholds are crossed.\n",
     "\n",
+    "\n",
+    "\n",
     "**Prerequisites:**\n",
+    "\n",
     "- The Azure Functions host must be running (`func start`)\n",
+    "\n",
     "- `local.settings.json` must include change feed settings:\n",
-    "  - `COSMOS_DB__accountEndpoint` pointing to your Cosmos account\\n\n",
-    "  - `COSMOS_DB_COUNTERS_CONTAINER` set to `\"counters\"`\n",
+    "\n",
+    "  - `COSMOS_DB__accountEndpoint` pointing to your Cosmos account\n",
+    "\n",
+    "  - `COSMOS_DB_COUNTERS_CONTAINER` set to `\"counter\"`\n",
+    "\n",
+    "  - `COSMOS_DB_LEASE_CONTAINER` set to `\"leases\"`\n",
+    "\n",
     "  - At least one threshold > 0 (e.g. `THREAD_SUMMARY_EVERY_N=3`)\n",
-    "- A `counters` container must exist in the same database (partition key: `/user_id`)\n",
+    "\n",
+    "- `create_memory_store()` must have provisioned the `counter` and `leases` containers in the same database\n",
+    "\n",
+    "\n",
     "\n",
     "The cells below write enough turns to cross the threshold, then poll for auto-generated derived memories."
    ]
diff --git a/agent_memory_toolkit/_utils.py b/agent_memory_toolkit/_utils.py
index 50a30f9..e9acb4d 100644
--- a/agent_memory_toolkit/_utils.py
+++ b/agent_memory_toolkit/_utils.py
@@ -69,6 +69,94 @@ def _resolve_embedding_dimensions(val: Optional[int]) -> Optional[int]:
     return parsed if parsed else None
 
 
+def _resolve_cosmos_throughput_mode(val: Optional[str]) -> str:
+    """Resolve throughput mode from explicit value or env var.
+
+    Allowed values are ``serverless`` and ``autoscale``.
+    """
+    raw = (val if val is not None else os.environ.get("COSMOS_DB_THROUGHPUT_MODE") or "serverless").strip().lower()
+
+    if raw not in {"serverless", "autoscale"}:
+        raise ConfigurationError(
+            message=(
+                f"Invalid configuration for cosmos_throughput_mode: expected 'serverless' or 'autoscale', got '{raw}'"
+            ),
+            parameter="cosmos_throughput_mode",
+        )
+    return raw
+
+
+def _resolve_cosmos_autoscale_max_ru(val: Optional[int]) -> int:
+    """Resolve autoscale max RU from explicit value or env var."""
+    if val is not None:
+        if not isinstance(val, int) or isinstance(val, bool) or val <= 0:
+            raise ConfigurationError(
+                message=f"Invalid configuration for cosmos_autoscale_max_ru: expected a positive integer, got '{val}'",
+                parameter="cosmos_autoscale_max_ru",
+            )
+        return val
+    raw = (os.environ.get("COSMOS_DB_AUTOSCALE_MAX_RU") or "1000").strip()
+    try:
+        parsed = int(raw)
+    except ValueError as exc:
+        raise ConfigurationError(
+            message=(f"Invalid configuration for cosmos_autoscale_max_ru: expected an integer, got '{raw}'"),
+            parameter="cosmos_autoscale_max_ru",
+        ) from exc
+    if parsed <= 0:
+        raise ConfigurationError(
+            message=(f"Invalid configuration for cosmos_autoscale_max_ru: expected a positive integer, got '{raw}'"),
+            parameter="cosmos_autoscale_max_ru",
+        )
+    return parsed
+
+
+def _resolve_cosmos_provisioning_autoscale_max_ru(
+    *,
+    throughput_mode: str,
+    autoscale_max_ru: Optional[int],
+) -> Optional[int]:
+    """Resolve autoscale max RU only when autoscale throughput is enabled."""
+    if throughput_mode != "autoscale":
+        return None
+    return _resolve_cosmos_autoscale_max_ru(autoscale_max_ru)
+
+
+def _cosmos_container_offer_throughput(
+    *,
+    throughput_mode: str,
+    autoscale_max_ru: Optional[int],
+    throughput_properties_cls: Any,
+) -> Any:
+    """Return ``None`` for serverless mode or a throughput properties instance for autoscale mode."""
+    if throughput_mode == "serverless":
+        return None
+    if autoscale_max_ru is None:
+        raise ConfigurationError(
+            message=("Invalid configuration for cosmos_autoscale_max_ru: autoscale mode requires a positive integer"),
+            parameter="cosmos_autoscale_max_ru",
+        )
+    return throughput_properties_cls(auto_scale_max_throughput=autoscale_max_ru)
+
+
+def _build_container_kwargs(
+    *,
+    container_id: str,
+    partition_key: Any,
+    offer_throughput: Optional[Any],
+    **extras: Any,
+) -> dict[str, Any]:
+    """Build kwargs for ``create_container_if_not_exists`` with optional throughput."""
+    kwargs: dict[str, Any] = {
+        "id": container_id,
+        "partition_key": partition_key,
+        **extras,
+    }
+    if offer_throughput is not None:
+        kwargs["offer_throughput"] = offer_throughput
+    return kwargs
+
+
 # ---------------------------------------------------------------------------
 # Connection / query helpers (shared by sync & async Cosmos clients)
 # ---------------------------------------------------------------------------
diff --git a/agent_memory_toolkit/aio/cosmos_memory_client.py b/agent_memory_toolkit/aio/cosmos_memory_client.py
index 7e975d6..3c571b7 100644
--- a/agent_memory_toolkit/aio/cosmos_memory_client.py
+++ b/agent_memory_toolkit/aio/cosmos_memory_client.py
@@ -19,9 +19,13 @@
 from agent_memory_toolkit._utils import (
     VALID_ROLES,
     VALID_TYPES,
+    _build_container_kwargs,
     _build_memory_query_builder,
     _container_policies,
+    _cosmos_container_offer_throughput,
     _make_memory,
+    _resolve_cosmos_provisioning_autoscale_max_ru,
+    _resolve_cosmos_throughput_mode,
     _resolve_embedding_dimensions,
     _validate_connection,
     _validate_hybrid_search,
@@ -62,6 +66,10 @@ def __init__(
         cosmos_credential: Optional[Any] = None,
         cosmos_database: Optional[str] = None,
         cosmos_container: Optional[str] = None,
+        cosmos_counter_container: Optional[str] = None,
+        cosmos_lease_container: Optional[str] = None,
+        cosmos_throughput_mode: Optional[str] = None,
+        cosmos_autoscale_max_ru: Optional[int] = None,
         ai_foundry_endpoint: Optional[str] = None,
         ai_foundry_credential: Optional[Any] = None,
         ai_foundry_api_key: Optional[str] = None,
@@ -79,6 +87,14 @@ def __init__(
         self._cosmos_credential = cosmos_credential
         self._cosmos_database = cosmos_database or "ai_memory"
         self._cosmos_container = cosmos_container or "memories"
+        self._cosmos_counter_container = cosmos_counter_container or "counter"
+        self._cosmos_lease_container = cosmos_lease_container or "leases"
+        self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(cosmos_throughput_mode)
+        self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(cosmos_throughput_mode)
+        self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru(
+            throughput_mode=self._cosmos_throughput_mode,
+            autoscale_max_ru=cosmos_autoscale_max_ru,
+        )
 
         self._ai_foundry_endpoint = ai_foundry_endpoint
         self._ai_foundry_credential = ai_foundry_credential
@@ -326,15 +342,16 @@ async def create_memory_store(
         self,
         database: Optional[str] = None,
         container: Optional[str] = None,
-        counter_container: str = "counter",
+        counter_container: Optional[str] = None,
+        lease_container: Optional[str] = None,
         endpoint: Optional[str] = None,
         credential: Optional[Any] = None,
         embedding_dimensions: Optional[int] = None,
         embedding_data_type: Optional[str] = None,
         distance_function: Optional[str] = None,
         full_text_language: Optional[str] = None,
-        autoscale_max_ru: int = 1000,
-        counter_autoscale_max_ru: int = 1000,
+        throughput_mode: Optional[str] = None,
+        autoscale_max_ru: Optional[int] = None,
     ) -> None:
         """Create the Cosmos DB database and container for memories (async).
 
@@ -346,16 +363,26 @@ async def create_memory_store(
         * Hierarchical partition key ``[/user_id, /thread_id]``
         * ``quantizedFlat`` vector index on ``/embedding``
         * Full-text index on ``/content``
-        * Autoscale throughput (max RU from *autoscale_max_ru*)
+        * Throughput behavior controlled by *throughput_mode*
 
-        A separate counter container is also provisioned with the same
-        partition key and autoscale throughput capped by
-        *counter_autoscale_max_ru*.
+        Separate counter and lease containers are also provisioned.
+        In ``serverless`` mode no RU/s throughput is specified.
+        In ``autoscale`` mode all required containers use the same
+        autoscale max RU from *autoscale_max_ru*.
         """
         self._cosmos_endpoint = endpoint or self._cosmos_endpoint
         self._cosmos_credential = credential or self._cosmos_credential
         self._cosmos_database = database or self._cosmos_database
         self._cosmos_container = container or self._cosmos_container
+        self._cosmos_counter_container = counter_container or self._cosmos_counter_container
+        self._cosmos_lease_container = lease_container or self._cosmos_lease_container
+        self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(
+            throughput_mode if throughput_mode is not None else self._cosmos_throughput_mode
+        )
+        self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru(
+            throughput_mode=self._cosmos_throughput_mode,
+            autoscale_max_ru=(autoscale_max_ru if autoscale_max_ru is not None else self._cosmos_autoscale_max_ru),
+        )
 
         _validate_connection(
             self._cosmos_endpoint,
@@ -373,29 +400,44 @@ async def create_memory_store(
             db = await client.create_database_if_not_exists(id=self._cosmos_database)
 
             partition_key = PartitionKey(path=["/user_id", "/thread_id"], kind="MultiHash")
+            lease_partition_key = PartitionKey(path="/id")
             vec_policy, idx_policy, ft_policy = _container_policies(
                 embedding_dimensions=embedding_dimensions or self._embedding_dimensions or 1536,
                 embedding_data_type=embedding_data_type or "float32",
                 distance_function=distance_function or "cosine",
                 full_text_language=full_text_language or "en-US",
             )
+            offer_throughput = _cosmos_container_offer_throughput(
+                throughput_mode=self._cosmos_throughput_mode,
+                autoscale_max_ru=self._cosmos_autoscale_max_ru,
+                throughput_properties_cls=ThroughputProperties,
+            )
 
             container_handle = await db.create_container_if_not_exists(
-                id=self._cosmos_container,
-                partition_key=partition_key,
-                indexing_policy=idx_policy,
-                vector_embedding_policy=vec_policy,
-                full_text_policy=ft_policy,
-                offer_throughput=ThroughputProperties(
-                    auto_scale_max_throughput=autoscale_max_ru,
-                ),
+                **_build_container_kwargs(
+                    container_id=self._cosmos_container,
+                    partition_key=partition_key,
+                    offer_throughput=offer_throughput,
+                    indexing_policy=idx_policy,
+                    vector_embedding_policy=vec_policy,
+                    full_text_policy=ft_policy,
+                )
             )
+
             await db.create_container_if_not_exists(
-                id=counter_container,
-                partition_key=partition_key,
-                offer_throughput=ThroughputProperties(
-                    auto_scale_max_throughput=counter_autoscale_max_ru,
-                ),
+                **_build_container_kwargs(
+                    container_id=self._cosmos_counter_container,
+                    partition_key=partition_key,
+                    offer_throughput=offer_throughput,
+                )
+            )
+
+            await db.create_container_if_not_exists(
+                **_build_container_kwargs(
+                    container_id=self._cosmos_lease_container,
+                    partition_key=lease_partition_key,
+                    offer_throughput=offer_throughput,
+                )
             )
             self._cosmos_client = client
             self._container_client = container_handle
@@ -403,10 +445,11 @@ async def create_memory_store(
             raise CosmosOperationError(f"Failed to create memory store (async): {exc}") from exc
 
         logger.info(
-            "Async created memory store %s/%s with counter container %s",
+            "Async created memory store %s/%s with counter container %s and lease container %s",
             self._cosmos_database,
             self._cosmos_container,
-            counter_container,
+            self._cosmos_counter_container,
+            self._cosmos_lease_container,
         )
 
     async def _require_cosmos(self) -> None:
diff --git a/agent_memory_toolkit/cosmos_memory_client.py b/agent_memory_toolkit/cosmos_memory_client.py
index 037a7f8..a845444 100644
--- a/agent_memory_toolkit/cosmos_memory_client.py
+++ b/agent_memory_toolkit/cosmos_memory_client.py
@@ -15,9 +15,13 @@
 from ._utils import (
     VALID_ROLES,
     VALID_TYPES,
+    _build_container_kwargs,
     _build_memory_query_builder,
     _container_policies,
+    _cosmos_container_offer_throughput,
     _make_memory,
+    _resolve_cosmos_provisioning_autoscale_max_ru,
+    _resolve_cosmos_throughput_mode,
     _resolve_embedding_dimensions,
     _validate_connection,
     _validate_hybrid_search,
@@ -76,6 +80,10 @@ def __init__(
         cosmos_credential: Optional[Any] = None,
         cosmos_database: Optional[str] = None,
         cosmos_container: Optional[str] = None,
+        cosmos_counter_container: Optional[str] = None,
+        cosmos_lease_container: Optional[str] = None,
+        cosmos_throughput_mode: Optional[str] = None,
+        cosmos_autoscale_max_ru: Optional[int] = None,
         ai_foundry_endpoint: Optional[str] = None,
         ai_foundry_credential: Optional[Any] = None,
         ai_foundry_api_key: Optional[str] = None,
@@ -93,6 +101,13 @@ def __init__(
         self._cosmos_credential = cosmos_credential
         self._cosmos_database = cosmos_database or "ai_memory"
         self._cosmos_container = cosmos_container or "memories"
+        self._cosmos_counter_container = cosmos_counter_container or "counter"
+        self._cosmos_lease_container = cosmos_lease_container or "leases"
+        self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(cosmos_throughput_mode)
+        self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru(
+            throughput_mode=self._cosmos_throughput_mode,
+            autoscale_max_ru=cosmos_autoscale_max_ru,
+        )
 
         self._ai_foundry_endpoint = ai_foundry_endpoint
         self._ai_foundry_credential = ai_foundry_credential
@@ -336,15 +351,16 @@ def create_memory_store(
         self,
         database: Optional[str] = None,
         container: Optional[str] = None,
-        counter_container: str = "counter",
+        counter_container: Optional[str] = None,
+        lease_container: Optional[str] = None,
         endpoint: Optional[str] = None,
         credential: Optional[Any] = None,
         embedding_dimensions: Optional[int] = None,
         embedding_data_type: Optional[str] = None,
         distance_function: Optional[str] = None,
         full_text_language: Optional[str] = None,
-        autoscale_max_ru: int = 1000,
-        counter_autoscale_max_ru: int = 1000,
+        throughput_mode: Optional[str] = None,
+        autoscale_max_ru: Optional[int] = None,
     ) -> None:
         """Create the Cosmos DB database and container for memories.
 
@@ -356,16 +372,26 @@ def create_memory_store(
         * Hierarchical partition key ``[/user_id, /thread_id]``
         * ``quantizedFlat`` vector index on ``/embedding``
         * Full-text index on ``/content``
-        * Autoscale throughput (max RU from *autoscale_max_ru*)
+        * Throughput behavior controlled by *throughput_mode*
 
-        A separate counter container is also provisioned with the same
-        partition key and autoscale throughput capped by
-        *counter_autoscale_max_ru*.
+        Separate counter and lease containers are also provisioned.
+        In ``serverless`` mode no RU/s throughput is specified.
+        In ``autoscale`` mode all required containers use the same
+        autoscale max RU from *autoscale_max_ru*.
         """
         self._cosmos_endpoint = endpoint or self._cosmos_endpoint
         self._cosmos_credential = credential or self._cosmos_credential
         self._cosmos_database = database or self._cosmos_database
         self._cosmos_container = container or self._cosmos_container
+        self._cosmos_counter_container = counter_container or self._cosmos_counter_container
+        self._cosmos_lease_container = lease_container or self._cosmos_lease_container
+        self._cosmos_throughput_mode = _resolve_cosmos_throughput_mode(
+            throughput_mode if throughput_mode is not None else self._cosmos_throughput_mode
+        )
+        self._cosmos_autoscale_max_ru = _resolve_cosmos_provisioning_autoscale_max_ru(
+            throughput_mode=self._cosmos_throughput_mode,
+            autoscale_max_ru=(autoscale_max_ru if autoscale_max_ru is not None else self._cosmos_autoscale_max_ru),
+        )
 
         _validate_connection(
             self._cosmos_endpoint,
@@ -382,29 +408,44 @@ def create_memory_store(
             db = client.create_database_if_not_exists(id=self._cosmos_database)
 
             partition_key = PartitionKey(path=["/user_id", "/thread_id"], kind="MultiHash")
+            lease_partition_key = PartitionKey(path="/id")
             vec_policy, idx_policy, ft_policy = _container_policies(
                 embedding_dimensions=embedding_dimensions or self._embedding_dimensions or 1536,
                 embedding_data_type=embedding_data_type or "float32",
                 distance_function=distance_function or "cosine",
                 full_text_language=full_text_language or "en-US",
             )
+            offer_throughput = _cosmos_container_offer_throughput(
+                throughput_mode=self._cosmos_throughput_mode,
+                autoscale_max_ru=self._cosmos_autoscale_max_ru,
+                throughput_properties_cls=ThroughputProperties,
+            )
 
             container_handle = db.create_container_if_not_exists(
-                id=self._cosmos_container,
-                partition_key=partition_key,
-                indexing_policy=idx_policy,
-                vector_embedding_policy=vec_policy,
-                full_text_policy=ft_policy,
-                offer_throughput=ThroughputProperties(
-                    auto_scale_max_throughput=autoscale_max_ru,
-                ),
+                **_build_container_kwargs(
+                    container_id=self._cosmos_container,
+                    partition_key=partition_key,
+                    offer_throughput=offer_throughput,
+                    indexing_policy=idx_policy,
+                    vector_embedding_policy=vec_policy,
+                    full_text_policy=ft_policy,
+                )
             )
+
             db.create_container_if_not_exists(
-                id=counter_container,
-                partition_key=partition_key,
-                offer_throughput=ThroughputProperties(
-                    auto_scale_max_throughput=counter_autoscale_max_ru,
-                ),
+                **_build_container_kwargs(
+                    container_id=self._cosmos_counter_container,
+                    partition_key=partition_key,
+                    offer_throughput=offer_throughput,
+                )
+            )
+
+            db.create_container_if_not_exists(
+                **_build_container_kwargs(
+                    container_id=self._cosmos_lease_container,
+                    partition_key=lease_partition_key,
+                    offer_throughput=offer_throughput,
+                )
             )
             self._cosmos_client = client
             self._container_client = container_handle
@@ -412,10 +453,11 @@ def create_memory_store(
             raise CosmosOperationError(f"Failed to create memory store: {exc}") from exc
 
         logger.info(
-            "Created memory store %s/%s with counter container %s",
+            "Created memory store %s/%s with counter container %s and lease container %s",
             self._cosmos_database,
             self._cosmos_container,
-            counter_container,
+            self._cosmos_counter_container,
+            self._cosmos_lease_container,
         )
 
     def _require_cosmos(self) -> None:
diff --git a/azure_functions/local.settings.json.template b/azure_functions/local.settings.json.template
index e9a7ccf..bebc1da 100644
--- a/azure_functions/local.settings.json.template
+++ b/azure_functions/local.settings.json.template
@@ -10,6 +10,8 @@
     "COSMOS_DB_CONTAINER": "memories",
     "COSMOS_DB_LEASE_CONTAINER": "leases",
     "COSMOS_DB_COUNTERS_CONTAINER": "counter",
+    "COSMOS_DB_THROUGHPUT_MODE": "serverless",
+    "COSMOS_DB_AUTOSCALE_MAX_RU": "1000",
 
     "THREAD_SUMMARY_EVERY_N": "0",
     "FACT_EXTRACTION_EVERY_N": "0",
diff --git a/tests/unit/aio/test_cosmos_memory_client.py b/tests/unit/aio/test_cosmos_memory_client.py
index 9a9112f..da738b0 100644
--- a/tests/unit/aio/test_cosmos_memory_client.py
+++ b/tests/unit/aio/test_cosmos_memory_client.py
@@ -10,6 +10,7 @@
 
 from agent_memory_toolkit.aio.cosmos_memory_client import AsyncCosmosMemoryClient
 from agent_memory_toolkit.exceptions import (
+    ConfigurationError,
     CosmosNotConnectedError,
     MemoryNotFoundError,
     ValidationError,
@@ -240,12 +241,13 @@ async def test_create_memory_store_with_counter_container(self):
         mock_db = AsyncMock()
         mock_memories_container = MagicMock()
         mock_counter_container = MagicMock()
+        mock_lease_container = MagicMock()
         mock_throughput_cls = MagicMock(side_effect=lambda **kwargs: type("Throughput", (), kwargs)())
 
         mock_cosmos_cls.return_value = mock_client
         mock_client.create_database_if_not_exists = AsyncMock(return_value=mock_db)
         mock_db.create_container_if_not_exists = AsyncMock(
-            side_effect=[mock_memories_container, mock_counter_container]
+            side_effect=[mock_memories_container, mock_counter_container, mock_lease_container]
         )
 
         with patch.dict(
@@ -262,21 +264,70 @@ async def test_create_memory_store_with_counter_container(self):
                 endpoint="https://fake.documents.azure.com:443/",
                 credential="fake-key",
                 embedding_dimensions=256,
-                counter_autoscale_max_ru=1000,
+                throughput_mode="autoscale",
+                autoscale_max_ru=1000,
             )
 
         mock_client.create_database_if_not_exists.assert_awaited_once()
         memories_call = mock_db.create_container_if_not_exists.await_args_list[0]
         counter_call = mock_db.create_container_if_not_exists.await_args_list[1]
+        lease_call = mock_db.create_container_if_not_exists.await_args_list[2]
         vec_policy = memories_call.kwargs["vector_embedding_policy"]
         assert vec_policy["vectorEmbeddings"][0]["dimensions"] == 256
         ft_policy = memories_call.kwargs["full_text_policy"]
         assert ft_policy["defaultLanguage"] == "en-US"
         assert counter_call.kwargs["id"] == "counter"
         assert counter_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000
+        assert lease_call.kwargs["id"] == "leases"
+        assert lease_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000
         assert "vector_embedding_policy" not in counter_call.kwargs
         assert mem._container_client is mock_memories_container
 
+    async def test_create_memory_store_defaults_to_serverless(self):
+        mem = _make_client(cosmos_throughput_mode="serverless")
+        mock_cosmos_cls = MagicMock()
+        mock_client = MagicMock()
+        mock_db = AsyncMock()
+        mock_memories_container = MagicMock()
+        mock_counter_container = MagicMock()
+        mock_lease_container = MagicMock()
+
+        mock_cosmos_cls.return_value = mock_client
+        mock_client.create_database_if_not_exists = AsyncMock(return_value=mock_db)
+        mock_db.create_container_if_not_exists = AsyncMock(
+            side_effect=[mock_memories_container, mock_counter_container, mock_lease_container]
+        )
+
+        with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False):
+            with patch.dict(
+                "sys.modules",
+                {
+                    "azure.cosmos.aio": MagicMock(CosmosClient=mock_cosmos_cls),
+                    "azure.cosmos": MagicMock(
+                        PartitionKey=MagicMock(),
+                        ThroughputProperties=MagicMock(),
+                    ),
+                },
+            ):
+                await mem.create_memory_store(
+                    endpoint="https://fake.documents.azure.com:443/",
+                    credential="fake-key",
+                    throughput_mode="serverless",
+                )
+
+        for call in mock_db.create_container_if_not_exists.await_args_list:
+            assert "offer_throughput" not in call.kwargs
+
+    def test_constructor_ignores_invalid_autoscale_env_in_serverless_mode(self):
+        with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False):
+            mem = _make_client(cosmos_throughput_mode="serverless")
+
+        assert mem._cosmos_autoscale_max_ru is None
+
+    def test_constructor_rejects_invalid_throughput_mode(self):
+        with pytest.raises(ConfigurationError, match="expected 'serverless' or 'autoscale'"):
+            _make_client(cosmos_throughput_mode="invalid")
+
 
 class TestRequireCosmos:
     async def test_require_cosmos_before_connect(self):
diff --git a/tests/unit/test_cosmos_memory_client.py b/tests/unit/test_cosmos_memory_client.py
index f3ca806..9771b34 100644
--- a/tests/unit/test_cosmos_memory_client.py
+++ b/tests/unit/test_cosmos_memory_client.py
@@ -10,6 +10,7 @@
 
 from agent_memory_toolkit.cosmos_memory_client import CosmosMemoryClient
 from agent_memory_toolkit.exceptions import (
+    ConfigurationError,
     CosmosNotConnectedError,
     MemoryNotFoundError,
     ValidationError,
@@ -212,11 +213,13 @@ def test_auto_creates_store_when_endpoint_provided(self):
         mock_db = MagicMock()
         mock_memories_container = MagicMock()
         mock_counter_container = MagicMock()
+        mock_lease_container = MagicMock()
         mock_cosmos_cls.return_value = mock_client
         mock_client.create_database_if_not_exists.return_value = mock_db
         mock_db.create_container_if_not_exists.side_effect = [
             mock_memories_container,
             mock_counter_container,
+            mock_lease_container,
         ]
 
         with patch.dict(
@@ -236,7 +239,7 @@ def test_auto_creates_store_when_endpoint_provided(self):
 
         assert mem._container_client is mock_memories_container
         mock_client.create_database_if_not_exists.assert_called_once()
-        assert mock_db.create_container_if_not_exists.call_count == 2
+        assert mock_db.create_container_if_not_exists.call_count == 3
 
 
 class TestRequireCosmos:
@@ -254,12 +257,14 @@ def test_create_memory_store_with_custom_dimensions(self):
         mock_db = MagicMock()
         mock_memories_container = MagicMock()
         mock_counter_container = MagicMock()
+        mock_lease_container = MagicMock()
         mock_throughput_cls = MagicMock(side_effect=lambda **kwargs: type("Throughput", (), kwargs)())
         mock_cosmos_cls.return_value = mock_client
         mock_client.create_database_if_not_exists.return_value = mock_db
         mock_db.create_container_if_not_exists.side_effect = [
             mock_memories_container,
             mock_counter_container,
+            mock_lease_container,
         ]
 
         # Start local-only, then create store explicitly
@@ -279,21 +284,72 @@ def test_create_memory_store_with_custom_dimensions(self):
                 endpoint="https://fake.documents.azure.com:443/",
                 credential="fake-key",
                 embedding_dimensions=256,
-                counter_autoscale_max_ru=1000,
+                throughput_mode="autoscale",
+                autoscale_max_ru=1000,
             )
 
         mock_client.create_database_if_not_exists.assert_called_once()
         memories_call = mock_db.create_container_if_not_exists.call_args_list[0]
         counter_call = mock_db.create_container_if_not_exists.call_args_list[1]
+        lease_call = mock_db.create_container_if_not_exists.call_args_list[2]
         vec_policy = memories_call.kwargs["vector_embedding_policy"]
         assert vec_policy["vectorEmbeddings"][0]["dimensions"] == 256
         ft_policy = memories_call.kwargs["full_text_policy"]
         assert ft_policy["defaultLanguage"] == "en-US"
         assert counter_call.kwargs["id"] == "counter"
         assert counter_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000
+        assert lease_call.kwargs["id"] == "leases"
+        assert lease_call.kwargs["offer_throughput"].auto_scale_max_throughput == 1000
         assert "vector_embedding_policy" not in counter_call.kwargs
         assert mem._container_client is mock_memories_container
 
+    def test_create_memory_store_defaults_to_serverless(self):
+        mock_cosmos_cls = MagicMock()
+        mock_client = MagicMock()
+        mock_db = MagicMock()
+        mock_memories_container = MagicMock()
+        mock_counter_container = MagicMock()
+        mock_lease_container = MagicMock()
+        mock_cosmos_cls.return_value = mock_client
+        mock_client.create_database_if_not_exists.return_value = mock_db
+        mock_db.create_container_if_not_exists.side_effect = [
+            mock_memories_container,
+            mock_counter_container,
+            mock_lease_container,
+        ]
+
+        mem = _make_client(cosmos_throughput_mode="serverless")
+
+        with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False):
+            with patch.dict(
+                "sys.modules",
+                {
+                    "azure.cosmos": MagicMock(
+                        CosmosClient=mock_cosmos_cls,
+                        PartitionKey=MagicMock(),
+                        ThroughputProperties=MagicMock(),
+                    ),
+                },
+            ):
+                mem.create_memory_store(
+                    endpoint="https://fake.documents.azure.com:443/",
+                    credential="fake-key",
+                    throughput_mode="serverless",
+                )
+
+        for call in mock_db.create_container_if_not_exists.call_args_list:
+            assert "offer_throughput" not in call.kwargs
+
+    def test_constructor_ignores_invalid_autoscale_env_in_serverless_mode(self):
+        with patch.dict("os.environ", {"COSMOS_DB_AUTOSCALE_MAX_RU": "not-an-int"}, clear=False):
+            mem = _make_client(cosmos_throughput_mode="serverless")
+
+        assert mem._cosmos_autoscale_max_ru is None
+
+    def test_constructor_rejects_invalid_throughput_mode(self):
+        with pytest.raises(ConfigurationError, match="expected 'serverless' or 'autoscale'"):
+            _make_client(cosmos_throughput_mode="invalid")
+
 
 # ===================================================================
 # Cosmos CRUD (mock _container_client)
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
new file mode 100644
index 0000000..a4f41a8
--- /dev/null
+++ b/tests/unit/test_utils.py
@@ -0,0 +1,35 @@
+"""Unit tests for shared helpers in agent_memory_toolkit._utils."""
+
+from agent_memory_toolkit._utils import _build_container_kwargs
+
+
+def test_build_container_kwargs_includes_required_fields_and_extras():
+    partition_key = object()
+    throughput = object()
+
+    kwargs = _build_container_kwargs(
+        container_id="memories",
+        partition_key=partition_key,
+        offer_throughput=throughput,
+        indexing_policy={"includedPaths": [{"path": "/*"}]},
+        full_text_policy={"defaultLanguage": "en-US"},
+    )
+
+    assert kwargs["id"] == "memories"
+    assert kwargs["partition_key"] is partition_key
+    assert kwargs["offer_throughput"] is throughput
+    assert kwargs["indexing_policy"] == {"includedPaths": [{"path": "/*"}]}
+    assert kwargs["full_text_policy"] == {"defaultLanguage": "en-US"}
+
+
+def test_build_container_kwargs_omits_offer_throughput_when_none():
+    kwargs = _build_container_kwargs(
+        container_id="leases",
+        partition_key="/id",
+        offer_throughput=None,
+    )
+
+    assert kwargs == {
+        "id": "leases",
+        "partition_key": "/id",
+    }