In [1]:
import logging
import os

# set the directory to the location of the script
try:
    os.chdir("..")
    target_directory = os.getenv(
        "TARGET_DIRECTORY", os.getcwd()
    )  # Use environment variable if available
    if os.path.exists(target_directory):
        os.chdir(target_directory)
        print(f"Changed directory to: {os.getcwd()}")
        logging.info(f"Successfully changed directory to: {os.getcwd()}")
    else:
        logging.error(f"Directory does not exist: {target_directory}")
except Exception as e:
    logging.exception(f"An error occurred while changing directory: {e}")

Changed directory to: c:\Users\pablosal\Desktop\gbb-ai-audio-agent


## **🧠 Agent Memory in Our RTAgent Architecture**

*Making conversations smarter, faster, and more personal*

#### **Why Memory Matters**

When agents forget recent or past user interactions, the user experience suffers. To build real-time, context-aware, and personalized AI agents, memory is separated into two types:

- **Short-term memory:** Tracks current session context.
- **Long-term memory:** Remembers previous sessions and user history.

Both are essential for coherent, informed, and scalable agents.

#### **Short-Term (Session) Memory**

- **Purpose:** Tracks what’s happening now.
- **Storage:** Redis (session-scoped, fast).
- **Managed by:** ConversationManager or memory orchestrator.
- **Stores:**  
    - Recent conversation turns  
    - Partial tool outputs  
    - Temporary state (e.g., queue, auth status)
- **Key Features:**  
    - Expiry: TTL (e.g., 1–2 hours)  
    - Isolation: Per session_id  
    - Speed: Fastest lookup

#### **Long-Term (User-Level) Memory**

- **Purpose:** Remembers user history across sessions.
- **Storage:** Azure Cosmos DB for MongoDB, vector-indexed.
- **Stores:**  
    - Conversation summaries  
    - Extracted knowledge  
    - User insights (preferences, topics)
- **How:**  
    - End-of-session: Summarize chat, generate embedding  
    - Store in Cosmos DB with metadata (user_id, summary, vector, tags, timestamp)
- **Usage:**  
    - At session start: Embed user query, search Cosmos DB, return top-k relevant memories  
    - Filter by metadata (e.g., category, status)

#### **Memory Workflow**

1. **During Session:**  
     - Redis stores ongoing dialog and transient state.
2. **End of Session:**  
     - Summarize, embed, and store in CosmosDB.
3. **Next Session Start:**  
     - Embed user query, search summaries, inject top results into context.

#### **Summary Table**

| Memory Type | Location       |      Scope         | Lifetime         | Example Use                        |
|-------------|----------------|--------------------|------------------|-------------------------------------|
| Short-Term  | Redis          | Session-only  | TTL (e.g., 1hr)  | Track auth flow, tool state         |
| Long-Term   | CosmosDB + Vectors  | Cross-session | Persistent       | Recall patient conditions, history  |

#### **Engineering Best Practices**

- Store user_id and session_id as metadata for secure, scoped retrieval.
- Summarize long chats before embedding; avoid raw chat logs.
- Use vector search with category filters for context-aware recall.
- Prune or cluster older memories to maintain relevance.

**This design ensures:**

- Agents act coherently within sessions (Redis).
- Agents stay informed across sessions (Cosmos vector memory).
- Fast, scalable performance for production.


###  **At Conversation End — Summarize & Store Memory**

In [9]:
from rtagents.RTMedAgent.backend.orchestration.conversation_state import (
    ConversationManager,
)
from src.aoai.manager import AzureOpenAIManager
from dotenv import load_dotenv

# -----------------------------
# Load Environment Variables
# -----------------------------
load_dotenv()

# --- Azure OpenAI Setup ---
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_DEPLOYMENT = "text-embedding-3-small"
EMBED_MODEL = AZURE_OPENAI_DEPLOYMENT
EMB_FIELD = "contentVector"
MAX_DIM = 1536

aoai_client = AzureOpenAIManager(
    api_key=AZURE_OPENAI_KEY,
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    embedding_model_name=EMBED_MODEL,
)
from src.redis.manager import AzureRedisManager

redis_manager = AzureRedisManager()

2025-06-16 13:52:47,852 - micro - MainProcess - INFO     Azure Redis connection initialized with access key. (manager.py:_create_client:82)
INFO:micro:Azure Redis connection initialized with access key.


In [10]:
# Example retrieval from redis after session end
SESSION_ID = "94eb58c3"
cm = ConversationManager.from_redis("94eb58c3", redis_manager)
history = cm.full_history()
context = cm.context

2025-06-16 13:52:50,942 - micro - MainProcess - INFO     Restored session 94eb58c3: 22 msgs total, ctx keys=['authenticated', 'active_agent', 'latency_roundtrip', 'tool_outputs', 'caller_name', 'policy_id', 'intake_completed'] (conversation_state.py:from_redis:48)
INFO:micro:Restored session 94eb58c3: 22 msgs total, ctx keys=['authenticated', 'active_agent', 'latency_roundtrip', 'tool_outputs', 'caller_name', 'policy_id', 'intake_completed']


In [12]:
PROMPT_TEMPLATE = """
You are an AI call wrap-up assistant.
Your job is to produce a MEMORY record **as a single JSON block**.

### INPUT
- Full dialog turns (chronological):
{history}
- Session context:
{context}

### RULES
1. **summary** → Maximum 3 plain-English sentences; capture only the final outcome and key details. Do not include details about successful authentication—only mention authentication if there are multiple failures.
2. **sentiment** → "positive", "neutral", or "negative" based on the caller's mood and the agent's tone.
3. **intent** → One of: "authentication", "claim_filed", "claim_inquiry", "other".
4. **entities** → Extract if present: caller_name, policy_id, claim_id.
5. Output **one valid JSON object** with keys:
{{
  "summary": "",
  "sentiment": "",
  "intent": "",
  "entities": {{
    "caller_name": "",
    "policy_id": "",
    "claim_id": ""
  }}
}}
""".strip()


In [14]:
import datetime, openai, json, re

def _strip_fence(txt: str) -> str:
    """Remove ```json … ``` fences if present."""
    return re.sub(r"^```(?:json)?\s*|\s*```$", "", txt.strip(), flags=re.I)

async def build_memory(history, context, openai_client):
    # Pretty-print history to avoid brace collisions
    hist_str = json.dumps(history, indent=2, ensure_ascii=False)
    ctx_str  = json.dumps(context, indent=2, ensure_ascii=False)

    prompt = PROMPT_TEMPLATE.format(history=hist_str, context=ctx_str)

    resp = await openai_client.generate_chat_response(
        query=prompt,
        conversation_history=[],
        temperature=0.2
    )

    raw = resp["response"]
    try:
        memory_json = json.loads(_strip_fence(raw))
    except json.JSONDecodeError as e:
        raise ValueError(f"Model did not return valid JSON: {e}\n---RAW---\n{raw}")

    # Add technical metadata
    memory_json.update(
        user_id=context.get("caller_name", "unknown"),
        timestamp=datetime.datetime.utcnow().isoformat(timespec="seconds") + "Z"
    )
    return memory_json


In [15]:
mem = await build_memory(history, context, aoai_client)

2025-06-16 13:53:02,880 - micro - MainProcess - INFO     Function generate_chat_response started at 2025-06-16 13:53:02 (manager.py:generate_chat_response:536)
INFO:micro:Function generate_chat_response started at 2025-06-16 13:53:02
2025-06-16 13:53:02,882 - micro - MainProcess - INFO     Sending request to Azure OpenAI at 2025-06-16 13:53:02 (manager.py:generate_chat_response:593)
INFO:micro:Sending request to Azure OpenAI at 2025-06-16 13:53:02
2025-06-16 13:53:04,770 - micro - MainProcess - INFO     Function generate_chat_response finished at 2025-06-16 13:53:04 (Duration: 1.89 seconds) (manager.py:generate_chat_response:647)
INFO:micro:Function generate_chat_response finished at 2025-06-16 13:53:04 (Duration: 1.89 seconds)


In [16]:
mem

{'summary': 'Alice Brown successfully filed a claim for a collision involving her blue Honda Civic. The incident occurred at an intersection near downtown Chicago, IL, with no injuries or additional property damage. The claim was recorded with ID CLA-2025-LZKBOY.',
 'sentiment': 'positive',
 'intent': 'claim_filed',
 'entities': {'caller_name': 'Alice Brown',
  'policy_id': 'POL-A10001',
  'claim_id': 'CLA-2025-LZKBOY'},
 'user_id': 'Alice Brown',
 'timestamp': '2025-06-16T18:53:04Z'}

### **📌 Step 1: Setup and Insert Embedded Data**

In [9]:
import os
import urllib.parse
from pymongo import MongoClient, errors

# --- MongoDB (Cosmos) Setup ---
COSMOS_MONGO_USER = os.getenv("COSMOS_MONGO_USER")
COSMOS_MONGO_PWD = os.getenv("COSMOS_MONGO_PWD")
COSMOS_MONGO_SERVER = os.getenv("COSMOS_MONGO_SERVER")
DB_NAME  = "memorydb"
COLL    = "long_term_memory"

# Format SRV URI
mongo_conn = (
    f"mongodb+srv://{urllib.parse.quote(COSMOS_MONGO_USER)}:"
    f"{urllib.parse.quote(COSMOS_MONGO_PWD)}@{COSMOS_MONGO_SERVER}"
    "?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000"
)

try:
    mongo_client = MongoClient(mongo_conn)
    db = mongo_client[DB_NAME]
    collection = db[COLL]
    print("✅ Connected to MongoDB.")
except errors.ConnectionError as e:
    raise RuntimeError(f"❌ MongoDB connection failed: {e}")

✅ Connected to MongoDB.


  mongo_client = MongoClient(mongo_conn)


### **📌 Step 2: Create DiskANN Vector Index**

In [10]:
# ── constants ───────────────────────────────────────────
EMB_FIELD   = "memoryVector"     # vector field in new docs
MAX_DIM     = 1536               # text-embedding-3-small
COLL_NAME   = collection.name    # same handle you already opened

try:
    db.command({
        "createIndexes": COLL_NAME,
        "indexes": [
            {
                "name": "diskann_memory_vec",
                "key":  { EMB_FIELD: "cosmosSearch" },
                "cosmosSearchOptions": {
                    "kind":        "vector-diskann",
                    "dimensions":  MAX_DIM,
                    "similarity":  "COS",
                    "maxDegree":   32,
                    "lBuild":      64
                }
            },
            # ▸ lookup / filtering
            { "name": "uid_idx",        "key": { "user_id": 1 } },          # filter per user
            { "name": "intent_idx",     "key": { "intent": 1 } },           # optional filter
            { "name": "ts_desc_idx",    "key": { "timestamp": -1 } }        # recent-first sort
        ]
    })
    print("✅ Vector & helper indexes created.")
except Exception as e:
    print("❌ Failed to create indexes:", e)


✅ Vector & helper indexes created.


### **Step 3: Insert Embedded Docs with Metadata**

In [61]:
emb_resp = aoai_client.generate_embedding(
    input_text = mem["summary"],
)
vec = emb_resp.data[0].embedding
if len(vec) != MAX_DIM:
    raise ValueError(f"Embedding dim mismatch: {len(vec)} (expected {MAX_DIM})")

# ── 2. attach vector + insert ───────────────────────────────────────
mem["memoryVector"] = vec           # field name matches the new index
insert_result = collection.insert_one(mem)

print("✅ memory stored:", insert_result.inserted_id)


✅ memory stored: 685047335b1975313bb03aca


### **Step 4: Perform a Vector Search (Retrieval)**

In [63]:
# ── make query vector ───────────────────────────────────────────────
query   = "Food recommendations for heart health"
resp    = aoai_client.generate_embedding(input_text=query)
q_emb   = resp.data[0].embedding

USER_ID = "Alice Brown"        # the current caller / session user
TOP_K   = 3                    # how many memories to bring back

# ── build aggregation pipeline ──────────────────────────────────────
pipeline = [
    {
        "$search": {
            "cosmosSearch": {
                "path": "memoryVector",      # vector field in new docs
                "vector": q_emb,
                "k": TOP_K,
                # return only this user’s memories
                "filter": { "user_id": { "$eq": USER_ID } }
            }
        }
    },
    # keep only the fields you need downstream
    {
        "$project": {
            "_id": 0,
            "summary":     1,
            "intent":      1,
            "sentiment":   1,
            "score": { "$meta": "searchScore" },
            "timestamp":   1
        }
    },
    # (optional) prefer recent memories if scores tie
    { "$sort": { "score": -1, "timestamp": -1 } }
]

results = list(collection.aggregate(pipeline))

print("🔍 Relevant memories:")
for r in results:
    print(f"- ({r['intent']}, {r['sentiment']}, score {r['score']:.3f}) → {r['summary']}")


🔍 Relevant memories:
- (claim_filed, positive, score 0.101) → Alice Brown successfully filed a claim for a collision involving her blue Honda Civic, which occurred at an intersection near downtown Chicago, IL. No injuries or additional property damage were reported. The claim was filed under policy ID POL-A10001 with claim ID CLA-2025-LZKBOY.


### **📌 Step 5: Inspect Indexes**

In [64]:
print("📄 Current Indexes:")
for idx in collection.list_indexes():
    print(idx)


📄 Current Indexes:
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('memoryVector', 'cosmosSearch')])), ('name', 'diskann_memory_vec'), ('cosmosSearchOptions', SON([('kind', 'vector-diskann'), ('dimensions', 1536), ('similarity', 'COS'), ('maxDegree', 32), ('lBuild', 64)]))])
SON([('v', 2), ('key', SON([('user_id', 1)])), ('name', 'uid_idx')])
SON([('v', 2), ('key', SON([('intent', 1)])), ('name', 'intent_idx')])
SON([('v', 2), ('key', SON([('timestamp', -1)])), ('name', 'ts_desc_idx')])


### **📌 Step 6: Use Client**

In [17]:
from src.cosmosdb.manager import CosmosDBMongoCoreManager
import urllib.parse

# --- MongoDB (Cosmos) Setup ---
COSMOS_MONGO_USER = os.getenv("COSMOS_MONGO_USER")
COSMOS_MONGO_PWD = os.getenv("COSMOS_MONGO_PWD")
COSMOS_MONGO_SERVER = os.getenv("COSMOS_MONGO_SERVER")
DB_NAME  = "memorydb"
COLL    = "long_term_memory"

# Format SRV URI
mongo_conn = (
    f"mongodb+srv://{urllib.parse.quote(COSMOS_MONGO_USER)}:"
    f"{urllib.parse.quote(COSMOS_MONGO_PWD)}@{COSMOS_MONGO_SERVER}"
    "?tls=true&authMechanism=SCRAM-SHA-256&retrywrites=false&maxIdleTimeMS=120000"
)

mgr = CosmosDBMongoCoreManager(connection_string=mongo_conn, database_name=DB_NAME, collection_name=COLL)


  self.client = pymongo.MongoClient(connection_string)


In [20]:
# 1️⃣ one-time index bootstrap
mgr.ensure_index_from_yaml("rtagents/RTInsuranceAgent/backend/agents/memory_store/vector_index.yaml")

# 2️⃣ insert your memory doc (uses existing insert_document)
mgr.insert_document(mem)         # mem includes memoryVector already

ERROR:src.cosmosdb.manager:Duplicate key error while inserting document: Duplicate key violation on the requested collection: Index '_id_', full error: {'index': 0, 'code': 11000, 'errmsg': "Duplicate key violation on the requested collection: Index '_id_'"}


In [None]:
# 3️⃣ later: retrieve
results = mgr.semantic_search(
    query_text  = "collision involving her blue Honda Civic",
    user_id     = "Alice Brown",
    aoai_client = aoai_client,
    top_k       = 5
)

for r in results:
    print(json.dumps(r, indent=2))