In [1]:
import sys
import os
from pathlib import Path

# Make the project root importable so the `src.*` packages resolve from this notebook.
# The root is taken to be two directory levels above the current working directory.
project_root = Path.cwd().parent.parent

# Guard the append: re-running this cell must not stack duplicate sys.path entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print(f"Project root added: {project_root}")

Project root added: d:\DEV\Python\interiit25\Observe\observe_ai


## 1. Test GraphStore
Tests the connection to Neo4j and the retrieval of related intents.

In [5]:
from src.rag.graph_store import GraphStore

# Smoke test: open a GraphStore connection and look up the intents related to one known intent.
graph_store = GraphStore()
try:
    print("GraphStore initialized successfully.")

    # Test getting related intents for a known intent (e.g., 'Dispute_Charge' or something from your taxonomy)
    # You might need to check your taxonomy.json or Neo4j DB for a valid intent name first.
    test_intent = "Dispute_Charge"
    related = graph_store.get_related_intents(test_intent)
    print(f"Related intents for '{test_intent}':")
    print(related)
finally:
    # Always release the connection, even when the lookup above raises.
    graph_store.close()

2025-12-02 18:51:39,298 - src.rag.graph_store - INFO - Connected to Neo4j GraphStore.


GraphStore initialized successfully.
Related intents for 'Dispute_Charge':
{'Process_Refund': 1.0, 'Explain_Charge': 0.8365758754863813, 'Verify_Identity': 0.5849546044098574, 'Provide_Promo_Codes': 0.3424124513618677, 'Investigate_Billing_Issue': 0.26718547341115434, 'Dispute_Charge': 1.0}


## 2. Test QueryClassifier
Tests LLM-based classification of queries.

In [6]:
from src.rag.query_classifier import QueryClassifier

# Representative queries to classify; add more entries here instead of copy-pasting the calls.
sample_queries = [
    "I want to dispute a charge on my bill.",
    "Why is my internet so slow?",
]

# Initialization and classification both sit inside the try so that any failure is
# reported as a single readable line instead of aborting the notebook run.
try:
    classifier = QueryClassifier()
    print("QueryClassifier initialized.")

    for test_query in sample_queries:
        intent = classifier.classify(test_query)
        print(f"Query: '{test_query}'")
        print(f"Classified Intent: {intent}")

except Exception as e:
    print(f"QueryClassifier Test Failed: {e}")

QueryClassifier initialized.
Query: 'I want to dispute a charge on my bill.'
Classified Intent: Dispute_Charge
Query: 'I want to dispute a charge on my bill.'
Classified Intent: Dispute_Charge
Query: 'Why is my internet so slow?'
Classified Intent: Diagnose_Technical_Issues
Query: 'Why is my internet so slow?'
Classified Intent: Diagnose_Technical_Issues


## 3. Test Retriever (Dual Retrieval)
Tests the full retrieval pipeline.

In [2]:
from src.rag.retriever import Retriever
from src.rag.embeddings import EmbeddingModel
from src.rag.vector_store import VectorStore
from src.rag.graph_store import GraphStore
from src.rag.transcript_store import TranscriptStore
from sentence_transformers import CrossEncoder
from src.config import Config

# Build every retrieval dependency once; the cells below reuse these notebook-level names.
reranker = CrossEncoder(Config.RERANKER_MODEL)  # cross-encoder model name comes from Config
embedding_model = EmbeddingModel()
vector_store = VectorStore()
graph_store = GraphStore()
transcript_store = TranscriptStore()

# Wire the components into the project's own Retriever class (src.rag.retriever).
retriever = Retriever(
    embedding_model=embedding_model,
    vector_store=vector_store,
    graph_store=graph_store,
    transcript_store=transcript_store,
    reranker=reranker
)
print("Retriever initialized.")

2025-12-04 18:01:24,970 - sentence_transformers.cross_encoder.CrossEncoder - INFO - Use pytorch device: cpu
2025-12-04 18:01:26,929 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 18:01:26,929 - src.rag.embeddings - INFO - Using device: cpu
2025-12-04 18:01:26,929 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 18:01:26,929 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 18:01:26,929 - src.rag.embeddings - INFO - Using device: cpu
2025-12-04 18:01:26,929 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 18:01:32,307 - src.rag.embeddings - INFO - Initialized embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 18:01:32,322 - src.rag.embeddings - INFO - Loading embedding model: sente

Retriever initialized.


In [7]:
# Dev helper (disabled): uncomment to hot-reload src.rag.retriever after editing it and
# rebuild `retriever` from the components that already exist in the kernel.
# NOTE(review): the rebuilt Retriever below is created without `reranker=` — confirm that is intended.
# import importlib
# import src.rag.retriever as retriever_module

# importlib.reload(retriever_module)

# from src.rag.retriever import Retriever
# retriever = Retriever(
#     embedding_model=embedding_model,
#     vector_store=vector_store,
#     graph_store=graph_store,
#     transcript_store=transcript_store
# )

In [3]:
# Run dual retrieval for one sample query; `results` is inspected in the next cells.
query = "Reason for refund rejection"
results = retriever.retrieve_dual(query)

2025-12-04 18:05:59,949 - src.rag.retriever - INFO - Reranking 50 candidates...


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Number of results returned by retrieve_dual for the query above.
len(results)

10

In [5]:
# Display the raw RetrievalResult objects (the full list is dumped, so the output is long).
results

[RetrievalResult(transcript_id='2f7d2a66-c447-4cbd-b76c-e61f297d3004', turn_index=11, content=['Agent: thanks for holding i have your account here uh you called about the refund related to um the billing error is that correct', "Customer: yes that's it there was um a mistake i need help with", "Agent: alright so i see a transaction on your account with reference txn 987654321 it appears that the refund request was denied based on the um shouldn't be evadence evidence", 'Customer: denied but i thought it would be processed more quickly um what was the reason', "Agent: the merchant provided documentation showing that the charge was valid so unfortunately we couldn't issue a refund", "Customer: that's really frustrating i had been counting on that money other banks like um bank of america seem to handle disputes um way faster", 'Agent: i understand your frustration um ahh and i apologize for the inconvenience civic trust has strict policies to ensure transparency in these situations'], me

In [8]:
# Re-run of the same query, executed after the reload cell.
# NOTE(review): the saved output for this run is an empty list, unlike the first run — investigate.
query = "Reason for refund rejection"
results = retriever.retrieve_dual(query)

In [9]:
# Display the results of the re-run.
results

[]

## 4. Test Agent Retrieval Node
Tests the agent's retrieval node logic.

In [None]:
# Import through the `src` package, which is what the project-root sys.path entry exposes
# and what every other cell in this notebook uses.
from src.rag.agentic_graph import ObserveAIAgent
from langchain_core.messages import HumanMessage
from src.rag.retriever import Retriever
from src.rag.embeddings import EmbeddingModel
from src.rag.vector_store import VectorStore
from src.rag.graph_store import GraphStore
from src.rag.transcript_store import TranscriptStore
from sentence_transformers import CrossEncoder
from src.rag.causal_pipeline import CausalPipeline
# Config is read below; import it here so the cell does not depend on an earlier cell having run.
from src.config import Config

# Build the retrieval stack and the causal pipeline the agent depends on.
reranker = CrossEncoder(Config.RERANKER_MODEL)
embedding_model = EmbeddingModel()
vector_store = VectorStore()
graph_store = GraphStore()
transcript_store = TranscriptStore()
causal = CausalPipeline()

# Wire the components into the project's own Retriever class.
retriever = Retriever(
    embedding_model=embedding_model,
    vector_store=vector_store,
    graph_store=graph_store,
    transcript_store=transcript_store,
    reranker=reranker
)
print("Retriever initialized.")
agent = ObserveAIAgent(retriever=retriever, causal_pipeline=causal)
print("Agent initialized.")

# Minimal hand-built state: one user message, with nothing retrieved yet.
state = {
    "messages": [HumanMessage(content="What causes delays and claim approval?")],
    "context": None,
    "retrieved_results": None
}

# Call the retrieve node directly, bypassing the rest of the agent graph.
new_state = agent.retrieve_node(state)

# Summarise what the node wrote back into the state, then preview the first 500 characters.
print("\nRetrieve Node Output:")
print(f"Context Length: {len(new_state['context'])}")
print(f"Number of Results: {len(new_state['retrieved_results'])}")
print("\nGenerated Context Preview:")
print(new_state['context'][:500])

2025-12-03 04:02:16,481 - src.agent.agentic_graph - INFO - Initialized ObserveAIAgent
2025-12-03 04:02:16,483 - src.agent.agentic_graph - INFO - Retrieve node: Fetching documents...


Agent initialized.


2025-12-03 04:02:19,734 - src.rag.retriever - INFO - Classified query 'Why was the refund rejected?' as intent: Question_Approval
2025-12-03 04:02:20,341 - src.rag.retriever - INFO - Related intents for Question_Approval: {'Clarify_Eligibility': 1.0, 'Offer_Partial_Increase': 0.34328358208955223, 'Apologize_for_Delays': 0.3164179104477612, 'Acknowledge_or_Confirm': 0.28059701492537314, 'Ask_for_Clarification': 0.24776119402985075, 'Question_Approval': 1.0}


Sorting results by score:
Expanding context for top results:

Retrieve Node Output:
Context Length: 4152
Number of Results: 5

Generated Context Preview:
--- Result 1 (Score: 0.65, Intent: Question_Approval) ---
Transcript ID: cae47e9a-e073-4a8f-b80a-af32f9bcc187, Turn Index: 15
Agent: right let me check um the merchant evidence uh does support the denial of your refund
Customer: denial what are you serious i thought this uh was straightforward
Agent: i know this is frustrating and i'm really sorry for um any inconvenience the evidence we received wasn't enough um to move forward with the refund
Customer: so it's actually denied that's ridiculous


In [13]:
# Print the whole generated context (the previous cell only previewed the first 500 characters).
print(new_state['context'])

--- Result 1 (Score: 0.65, Intent: Question_Approval) ---
Transcript ID: cae47e9a-e073-4a8f-b80a-af32f9bcc187, Turn Index: 15
Agent: right let me check um the merchant evidence uh does support the denial of your refund
Customer: denial what are you serious i thought this uh was straightforward
Agent: i know this is frustrating and i'm really sorry for um any inconvenience the evidence we received wasn't enough um to move forward with the refund
Customer: so it's actually denied that's ridiculous
Agent: yes unfortunately the final outcome unfortunately is denied i realize that's not what you wanted to hear
Customer: well why didn't anyone tell me this sooner it just feels like you're wasting my time
Agent: i completely understand your frustration ava and um i'm very sorry for the delay in communication


--- Result 2 (Score: 0.64, Intent: Question_Approval) ---
Transcript ID: 65be5609-4157-47c6-b469-3cc04f816b08, Turn Index: 5
Agent: right about your life insurance policy
Customer: yeah

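## Check Vector Store
Verifies that the vector store collection is populated and that a direct similarity search returns results.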

In [2]:
from src.rag.vector_store import VectorStore

# Sanity-check the vector store on its own: document count, one sample record,
# and a direct similarity search that does not go through the Retriever.
print("Checking Vector Store...")
vs = VectorStore()
# vs = vector_store  # alternative: reuse the instance built in an earlier cell

# 1. Check the count
count = vs.collection.count()
print(f"Total documents in collection: {count}")

# 2. Peek at the data to verify structure
if count > 0:
    print("\n--- Peeking at first item ---")
    # peek() returns a dict with 'ids', 'embeddings', 'metadatas', 'documents'
    peek_data = vs.collection.peek(limit=1)

    print(f"ID: {peek_data['ids'][0]}")
    print(f"Metadata: {peek_data['metadatas'][0]}")
    print(f"Document (first 100 chars): {peek_data['documents'][0][:100]}...")

    # 3. Test a direct search
    print("\n--- Testing Direct Search ---")
    # Cell-specific names: this check must not overwrite the `test_query` / `results`
    # variables that other cells in this notebook assign and display.
    search_query = "refund"
    search_hits = vs.vector_store.similarity_search_with_score(search_query, k=2)

    if search_hits:
        print(f"Found {len(search_hits)} results for '{search_query}':")
        # NOTE(review): the scores look like distances (lower = closer) — confirm against the store's metric.
        for doc, score in search_hits:
            print(f"  Score: {score:.4f} | Content: {doc.page_content[:50]}...")
    else:
        print(f"No results found for '{search_query}' despite having documents.")
else:
    print("!!! Collection is EMPTY. The vector database needs to be repopulated. !!!")


2025-12-04 22:25:11,697 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:25:11,698 - src.rag.embeddings - INFO - Using device: cpu
2025-12-04 22:25:11,698 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Checking Vector Store...


2025-12-04 22:25:15,916 - src.rag.embeddings - INFO - Initialized embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:25:15,916 - src.rag.vector_store - INFO - Initializing Chroma vector store at: d:\DEV\Python\interiit25\Observe\observe_ai\data\vector_db
2025-12-04 22:25:15,968 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-12-04 22:26:01,899 - src.rag.vector_store - INFO - Vector store initialized. Collection 'rag_collection'
Loaded with 681979 items.


Total documents in collection: 681979

--- Peeking at first item ---
ID: 79327ea0-d8b4-439e-9d6c-10e996678542
Metadata: {'transcript_id': 'ce44aa4b-13b7-4778-b413-15f37ee15f7a', 'secondary_intent': 'Offer_Resolution', 'time_of_interaction': '2025-11-10 16:23:00', 'primary_intent': 'Hotel_Complaint', 'global_intent': 'Discounts & Promotions', 'domain': 'Insurance', 'type': 'turn_with_context', 'turn_id': 'ce44aa4b-13b7-4778-b413-15f37ee15f7a_0', 'turn_index': 0, 'speaker': 'Agent', 'reason_for_call': 'Inquiry about promotional codes and potential loyalty benefits for booking at Beachfront Suites in Las Vegas.'}
Document (first 100 chars): Agent: thank you for calling beachfront suites this is isabella nguyen how can i help you today
Cust...

--- Testing Direct Search ---
Found 2 results for 'refund':
  Score: 0.6278 | Content: Customer: no that's it i suppose just um make sure...
  Score: 0.6379 | Content: Customer: great but i really need to see some acti...


## 5. Test New Agentic Graph
Tests the full self-correcting agent pipeline (Retrieve -> Generate -> Critique -> Loop).

In [3]:
from src.rag.retriever import Retriever
from src.rag.embeddings import EmbeddingModel
from src.rag.vector_store import VectorStore
from src.rag.graph_store import GraphStore
from src.rag.transcript_store import TranscriptStore
from sentence_transformers import CrossEncoder
from src.rag.causal_pipeline import CausalPipeline
from src.rag.agentic_graph import ObserveAIAgent
from src.config import Config

# Build the retrieval stack and the causal pipeline the agent depends on.
reranker = CrossEncoder(Config.RERANKER_MODEL)
embedding_model = EmbeddingModel()
vector_store = VectorStore()
graph_store = GraphStore()
transcript_store = TranscriptStore()
causal = CausalPipeline()

# Wire the components into the project's own Retriever class.
retriever = Retriever(
    embedding_model=embedding_model,
    vector_store=vector_store,
    graph_store=graph_store,
    transcript_store=transcript_store,
    reranker=reranker
)
print("Retriever initialized.")
agent = ObserveAIAgent(retriever=retriever, causal_pipeline=causal)
print("Agent initialized.")

# Test Query
# NOTE(review): "delays and claim approval" may be intended as "delays in claim approval" — confirm the wording.
query = "What causes delays and claim approval?"
print(f"Running agent with query: '{query}'")

# Run the full agent pipeline
# Note: This might take a few seconds as it involves multiple LLM calls
result = agent.invoke(query)

# `result` is read as a mapping; only the keys used below are relied on here.
print("\n--- Agent Output ---")
print(f"Iterations: {result['iterations']}")
print(f"Final Score: {result['final_score']}")
print(f"Number of Retrieved Docs: {len(result['retrieved_results'])}")
print("\nGenerated Response:")
print(result['response'])

2025-12-04 22:26:40,412 - sentence_transformers.cross_encoder.CrossEncoder - INFO - Use pytorch device: cpu
2025-12-04 22:26:40,946 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:26:40,946 - src.rag.embeddings - INFO - Using device: cpu
2025-12-04 22:26:40,946 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:26:44,580 - src.rag.embeddings - INFO - Initialized embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:26:44,580 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:26:44,580 - src.rag.embeddings - INFO - Using device: cpu
2025-12-04 22:26:44,597 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-04 22:26:48,532 - src.rag.embeddings - INFO - Initialized embedding model: s

Loading schema...
Loading dataset...
Loading SCM bundle...


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

  setstate(state)
2025-12-04 22:26:58,225 - src.rag.agentic_graph - INFO - ---RETRIEVE (Attempt 0)---


CausalPipeline initialized.
Retriever initialized.
Agent initialized.
Running agent with query: 'What causes delays and claim approval?'


2025-12-04 22:27:13,275 - src.rag.retriever - INFO - Reranking 50 candidates...


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

2025-12-04 22:27:16,361 - src.rag.agentic_graph - INFO - Retrieved 10 documents.
2025-12-04 22:27:16,365 - src.rag.agentic_graph - INFO - ---GENERATE CAUSAL ANALYSIS---
2025-12-04 22:27:24,746 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 5da592e4-38fd-4125-8db2-1520dbf0df57. Skipping SCM.
2025-12-04 22:27:24,746 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 786e2d74-949c-4cc5-a027-7f54fff4bf6c. Skipping SCM.
2025-12-04 22:27:24,746 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 5da592e4-38fd-4125-8db2-1520dbf0df57. Skipping SCM.
2025-12-04 22:27:24,761 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id c3f5ad51-fe26-4ae0-953e-23ec5de526f6. Skipping SCM.
2025-12-04 22:27:24,765 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 786e2d74-949c-4cc5-a027-7f54fff4bf6c. Skipping SCM.
2025-12-04 22:27:24,770 - src.rag.causal_pipeline - INFO - No valid int


--- Agent Output ---
Iterations: 1
Final Score: 0.9
Number of Retrieved Docs: 10

Generated Response:
**1. Executive Summary**

Delays in claim processing and approval are primarily caused by a combination of factors related to documentation, operational load, and the inherent complexity of the verification process. Customers frequently express frustration due to claims being "pending" without clear reasons, often stemming from the need for comprehensive verification of details, missing or erroneous information in submissions, and high volumes of claims. The lack of transparent communication regarding these common causes further exacerbates customer dissatisfaction.

**2. Key Causal Patterns**

*   **Pattern A: Incomplete, Erroneous, or Unverified Documentation**
    *   *Description:* A predominant cause of delays is the requirement for thorough verification of submitted details and documentation. This often includes instances where initial submissions are missing crucial documents o

## 6. Test Causal Pipeline
Tests the causal inference pipeline with query interpretation and SCM simulation.

In [None]:
from src.rag.causal_pipeline import CausalPipeline

In [12]:
# Hot-reload the causal pipeline module so on-disk edits are picked up without a kernel
# restart, then re-bind CausalPipeline to the reloaded class.
import importlib
import src.rag.causal_pipeline

importlib.reload(src.rag.causal_pipeline)

from src.rag.causal_pipeline import CausalPipeline

In [13]:
# Stand up a fresh CausalPipeline for the tests in this section.
print("Initializing Causal Pipeline...")
causal_pipeline = CausalPipeline()
print("Causal Pipeline initialized successfully.\n")

# Transcripts the causal queries in this section are evaluated against.
test_transcript_ids = [
    "ce44aa4b-13b7-4778-b413-15f37ee15f7a",
    "6447a610-7803-4e29-918f-7255ece57195",
    "1e47a0f1-6334-45b3-8d04-84f8795d8d60",
]

# Counterfactual question ("what if the agent had ...").
test_query = "How would customer satisfaction have changed if the agent had shown more empathy during the interaction?"

# Echo the inputs, one item per line, before the run starts.
print(
    f"Test Query: {test_query}",
    f"Test Transcript IDs: {test_transcript_ids}\n",
    "Running causal analysis...\n",
    sep="\n",
)

# Execute the pipeline and frame its report between two 80-character rules.
result = causal_pipeline.run(test_query, test_transcript_ids)
print("=" * 80, result, "=" * 80, sep="\n")

2025-12-04 20:19:38,407 - src.rag.causal_pipeline - INFO - CausalPipeline: Loading resources...


Initializing Causal Pipeline...
Loading schema...
Loading dataset...
Loading SCM bundle...
Loading SCM bundle...
CausalPipeline initialized.
Causal Pipeline initialized successfully.

Test Query: How would customer satisfaction have changed if the agent had shown more empathy during the interaction?
Test Transcript IDs: ['ce44aa4b-13b7-4778-b413-15f37ee15f7a', '6447a610-7803-4e29-918f-7255ece57195', '1e47a0f1-6334-45b3-8d04-84f8795d8d60']

Running causal analysis...

CausalPipeline initialized.
Causal Pipeline initialized successfully.

Test Query: How would customer satisfaction have changed if the agent had shown more empathy during the interaction?
Test Transcript IDs: ['ce44aa4b-13b7-4778-b413-15f37ee15f7a', '6447a610-7803-4e29-918f-7255ece57195', '1e47a0f1-6334-45b3-8d04-84f8795d8d60']

Running causal analysis...

Causal Analysis Results:

Transcript ID: ce44aa4b-13b7-4778-b413-15f37ee15f7a
Query Type: counterfactual
Interventions:
  - emo_sadness_mean: 0.0010
  - emo_anger_mean: 

In [14]:
# Interventional question ("what happens if we change ..."), run against the same transcripts.
test_query_2 = "What happens to resolution time if we increase agent response speed?"

# Echo the query before the run starts.
print(f"\nTest Query 2: {test_query_2}\n", "Running causal analysis...\n", sep="\n")

# Execute the pipeline and frame its report between two 80-character rules.
result_2 = causal_pipeline.run(test_query_2, test_transcript_ids)
print("=" * 80, result_2, "=" * 80, sep="\n")


Test Query 2: What happens to resolution time if we increase agent response speed?

Running causal analysis...

Causal Analysis Results:

Transcript ID: ce44aa4b-13b7-4778-b413-15f37ee15f7a
Query Type: interventional
Interventions:
  - avg_agent_utt_len: 8.3812
No targets specified.

Transcript ID: 6447a610-7803-4e29-918f-7255ece57195
Query Type: interventional
Interventions:
  - avg_agent_utt_len: 7.6193
No targets specified.

Transcript ID: 1e47a0f1-6334-45b3-8d04-84f8795d8d60
Query Type: interventional
Interventions:
  - avg_agent_utt_len: 11.8193
No targets specified.
Causal Analysis Results:

Transcript ID: ce44aa4b-13b7-4778-b413-15f37ee15f7a
Query Type: interventional
Interventions:
  - avg_agent_utt_len: 8.3812
No targets specified.

Transcript ID: 6447a610-7803-4e29-918f-7255ece57195
Query Type: interventional
Interventions:
  - avg_agent_utt_len: 7.6193
No targets specified.

Transcript ID: 1e47a0f1-6334-45b3-8d04-84f8795d8d60
Query Type: interventional
Interventions:
  - av

In [15]:
# Descriptive question (no "what if"), run against the same transcripts.
test_query_3 = "Describe the agent's communication style in these interactions"

# Echo the query before the run starts.
print(f"\nTest Query 3: {test_query_3}\n", "Running causal analysis...\n", sep="\n")

# Execute the pipeline and frame its report between two 80-character rules.
result_3 = causal_pipeline.run(test_query_3, test_transcript_ids)
print("=" * 80, result_3, "=" * 80, sep="\n")


Test Query 3: Describe the agent's communication style in these interactions

Running causal analysis...



2025-12-04 20:24:05,126 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id ce44aa4b-13b7-4778-b413-15f37ee15f7a. Skipping SCM.
2025-12-04 20:24:05,126 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 6447a610-7803-4e29-918f-7255ece57195. Skipping SCM.
2025-12-04 20:24:05,126 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 1e47a0f1-6334-45b3-8d04-84f8795d8d60. Skipping SCM.
2025-12-04 20:24:05,126 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 6447a610-7803-4e29-918f-7255ece57195. Skipping SCM.
2025-12-04 20:24:05,126 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 1e47a0f1-6334-45b3-8d04-84f8795d8d60. Skipping SCM.


Causal Analysis Results:

Transcript ID: ce44aa4b-13b7-4778-b413-15f37ee15f7a
Query Type: descriptive
Targets (Before -> After, Delta):
  - num_agent_turns: 14.0000 -> 14.0000 (Delta: 0.0000)
  - total_words_agent: 244.0000 -> 244.0000 (Delta: 0.0000)
  - avg_agent_utt_len: 17.4286 -> 17.4286 (Delta: 0.0000)

Transcript ID: 6447a610-7803-4e29-918f-7255ece57195
Query Type: descriptive
Targets (Before -> After, Delta):
  - num_agent_turns: 15.0000 -> 15.0000 (Delta: 0.0000)
  - total_words_agent: 250.0000 -> 250.0000 (Delta: 0.0000)
  - avg_agent_utt_len: 16.6667 -> 16.6667 (Delta: 0.0000)

Transcript ID: 1e47a0f1-6334-45b3-8d04-84f8795d8d60
Query Type: descriptive
Targets (Before -> After, Delta):
  - num_agent_turns: 15.0000 -> 15.0000 (Delta: 0.0000)
  - total_words_agent: 313.0000 -> 313.0000 (Delta: 0.0000)
  - avg_agent_utt_len: 20.8667 -> 20.8667 (Delta: 0.0000)


## 7. Test Agent Memory & Retry Logic
Tests that the agent stores memory after answering a query and handles follow-up queries using its memory and retry logic.

In [14]:
from src.rag.agentic_graph import ObserveAIAgent
from src.rag.retriever import Retriever
from src.rag.causal_pipeline import CausalPipeline
from src.rag.embeddings import EmbeddingModel
from src.rag.vector_store import VectorStore
from src.rag.graph_store import GraphStore
from src.rag.transcript_store import TranscriptStore
from sentence_transformers import CrossEncoder
from src.config import Config
from langchain_google_genai import ChatGoogleGenerativeAI

# One Gemini chat model shared by the causal pipeline and the retriever (both receive llm=llm below).
# Model name and API key are read from Config rather than hard-coded in the notebook.
llm = ChatGoogleGenerativeAI(
    model=Config.GEMINI_MODEL,
    google_api_key=Config.GEMINI_API_KEY,
    temperature=0.1
)
# Re-initialize components to ensure clean state
reranker = CrossEncoder(Config.RERANKER_MODEL)
embedding_model = EmbeddingModel()
vector_store = VectorStore()
graph_store = GraphStore()
transcript_store = TranscriptStore()
causal = CausalPipeline(llm=llm)

# Unlike the earlier sections, the Retriever here is also handed the shared LLM.
retriever = Retriever(
    embedding_model=embedding_model,
    vector_store=vector_store,
    graph_store=graph_store,
    transcript_store=transcript_store,
    reranker=reranker,
    llm=llm
)

2025-12-05 16:17:48,087 - sentence_transformers.cross_encoder.CrossEncoder - INFO - Use pytorch device: cpu
2025-12-05 16:17:48,623 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-05 16:17:48,624 - src.rag.embeddings - INFO - Using device: cpu
2025-12-05 16:17:48,636 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-05 16:17:52,554 - src.rag.embeddings - INFO - Initialized embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-05 16:17:52,554 - src.rag.embeddings - INFO - Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
2025-12-05 16:17:52,570 - src.rag.embeddings - INFO - Using device: cpu
2025-12-05 16:17:52,579 - sentence_transformers.SentenceTransformer - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-12-05 16:17:57,265 - src.rag.embeddings - INFO - Initialized embedding model: s

Loading schema...
Loading dataset...
Loading SCM bundle...


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

  setstate(state)


CausalPipeline initialized.


In [None]:
# Dev helper (disabled): uncomment to hot-reload src.rag.retriever and rebuild `retriever`.
# NOTE(review): the rebuilt Retriever below omits the `llm=llm` argument that the setup cell passes — confirm before use.
# import importlib,src.rag.retriever
# importlib.reload(src.rag.retriever)
# from src.rag.retriever import Retriever
# retriever = Retriever(
#     embedding_model=embedding_model,
#     vector_store=vector_store,
#     graph_store=graph_store,
#     transcript_store=transcript_store,
#     reranker=reranker
# )

In [None]:
# Hot-reload the agent module so on-disk edits are picked up without a kernel restart,
# then re-bind ObserveAIAgent to the reloaded class.
import importlib
import src.rag.agentic_graph

importlib.reload(src.rag.agentic_graph)

from src.rag.agentic_graph import ObserveAIAgent

In [35]:
# Build the agent used by the memory test. `retriever`, `causal`, `llm` and
# `embedding_model` must already be defined in the kernel before this runs.
agent = ObserveAIAgent(
    retriever=retriever,
    causal_pipeline=causal,
    llm=llm,
    embedder=embedding_model,
)
print("Agent initialized for Memory Test.")

Agent initialized for Memory Test.


In [None]:
# 1. First Query (Should trigger 'retrieve' routing and store memory)
query1 = "What are the main reasons for customer dissatisfaction?"
print(f"\n--- Turn 1: {query1} ---")
result1 = agent.invoke(query1)
print(f"Memory Stored: {result1['memory_stored']}")
print(f"Response Length: {len(result1['response'])}")


--- Turn 1: What are the main reasons for customer dissatisfaction? ---


2025-12-05 17:23:46,849 - src.rag.agentic_graph - INFO - Rewrites generated: 5
2025-12-05 17:23:46,849 - src.rag.agentic_graph - INFO - First conversation detected (no memories). Routing: retrieve
2025-12-05 17:23:46,849 - src.rag.agentic_graph - INFO - Running full dual retrieval (retrieve) on 5 queries.
2025-12-05 17:23:46,849 - src.rag.agentic_graph - INFO - First conversation detected (no memories). Routing: retrieve
2025-12-05 17:23:46,849 - src.rag.agentic_graph - INFO - Running full dual retrieval (retrieve) on 5 queries.


Rewritten queries:

- What causes customer dissatisfaction?
- Why do customers become dissatisfied?
- Provide examples of situations that lead to customer dissatisfaction.
- How could customer dissatisfaction have been prevented?
- What specific product or service issues cause customer dissatisfaction?
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Ask_for_Clarification
Classified Intent: Diagnose_Issue
Classified Intent: Diagnose_Issue


2025-12-05 17:24:20,490 - src.rag.retriever - INFO - Batch reranking 250 pairs across 5 queries...


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

2025-12-05 17:24:30,991 - src.rag.agentic_graph - INFO - Retrieved 37 docs
2025-12-05 17:24:30,994 - src.rag.agentic_graph - INFO - ---GENERATE CAUSAL ANALYSIS---
2025-12-05 17:24:30,994 - src.rag.agentic_graph - INFO - ---GENERATE CAUSAL ANALYSIS---
2025-12-05 17:24:33,534 - src.rag.agentic_graph - INFO - Generation Mode: analysis
2025-12-05 17:24:33,534 - src.rag.agentic_graph - INFO - Generation Mode: analysis
2025-12-05 17:24:39,681 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 8646d0c8-3c73-406a-85c9-1f190be0342a. Skipping SCM.
2025-12-05 17:24:39,690 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id f83cd9ff-0bad-4d3c-a0ec-b66ff586faa0. Skipping SCM.
2025-12-05 17:24:39,690 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 323a297d-2b51-4768-8330-71fe5f49a303. Skipping SCM.
2025-12-05 17:24:39,700 - src.rag.causal_pipeline - INFO - No valid interventions for transcript_id 0b335e64-038a-4ce1-9bf9-5c7

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

An error occurred: 'ChatGoogleGenerativeAI' object has no attribute 'agenerate_text'. Skipping a sample by assigning it nan score.


2025-12-05 17:25:37,768 - ragas.executor - ERROR - Exception raised in Job[2]: ValidationError(1 validation error for ResponseRelevanceOutput
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n    "questio...oncommittal": 0\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/json_invalid)
2025-12-05 17:25:42,609 - ragas.executor - ERROR - Exception raised in Job[0]: ValidationError(1 validation error for Verification
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "reason": ...n  "verdict": 1\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/json_invalid)
2025-12-05 17:25:42,609 - ragas.executor - ERROR - Exception raised in Job[0]: ValidationError(1 validation error for Verification
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value='```json\n{\n  "reason": ...n  "verdict": 1\

Memory Stored: None
Response Length: 9341
Ragas Scores: {'llm_context_precision_without_reference': nan, 'nv_context_relevance': nan, 'answer_relevancy': nan, 'faithfulness': nan}


In [38]:
# Show the evaluation scores attached to the turn-1 result.
# NOTE(review): in the recorded run every metric is nan; the turn-1 log reports
# "'ChatGoogleGenerativeAI' object has no attribute 'agenerate_text'" and JSON
# validation errors, so these values reflect a failed evaluation, not quality.
print(result1['ragas_scores'])

{'llm_context_precision_without_reference': nan, 'nv_context_relevance': nan, 'answer_relevancy': nan, 'faithfulness': nan}


In [37]:
# Print the full turn-1 answer as plain text (the Markdown markup is not rendered).
print(result1['response'])

**1. Executive Summary**

Customer dissatisfaction primarily stems from a confluence of factors related to perceived poor value, inconsistent service quality, and inadequate communication and support. Customers frequently express frustration when they feel they are paying too much for a service that does not meet their expectations or is inferior to competitors. Recurring issues with service reliability, such as outages, delays, or basic feature limitations, significantly erode trust. Furthermore, a lack of transparency in pricing and policies, coupled with unhelpful or dismissive customer support interactions, exacerbates negative sentiments, often leading customers to consider switching providers.

**2. Key Causal Patterns**

*   **Pattern A: Perceived Poor Value and Unfair Pricing**
    *   *Description:* Customers consistently express dissatisfaction when they perceive that the price of a product or service is too high relative to its quality or the value offered by competitors. Th

In [21]:
# 2. Follow-up Query (Should trigger 'memory_only' or 'hybrid' and use fewer rewrites)
query2 = "How could it have been avoided?"
print(f"\n--- Turn 2: {query2} ---")
result2 = agent.invoke(query2)
print(f"Response Length: {len(result2['response'])}")
print(f"Iterations: {result2['iterations']}")

# Check internal memory store
print("\n--- Internal Memory State ---")
memories = agent.memory.all()
print(f"Total Memories: {len(memories)}")
if memories:
    print(f"Last Memory Summary: {memories[-1].summary}")



--- Turn 2: How could it have been avoided? ---


2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Rewrites generated: 2
2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Memory candidates: 1
2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Memory routing decision: retrieve (selected 0 memories)
2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Running full dual retrieval (retrieve) on 2 queries.
2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Memory candidates: 1
2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Memory routing decision: retrieve (selected 0 memories)
2025-12-05 07:04:32,211 - src.rag.agentic_graph - INFO - Running full dual retrieval (retrieve) on 2 queries.


Rewritten queries:

- How could customer dissatisfaction have been avoided?
- What actions prevent customer churn?
Classified Intent: Investigate_Complaint
Classified Intent: Investigate_Complaint
Classified Intent: Inquire_About_Retention_Offers
Classified Intent: Inquire_About_Retention_Offers


2025-12-05 07:04:50,535 - src.rag.retriever - INFO - Batch reranking 100 pairs across 2 queries...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2025-12-05 07:04:54,742 - src.rag.agentic_graph - INFO - Retrieved 20 docs
2025-12-05 07:04:54,745 - src.rag.agentic_graph - INFO - ---GENERATE CAUSAL ANALYSIS---
2025-12-05 07:04:54,745 - src.rag.agentic_graph - INFO - ---GENERATE CAUSAL ANALYSIS---
2025-12-05 07:05:38,057 - src.rag.agentic_graph - INFO - Generation complete.
2025-12-05 07:05:38,057 - src.rag.agentic_graph - INFO - ---CRITIQUE ANSWER---
2025-12-05 07:05:38,057 - src.rag.agentic_graph - INFO - Generation complete.
2025-12-05 07:05:38,057 - src.rag.agentic_graph - INFO - ---CRITIQUE ANSWER---
2025-12-05 07:05:45,526 - src.rag.agentic_graph - INFO - Critique pass=True; failed_checks=[]; reason=
2025-12-05 07:05:45,540 - src.rag.agentic_graph - INFO - ---SUMMARY GENERATION & MEM STORE---
2025-12-05 07:05:45,526 - src.rag.agentic_graph - INFO - Critique pass=True; failed_checks=[]; reason=
2025-12-05 07:05:45,540 - src.rag.agentic_graph - INFO - ---SUMMARY GENERATION & MEM STORE---
2025-12-05 07:05:49,108 - src.rag.agentic

Response Length: 6393
Iterations: 1

--- Internal Memory State ---
Total Memories: 2
Last Memory Summary: Customer dissatisfaction and churn are primarily driven by systemic service failures and a perceived lack of value, not merely a lack of emotional support. While agent empathy can mitigate immediate negative emotions, it often fails to improve overall customer sentiment when core issues like denied claims or service outages remain unresolved. To effectively reduce churn, agents must move beyond superficial emotional interventions to proactively identify, escalate, and resolve systemic problems, 


In [23]:
# Print the full turn-2 answer as plain text (the Markdown markup is not rendered).
print(result2['response'])

**1. Executive Summary**

The primary driver of customer dissatisfaction and churn risk, which could have been avoided, stems from systemic service failures and a perceived lack of genuine value or acknowledgment, rather than solely from a lack of emotional support. While agents' efforts to reduce negative emotions and express approval are effective in mitigating immediate emotional distress, these interventions often fail to significantly improve overall customer sentiment when core issues like denied claims, service outages, unclear policies, or ineffective loyalty programs remain unresolved. Customers frequently express feeling undervalued by generic solutions or a lack of proactive, reliable follow-up, indicating that a deeper, more structural approach to problem resolution and value demonstration is necessary to prevent negative outcomes.

**2. Key Causal Patterns**

*   **Pattern A: Systemic Service Failures Undermine Emotional Support.**
    *   *Evidence:* Customers consistentl