In [1]:
# Enable autoreload so that edited modules are re-imported automatically before each cell runs
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys
from dotenv import load_dotenv, find_dotenv

# Make the parent of the notebook's working directory importable so that the
# `src.` package imports used further down resolve.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Guard the append: re-running this cell must not pile up duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Load environment variables from the .env file located by find_dotenv().
_ = load_dotenv(find_dotenv())

# Do not print secret values (e.g. OPENAI_API_KEY) here: cell outputs are
# saved with the notebook and are easily committed or shared.
print(parent_dir)

/home/ct-admin/Documents/Langgraph/00_AudioProject/02_Call_Center_AI_Agent


In [3]:
# ./src/Agents/call_center_agent/test_router.py
"""
Dedicated Router Testing - Test LLM router classification decisions.
"""
import logging
from typing import Any, Dict, List, Optional, Tuple

from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_ollama import ChatOllama

from src.Agents.call_center_agent.prompts import get_router_prompt, parse_router_decision
from src.Agents.call_center_agent.state import CallStep

# Configure logging
# basicConfig() does nothing when the root logger already has handlers, so if
# an earlier import configured logging this call changes neither level nor format.
# NOTE(review): the timestamped "... - INFO - ..." lines in the captured output
# suggest logging was already configured elsewhere — confirm.
logging.basicConfig(level=logging.INFO)
# Module-level logger; RouterTester uses it to report failed router calls.
logger = logging.getLogger(__name__)


class RouterTester:
    """Test the LLM router classification system.

    Every case is a ``(current_step, client_message, expected_classification)``
    triple. The case tables live on the class so they can be extended or
    overridden (per subclass or per instance) without touching the runners.
    All three runners print a human-readable report and return
    ``(passed, failed)`` counts.
    """

    # Client identity placed into every synthetic router state.
    CLIENT_NAME = "John Smith"

    # Agent line that opens every synthetic conversation.
    OPENING_LINE = "Good day, this is Agent from Cartrack."

    # NOTE(review): "any_step" is used below as a step name; confirm that
    # get_router_prompt() treats an unknown step the way these cases expect.
    SUITE_CASES = (
        # NAME VERIFICATION TESTS
        ("name_verification", "Yes, this is John", "STEP_RELATED"),
        ("name_verification", "Who is this calling?", "STEP_RELATED"),
        ("name_verification", "What's my account balance?", "QUERY_UNRELATED"),
        ("name_verification", "I want to speak to supervisor", "ESCALATION"),

        # REASON FOR CALL TESTS
        ("reason_for_call", "I understand", "STEP_RELATED"),
        ("reason_for_call", "How much do I owe?", "STEP_RELATED"),
        ("reason_for_call", "Why wasn't my payment taken?", "QUERY_UNRELATED"),
        ("reason_for_call", "This is wrong, I paid already", "OBJECTION"),

        # NEGOTIATION TESTS
        ("negotiation", "I can't afford that much", "OBJECTION"),
        ("negotiation", "What happens if I don't pay?", "QUERY_UNRELATED"),
        ("negotiation", "OK, I understand the consequences", "STEP_RELATED"),
        ("negotiation", "Cancel my account", "ESCALATION"),

        # PROMISE TO PAY TESTS
        ("promise_to_pay", "Yes, you can debit my account", "AGREEMENT"),
        ("promise_to_pay", "I can't pay the full amount", "OBJECTION"),
        ("promise_to_pay", "How does DebiCheck work?", "QUERY_UNRELATED"),
        ("promise_to_pay", "I need to think about it", "OBJECTION"),

        # GENERAL ESCALATION TESTS
        ("any_step", "I want to speak to your manager", "ESCALATION"),
        ("any_step", "This is harassment", "ESCALATION"),
        ("any_step", "I'm cancelling my service", "ESCALATION"),
        ("any_step", "I want to file a complaint", "ESCALATION"),
    )

    # One continuous call; each turn is classified with the full history so far.
    CONVERSATION_CASES = (
        ("introduction", "Hello?", "STEP_RELATED"),
        ("name_verification", "Yes, this is John speaking", "STEP_RELATED"),
        ("details_verification", "My ID is 8312345678901", "STEP_RELATED"),
        ("reason_for_call", "What's this about?", "QUERY_UNRELATED"),
        ("reason_for_call", "I understand I owe money", "STEP_RELATED"),
        ("negotiation", "I can't pay that much right now", "OBJECTION"),
        ("negotiation", "What are my options?", "QUERY_UNRELATED"),
        ("promise_to_pay", "Can I pay half today?", "OBJECTION"),
        ("promise_to_pay", "Yes, debit R200 from my account", "AGREEMENT"),
    )

    EDGE_CASES = (
        # Ambiguous messages
        ("negotiation", "OK", "STEP_RELATED"),
        ("promise_to_pay", "Maybe", "OBJECTION"),
        ("any_step", "Hmm", "STEP_RELATED"),

        # Mixed intent
        ("negotiation", "I understand but can't pay", "OBJECTION"),
        ("promise_to_pay", "Yes but not the full amount", "OBJECTION"),

        # Long messages
        ("reason_for_call", "I'm really confused about why you're calling me because I thought I paid my bill last month and I don't understand what's happening", "QUERY_UNRELATED"),

        # Empty/short messages
        ("any_step", "", "STEP_RELATED"),
        ("any_step", "What?", "STEP_RELATED"),
    )

    def __init__(self, model: str = "qwen2.5:3b-instruct", temperature: float = 0):
        """Create the router LLM client.

        Args:
            model: Ollama model name used for routing.
            temperature: Sampling temperature passed to the model.
        """
        self.router_llm = ChatOllama(model=model, temperature=temperature)

    def test_router_decision(self, current_step: str, client_message: str, context_messages: Optional[List] = None) -> str:
        """Test a single router decision.

        Args:
            current_step: Name of the call step placed into the state.
            client_message: Client utterance; only used to build the default
                two-message history when ``context_messages`` is missing or empty.
            context_messages: Message history to classify instead of the default.

        Returns:
            Whatever ``parse_router_decision`` returns for the LLM response, or
            the string ``"ERROR"`` if invoking or parsing raised.
        """
        # Build test state
        state = {
            "current_step": current_step,
            "client_name": self.CLIENT_NAME,
            "messages": context_messages or [
                AIMessage(content=self.OPENING_LINE),
                HumanMessage(content=client_message),
            ],
        }

        # Get router prompt
        prompt = [SystemMessage(content=get_router_prompt(state))]

        # Get LLM decision. Failures are reported as "ERROR" so one bad call
        # does not abort a whole run; the traceback is logged for diagnosis.
        try:
            response = self.router_llm.invoke(prompt)
            return parse_router_decision(response, state)
        except Exception as e:
            logger.exception("Router test failed: %s", e)
            return "ERROR"

    def run_test_suite(self) -> Tuple[int, int]:
        """Run every case in ``SUITE_CASES`` and print a pass/fail report.

        Returns:
            ``(passed, failed)`` counts.
        """
        print("=" * 60)
        print("ROUTER CLASSIFICATION TEST SUITE")
        print("=" * 60)

        passed = 0
        failed = 0

        for step, message, expected in self.SUITE_CASES:
            print(f"\nTesting: [{step}] '{message}'")
            result = self.test_router_decision(step, message)

            if result == expected:
                print(f"✅ PASS: {result}")
                passed += 1
            else:
                print(f"❌ FAIL: Expected {expected}, got {result}")
                failed += 1

        # An empty case table must report 0.0% rather than divide by zero.
        total = passed + failed
        success_rate = passed / total * 100 if total else 0.0

        print("\n" + "=" * 60)
        print(f"RESULTS: {passed} passed, {failed} failed")
        print(f"Success rate: {success_rate:.1f}%")
        print("=" * 60)

        return passed, failed

    def test_conversation_flow(self) -> Tuple[int, int]:
        """Test router in a realistic conversation flow.

        Feeds ``CONVERSATION_CASES`` turn by turn, growing one shared message
        history and appending a canned agent reply after each classification.

        Returns:
            ``(passed, failed)`` counts.
        """
        print("\n" + "=" * 60)
        print("CONVERSATION FLOW TEST")
        print("=" * 60)

        # Build progressive message history
        messages = [AIMessage(content=self.OPENING_LINE)]
        passed = 0
        failed = 0

        for step, client_msg, expected in self.CONVERSATION_CASES:
            # Add client message
            messages.append(HumanMessage(content=client_msg))

            # Pass a snapshot: the shared history keeps growing after this call.
            result = self.test_router_decision(step, client_msg, messages.copy())

            matched = result == expected
            if matched:
                passed += 1
            else:
                failed += 1

            print(f"[{step}] Client: '{client_msg}'")
            print(f"Expected: {expected}, Got: {result} {'✅' if matched else '❌'}")

            # Add simulated agent response
            if result == "QUERY_UNRELATED":
                messages.append(AIMessage(content="I'll explain briefly. Now, regarding your payment..."))
            elif result == "ESCALATION":
                messages.append(AIMessage(content="I understand your concern. Let me help resolve this."))
            else:
                messages.append(AIMessage(content="Thank you for that information."))

            print()

        return passed, failed

    def test_edge_cases(self) -> Tuple[int, int]:
        """Test edge cases and difficult scenarios from ``EDGE_CASES``.

        Returns:
            ``(passed, failed)`` counts.
        """
        print("\n" + "=" * 60)
        print("EDGE CASE TESTING")
        print("=" * 60)

        passed = 0
        failed = 0

        for step, message, expected in self.EDGE_CASES:
            result = self.test_router_decision(step, message)
            if result == expected:
                status = "✅"
                passed += 1
            else:
                status = "❌"
                failed += 1
            print(f"{status} [{step}] '{message}' → {result} (expected: {expected})")

        return passed, failed


def run_all_tests():
    """Drive every router check on one shared RouterTester.

    The classification suite runs first and supplies the returned counts; the
    conversation-flow and edge-case checks then run for their printed reports.

    Returns:
        The ``(passed, failed)`` pair produced by ``run_test_suite``.
    """
    harness = RouterTester()

    # Only the main suite's tally is reported back to the caller.
    suite_passed, suite_failed = harness.run_test_suite()

    # Remaining stages, in order: conversation flow, then edge cases.
    for follow_up in (harness.test_conversation_flow, harness.test_edge_cases):
        follow_up()

    return suite_passed, suite_failed


# Run every router test when this code is executed as the main module.
# The captured output shows the guard also passes when this cell is run in the
# notebook, so executing the cell issues real requests to the router LLM.
if __name__ == "__main__":
    run_all_tests()

2025-05-28 21:53:51,076 - INFO - PyTorch version 2.7.0+cu128 available.
2025-05-28 21:53:53,526 - INFO - Database connection established successfully


ROUTER CLASSIFICATION TEST SUITE

Testing: [name_verification] 'Yes, this is John'


2025-05-28 21:53:55,239 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,319 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,383 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,445 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


❌ FAIL: Expected STEP_RELATED, got AGREEMENT

Testing: [name_verification] 'Who is this calling?'
❌ FAIL: Expected STEP_RELATED, got OBJECTION

Testing: [name_verification] 'What's my account balance?'
❌ FAIL: Expected QUERY_UNRELATED, got OBJECTION

Testing: [name_verification] 'I want to speak to supervisor'
✅ PASS: ESCALATION

Testing: [reason_for_call] 'I understand'


2025-05-28 21:53:55,510 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,572 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,635 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,698 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


✅ PASS: STEP_RELATED

Testing: [reason_for_call] 'How much do I owe?'
✅ PASS: STEP_RELATED

Testing: [reason_for_call] 'Why wasn't my payment taken?'
❌ FAIL: Expected QUERY_UNRELATED, got OBJECTION

Testing: [reason_for_call] 'This is wrong, I paid already'
✅ PASS: OBJECTION

Testing: [negotiation] 'I can't afford that much'


2025-05-28 21:53:55,762 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,824 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,887 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:55,951 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


✅ PASS: OBJECTION

Testing: [negotiation] 'What happens if I don't pay?'
❌ FAIL: Expected QUERY_UNRELATED, got OBJECTION

Testing: [negotiation] 'OK, I understand the consequences'
✅ PASS: STEP_RELATED

Testing: [negotiation] 'Cancel my account'
❌ FAIL: Expected ESCALATION, got OBJECTION

Testing: [promise_to_pay] 'Yes, you can debit my account'


2025-05-28 21:53:56,018 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,080 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,143 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,206 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


✅ PASS: AGREEMENT

Testing: [promise_to_pay] 'I can't pay the full amount'
✅ PASS: OBJECTION

Testing: [promise_to_pay] 'How does DebiCheck work?'
✅ PASS: QUERY_UNRELATED

Testing: [promise_to_pay] 'I need to think about it'
✅ PASS: OBJECTION

Testing: [any_step] 'I want to speak to your manager'


2025-05-28 21:53:56,272 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,333 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,397 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,460 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


✅ PASS: ESCALATION

Testing: [any_step] 'This is harassment'
❌ FAIL: Expected ESCALATION, got OBJECTION

Testing: [any_step] 'I'm cancelling my service'
❌ FAIL: Expected ESCALATION, got OBJECTION

Testing: [any_step] 'I want to file a complaint'
❌ FAIL: Expected ESCALATION, got OBJECTION

RESULTS: 11 passed, 9 failed
Success rate: 55.0%

CONVERSATION FLOW TEST


2025-05-28 21:53:56,525 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,591 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,661 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,726 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


[introduction] Client: 'Hello?'
Expected: STEP_RELATED, Got: QUERY_UNRELATED ❌

[name_verification] Client: 'Yes, this is John speaking'
Expected: STEP_RELATED, Got: AGREEMENT ❌

[details_verification] Client: 'My ID is 8312345678901'
Expected: STEP_RELATED, Got: OBJECTION ❌



2025-05-28 21:53:56,792 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,857 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:56,920 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


[reason_for_call] Client: 'What's this about?'
Expected: QUERY_UNRELATED, Got: QUERY_UNRELATED ✅

[reason_for_call] Client: 'I understand I owe money'
Expected: STEP_RELATED, Got: STEP_RELATED ✅

[negotiation] Client: 'I can't pay that much right now'
Expected: OBJECTION, Got: OBJECTION ✅

[negotiation] Client: 'What are my options?'
Expected: QUERY_UNRELATED, Got: OBJECTION ❌



2025-05-28 21:53:56,984 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,050 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,112 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,174 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


[promise_to_pay] Client: 'Can I pay half today?'
Expected: OBJECTION, Got: OBJECTION ✅

[promise_to_pay] Client: 'Yes, debit R200 from my account'
Expected: AGREEMENT, Got: AGREEMENT ✅


EDGE CASE TESTING
❌ [negotiation] 'OK' → OBJECTION (expected: STEP_RELATED)
✅ [promise_to_pay] 'Maybe' → OBJECTION (expected: OBJECTION)


2025-05-28 21:53:57,235 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,297 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,361 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,427 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


❌ [any_step] 'Hmm' → OBJECTION (expected: STEP_RELATED)
✅ [negotiation] 'I understand but can't pay' → OBJECTION (expected: OBJECTION)
✅ [promise_to_pay] 'Yes but not the full amount' → OBJECTION (expected: OBJECTION)
❌ [reason_for_call] 'I'm really confused about why you're calling me because I thought I paid my bill last month and I don't understand what's happening' → OBJECTION (expected: QUERY_UNRELATED)


2025-05-28 21:53:57,490 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
2025-05-28 21:53:57,551 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


❌ [any_step] '' → OBJECTION (expected: STEP_RELATED)
❌ [any_step] 'What?' → OBJECTION (expected: STEP_RELATED)
