# Agent Testing Notebook

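This notebook exercises the individual components of `agent.py`: the async tools, the data models, the DSPy modules, the orchestration functions, and their error handling.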

**Workflow**: 
1. Modify agent.py functions
2. Run the reload cell below
3. Run individual test cells to validate changes

In [2]:
# Reload agent.py after making changes
#
# Statement order matters: the module object is reloaded first, and only then
# are its names star-imported, so the notebook's globals are rebound to the
# freshly loaded definitions.
# Note: the star import only (re)binds names; anything that was deleted from
# agent.py stays in the notebook namespace until the kernel is restarted.
import importlib
import agent
importlib.reload(agent)  # re-executes agent.py inside the existing module object
from agent import *  # rebinds the notebook's globals to the reloaded names

print("Agent module reloaded successfully!")

Agent module reloaded successfully!


## Setup Configuration

In [3]:
# Environment and DSPy configuration shared by the test cells below
import os
from dotenv import load_dotenv
import dspy

# Load variables from a .env file into the process environment.
load_dotenv()

# Echo the model settings in use (the API key is not printed).
print(
    f"SMALL_MODEL: {SMALL_MODEL}",
    f"BIG_MODEL: {BIG_MODEL}",
    f"TEMPERATURE: {TEMPERATURE}",
    f"MAX_TOKENS: {MAX_TOKENS}",
    sep="\n",
)

# Build an LM from the small model and the OPENROUTER_* settings, then
# register it as DSPy's default.
default_lm = dspy.LM(
    model=SMALL_MODEL,
    api_key=OPENROUTER_API_KEY,
    api_base=OPENROUTER_BASE_URL,
    temperature=TEMPERATURE,
    max_tokens=MAX_TOKENS,
)
dspy.configure(lm=default_lm)

print("\nConfiguration complete!")

SMALL_MODEL: openrouter/google/gemini-2.5-flash-lite-preview-06-17
BIG_MODEL: openrouter/openai/gpt-4.1-mini
TEMPERATURE: 1.0
MAX_TOKENS: 4000

Configuration complete!


## Test Individual Async Tools

In [None]:
# Test web_search
result = await web_search("DSPy framework", count=2)
print("=== Web Search Test ===")
print(result)

In [None]:
# Exercise wikipedia_search with sentences=3 and show whatever it returns.
# NOTE(review): unlike the web_search test cell, this call is not awaited.
# Confirm wikipedia_search is a plain synchronous function; if it is a
# coroutine function, `result` would be an un-awaited coroutine object here.
result = wikipedia_search("Python programming", sentences=3)
print("=== Wikipedia Search Test ===", result, sep="\n")

In [None]:
# Test async_batch_call
calls = [
    {"tool_name": "web_search", "args": {"query": "machine learning", "count": 2}},
    {"tool_name": "wikipedia_search", "args": {"query": "artificial intelligence", "sentences": 2}}
]

results = await async_batch_call(calls)
print("=== Async Batch Call Test ===")
for i, result in enumerate(results):
    print(f"\nResult {i+1}:")
    print(result[:200] + "..." if len(result) > 200 else result)

## Test Data Models

In [None]:
# Build a QueryAnalysis by hand and echo a selection of its fields
test_analysis = QueryAnalysis(
    query_type="depth_first",
    complexity="medium",
    main_concepts=["AI", "machine learning"],
    key_entities=["GPT", "neural networks"],
    relationships=["AI includes machine learning"],
    notes="Current state of AI in 2024",
    answer_format="detailed analysis",
)

print(
    "=== QueryAnalysis Model Test ===",
    f"Query Type: {test_analysis.query_type}",
    f"Complexity: {test_analysis.complexity}",
    f"Main Concepts: {test_analysis.main_concepts}",
    f"Key Entities: {test_analysis.key_entities}",
    f"Answer Format: {test_analysis.answer_format}",
    sep="\n",
)

In [None]:
# Build a single PlanStep, wrap it in a ResearchPlan, and echo the fields
test_step = PlanStep(
    id=1,
    description="Research AI fundamentals",
    depends_on=[],
    budget_calls=5,
)

test_plan = ResearchPlan(steps=[test_step])

print(
    "=== PlanStep and ResearchPlan Test ===",
    f"Step ID: {test_step.id}",
    f"Description: {test_step.description}",
    f"Budget: {test_step.budget_calls}",
    f"Plan has {len(test_plan.steps)} steps",
    sep="\n",
)

## Test DSPy Modules

In [None]:
# Test AsyncLeadAgent - Query Analysis
test_query = "Compare machine learning frameworks for beginners"

lead_agent = AsyncLeadAgent()
analysis_result = await lead_agent.query_analyzer.acall(query=test_query)

print("=== AsyncLeadAgent Query Analysis Test ===")
print(f"Query: {test_query}")
print(f"\nAnalysis:")
print(f"Type: {analysis_result.analysis.query_type}")
print(f"Complexity: {analysis_result.analysis.complexity}")
print(f"Main Concepts: {analysis_result.analysis.main_concepts}")
print(f"Key Entities: {analysis_result.analysis.key_entities}")
print(f"Answer Format: {analysis_result.analysis.answer_format}")

In [None]:
# Test AsyncLeadAgent - Research Planning
# Use the analysis from the previous cell
plan_result = await lead_agent.planner.acall(
    query=test_query,
    analysis=analysis_result.analysis
)

print("=== AsyncLeadAgent Research Planning Test ===")
print(f"Plan has {len(plan_result.plan.steps)} steps:")

for step in plan_result.plan.steps:
    print(f"\nStep {step.id}: {step.description}")
    print(f"  Budget: {step.budget_calls} tool calls")
    print(f"  Depends on: {step.depends_on}")

In [None]:
# Test Full AsyncLeadAgent workflow
test_query_2 = "Research the latest developments in quantum computing"

analysis_full, plan_full = await lead_agent.aforward(test_query_2)

print("=== Full AsyncLeadAgent Workflow Test ===")
print(f"Query: {test_query_2}")
print(f"\n=== Analysis ===")
print(f"Type: {analysis_full.analysis.query_type}")
print(f"Complexity: {analysis_full.analysis.complexity}")
print(f"Main Concepts: {analysis_full.analysis.main_concepts}")

print(f"\n=== Plan ===")
print(f"Generated {len(plan_full.plan.steps)} steps:")
for step in plan_full.plan.steps:
    print(f"\nStep {step.id}: {step.description}")
    print(f"  Budget: {step.budget_calls} calls")

## Test Main Orchestration Functions

In [None]:
# Test run_research function
research_query = "Analyze the impact of AI on software development"

analysis_main, plan_main = await run_research(research_query, verbose=True)

print("\n=== run_research Function Test Complete ===")
print(f"Returned analysis type: {type(analysis_main)}")
print(f"Returned plan type: {type(plan_main)}")
print(f"Plan contains {len(plan_main.plan.steps)} steps")

In [None]:
# Call the blocking run_research_sync wrapper (no await) and summarise the result.
# NOTE(review): notebook cells execute inside an already-running event loop.
# Confirm run_research_sync does not rely on asyncio.run(), which raises
# RuntimeError when a loop is already running.
sync_query = "Compare Python and JavaScript for web development"

print("=== Testing Synchronous Wrapper ===")
sync_analysis, sync_plan = run_research_sync(sync_query, verbose=True)

print(
    "\nSync function completed successfully!",
    f"Analysis complexity: {sync_analysis.analysis.complexity}",
    f"Plan steps: {len(sync_plan.plan.steps)}",
    sep="\n",
)

## Test Error Handling

In [None]:
# Test error handling in async_batch_call
error_calls = [
    {"tool_name": "web_search", "args": {"query": "valid search", "count": 2}},
    {"tool_name": "invalid_tool", "args": {"query": "this will fail"}},
    {"tool_name": "web_search", "args": {}},  # Missing required 'query'
]

print("=== Error Handling Test ===")
error_results = await async_batch_call(error_calls)

for i, result in enumerate(error_results):
    print(f"\nCall {i+1}:")
    if "[ERROR]" in result:
        print(f"❌ Error detected: {result[:100]}...")
    else:
        print(f"✅ Success: {result[:100]}...")

## Performance Testing

In [None]:
# Test performance - multiple parallel searches
import time

performance_calls = [
    {"tool_name": "web_search", "args": {"query": "artificial intelligence trends", "count": 2}},
    {"tool_name": "web_search", "args": {"query": "machine learning algorithms", "count": 2}},
    {"tool_name": "wikipedia_search", "args": {"query": "deep learning", "sentences": 2}},
    {"tool_name": "wikipedia_search", "args": {"query": "neural networks", "sentences": 2}}
]

print("=== Performance Test ===")
start_time = time.time()

perf_results = await async_batch_call(performance_calls)

end_time = time.time()
duration = end_time - start_time

print(f"\nCompleted {len(performance_calls)} parallel operations in {duration:.2f} seconds")
print(f"Average time per operation: {duration/len(performance_calls):.2f} seconds")
print(f"All operations completed successfully: {all('[ERROR]' not in r for r in perf_results)}")

## Tools Configuration Test

In [None]:
# Test TOOLS dictionary registration
#
# Lists every entry in TOOLS with its type, its name and a preview of its
# description (at most 50 characters).
print("=== Tools Configuration Test ===")
print(f"Registered tools: {list(TOOLS.keys())}")

for tool_name, tool in TOOLS.items():
    # `desc` may be absent or present but None/empty; fall back to 'N/A' in
    # every such case so the slice below cannot raise TypeError.
    desc = str(getattr(tool, 'desc', None) or 'N/A')
    print(f"\n{tool_name}:")
    print(f"  Type: {type(tool)}")
    print(f"  Name: {getattr(tool, 'name', 'N/A')}")
    # Append the ellipsis only when the description was actually truncated.
    print(f"  Description: {desc[:50]}{'...' if len(desc) > 50 else ''}")