In [99]:
import json
from pprint import pprint
from typing import Any

from pydantic import BaseModel, RootModel, Field
from pydantic_ai import Agent, ModelRetry, RunContext

# Free-form test case generator: no result_type is configured, so the reply is
# returned as text and parsed by later cells.
agent = Agent(
    "openai:gpt-4o-mini",
    retries=1,
    name="test_case_generator",
)

@agent.system_prompt
def test_case_system_prompt() -> str:
    """Return the static system prompt for the free-form test case generator.

    The keys listed in the prompt use the field names of the ``TestCase`` model
    that the reply is later parsed into with
    ``SuiteTestCases.model_validate_json``. Keys that the model does not
    declare (the prompt previously asked for ``input`` and ``expected_output``)
    do not populate ``input_json`` / ``expected_output_json``, which therefore
    stayed ``None`` after parsing.

    Returns:
        str: The system prompt text.
    """
    return """You are a helpful test case generator. You will be given a description of functionality
    and should generate test cases that thoroughly validate the described behavior.

    <TEST CASE OUTPUT FORMAT>
    name: <name of the test case>
    description: <description of the test case>
    input_json: <input values for the test case, as a JSON object or a list of JSON objects>
    expected_output_prompt: <plain-text description of the expected output/behavior of the test case>
    expected_output_json: <expected output of the test case, as a JSON object or a list of JSON objects>
    preconditions: <any relevant preconditions for the test case>
    </TEST CASE OUTPUT FORMAT>

    <OUTPUT FORMAT: list of dictionaries>
    [<TEST CASE OUTPUT FORMAT>, <TEST CASE OUTPUT FORMAT>, ...]
    </OUTPUT FORMAT>

    Be aware of the number of test cases the user wants and output the correct number of test cases in the correct output format.
    """

@agent.result_validator
def ensure_json_list(data: str) -> str:
    """Check that the agent's text reply is a JSON document whose top level is a list.

    ``Agent.result_validator`` is a decorator that registers a validator.
    Assigning a lambda to the attribute (as this cell did before) only replaces
    the decorator on this instance and registers nothing. The old predicate
    also tested ``isinstance(reply, list)`` against a reply that is a string.

    Args:
        data: Raw text returned by the model.

    Returns:
        str: ``data`` unchanged when it parses to a JSON list.

    Raises:
        ModelRetry: When the reply is not valid JSON or its top-level value is
            not a list, so the agent asks the model to try again (bounded by
            the agent's ``retries`` setting).
    """
    try:
        parsed = json.loads(data)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass.
        raise ModelRetry("The reply must be valid JSON: a list of test case dictionaries.") from exc
    if not isinstance(parsed, list):
        raise ModelRetry("The reply must be a JSON list of test case dictionaries.")
    return data

In [92]:

# Demonstrates that Agent rejects unknown keyword arguments. The TypeError is
# caught and printed so this cell no longer aborts a "Restart & Run All".
try:
    Agent(aa="aa")
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Agent.__init__() got an unexpected keyword argument 'aa'

In [33]:
# Minimal pet store API description that is passed (as text) to the agents.
#
# A Python dict literal keeps only the LAST value for a repeated key. "/pets"
# previously declared "post" twice, so the adoptPet operation was silently
# discarded. It now lives under its own path, "/pets/adopt", which is also the
# route name the mock executor uses ("POST /pets/adopt").
api_spec = {
    "paths": {
        "/pets": {
            # List pets, filtered by status.
            "get": {
                "parameters": [
                    {
                        "name": "status",
                        "in": "query",
                        "type": "string",
                        "required": True,
                    }
                ]
            },
            # Register a new pet.
            "post": {
                "requestBody": {
                    "content": {
                        "application/json": {
                            "schema": {
                                "type": "object",
                                "properties": {
                                    "name": {"type": "string"},
                                    "age": {"type": "integer"},
                                },
                            }
                        }
                    }
                }
            },
        },
        "/pets/adopt": {
            # Adopt an existing pet.
            "post": {
                "operationId": "adoptPet",
                "parameters": [
                    {
                        "name": "petId",
                        "in": "query",
                        "type": "string",
                        "required": True,
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Pet adoption successful",
                        "content": {
                            "application/json": {
                                "schema": {
                                    "type": "object",
                                    "properties": {
                                        "message": {
                                            "type": "string"
                                        },
                                        "adoptionId": {
                                            "type": "string"
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            },
        },
    }
}

In [3]:
response = await agent.run("Generate one case for this API spec: " + str(api_spec))


In [7]:
# Show the raw reply; for this agent it is a JSON-formatted string.
response.data

'[\n    {\n        "name": "GetPetsByStatus",\n        "description": "Validate that the API retrieves pets correctly based on their status.",\n        "input": {\n            "status": "available"\n        },\n        "expected_output": {\n            "pets": [\n                {\n                    "name": "Buddy",\n                    "age": 3,\n                    "status": "available"\n                },\n                {\n                    "name": "Max",\n                    "age": 2,\n                    "status": "available"\n                }\n            ]\n        },\n        "preconditions": "The API should have pets with the status \'available\' in the database."\n    }\n]'

In [46]:
# One generated test case. Only `name` and `description` are required; every
# other field defaults to None.
# NOTE(review): the raw reply shown above uses the keys "input" and
# "expected_output", which this model does not declare, so they do not populate
# `input_json` / `expected_output_json` (both print as None below).
# NOTE(review): a second `TestCase` class is defined later in this notebook and
# replaces this one once that cell has run.
class TestCase(BaseModel):
    name: str
    description: str
    input_json: dict[str, Any] | list[dict[str, Any]] | None = None  # structured input values
    expected_output_prompt: str | None = None  # expected behaviour as free text
    expected_output_json: dict[str, Any] | list[dict[str, Any]] | None = None  # structured expected output
    preconditions: str | None = None

# Root model whose value is a list of TestCase items; used below to validate a
# JSON document whose top level is an array.
class SuiteTestCases(RootModel[list[TestCase]]):
    pass

# Parse the agent's JSON text reply into validated TestCase objects.
test_cases = SuiteTestCases.model_validate_json(response.data)

# Pretty-print the parsed cases as plain Python data.
pprint(test_cases.model_dump())

[{'description': 'Test retrieving a list of pets with a valid status',
  'expected_output_json': None,
  'expected_output_prompt': None,
  'input_json': None,
  'name': 'GetPetsByStatus',
  'preconditions': 'The API server is running and has pets with status '
                   "'available' in the database."}]


## Planning steps 

```txt
[User/Scenario Modeling] --> [Case Type Generator] --> [Data Expansion]  
                                   |                        |  
                                   v                        v  
                        [Context-Aware Generator] --> [Synthetic Data Output]  

```

### 1.1 User Modelling

In [34]:
from dataclasses import dataclass

# One user or service persona; element type of user_modelling_agent's
# result_type (list[UserPersona]). All four fields are required strings.
class UserPersona(BaseModel):
    persona_type: str  # category, e.g. 'Individual User', 'Developer', 'Service' in the sample output
    persona: str  # short label, e.g. 'Pet Adopter'
    primary_intentions: str  # main goals, as free text
    secondary_intentions: str  # less common goals, as free text

# Agent whose reply is a structured list of UserPersona objects.
user_modelling_agent = Agent(
    "openai:gpt-4o-mini",
    result_type=list[UserPersona],
    retries=1,
    name="user_modelling_agent",
)


@dataclass
class MyDeps:  
    known_users: str


@user_modelling_agent.system_prompt
def user_modelling_prompt(ctx: RunContext[MyDeps]) -> str:
    """Build the system prompt for the user modelling agent.

    Args:
        ctx: Run context; ``ctx.deps.known_users`` is read. When it is not
            ``None`` a sentence listing the known users is inserted under
            "Persona Identification"; otherwise that line is left empty.

    Returns:
        str: The full prompt text.
    """
    known_users = ctx.deps.known_users
    if known_users is not None:
        known_users_line = "The following users are known to interact with the system: " + known_users
    else:
        known_users_line = ''

    return f"""
    Role:
    You are a strategic analyst tasked with identifying high-level user and service personas that may interact with an API or ML/AI system. Your goal is to surface potential users (both individual and service-level) and their general intentions for engaging with the system. You will work from a mix of known users and inferred personas, expanding the list to ensure diverse representation. The output should guide further development of detailed scenarios and workflows.

    Objective:

    Identify key user personas (e.g., developers, analysts, operators) and service personas (e.g., monitoring services, data ingestion pipelines).
    Capture high-level intentions for each persona, representing their goals and types of interactions.
    Expand known user/service types into broader categories to ensure full-spectrum coverage.
    Distinguish between direct users (interacting directly with the API) and indirect users/services (operating through automated processes or third-party tools).
    Instructions:

    Persona Identification:
    {known_users_line}

    Start with known user types or services that interact with the system.
    Expand the list by identifying adjacent personas or services that share similar goals or operate in overlapping domains.
    Consider both individual personas (e.g., data scientists, IT admins) and automated service personas (e.g., logging pipelines, monitoring tools).
    Intent Mapping:

    For each persona, identify primary intentions (e.g., data retrieval, model evaluation, anomaly detection).
    Include secondary intentions (e.g., performance optimization, adversarial testing) to reflect edge or less common use cases.
    Prioritize diverse goals that span operational, analytical, and exploratory use cases.
    Abstraction Levels:

    Keep intentions broad and conceptual (e.g., "monitor system health," "fetch analytics data").
    Avoid specific API endpoints or technical steps—focus on overarching objectives.
    """

In [35]:
response = await user_modelling_agent.run(
    "Generate the user personas for this API spec: " + str(api_spec),
    deps=MyDeps(known_users="A young man who wants to adopt a pet")
)

In [38]:
# Pretty-print the list of UserPersona objects returned by the user modelling agent.
pprint(response.data)


[UserPersona(persona_type='Individual User', persona='Pet Adopter', primary_intentions='Explore available pets with specific statuses (e.g., available, adopted)', secondary_intentions='Learn about pet care and adoption processes'),
 UserPersona(persona_type='Individual User', persona='Pet Owner', primary_intentions='Update pet information such as name and age', secondary_intentions='Retrieve history of adopted pets or update status of pets'),
 UserPersona(persona_type='Developer', persona='API Integrator', primary_intentions='Integrate pet adoption API into applications for better user experience', secondary_intentions='Test endpoint functionalities for performance and reliability'),
 UserPersona(persona_type='Data Analyst', persona='Adoption Trend Analyst', primary_intentions='Analyze trends in pet adoptions based on status, age, and time periods', secondary_intentions='Generate reports on adoptions for stakeholders'),
 UserPersona(persona_type='Service', persona='Monitoring Service',

### 1.2 Test Case Family


In [43]:
from dataclasses import dataclass

# A group of related test cases for one persona; element type of
# test_case_family_agent's result_type (list[TestCaseFamily]).
class TestCaseFamily(BaseModel):
    name: str
    description: str
    test_case_type: str  # category tag, e.g. 'Normal' or 'Edge' in the sample output
    test_variations: list[str]  # one free-text line per variation to expand later

# Agent whose reply is a structured list of TestCaseFamily objects.
test_case_family_agent = Agent(
    "openai:gpt-4o-mini",
    result_type=list[TestCaseFamily],
    retries=1,
    name="test_case_family_agent",
)


@dataclass
class TestCaseFamilyDeps:
    """Field-less dependency container used with ``test_case_family_agent``."""


@test_case_family_agent.system_prompt
def test_case_family_prompt(ctx: RunContext[TestCaseFamilyDeps]) -> str:
    """Return the system prompt for the test case family agent.

    The text is static; ``ctx`` is accepted but not read. The classification
    list now includes the "Stress" tag: the prompt asks for normal, edge and
    stress cases, but the tag list previously named only Normal and Edge.

    Args:
        ctx: Run context of the current agent run (unused).

    Returns:
        str: The prompt text.
    """
    # Plain string: the text has no placeholders, so no f-string is needed.
    return """
    Role:
    You are a test case generation expert, responsible for expanding high-level user and service personas into detailed, diverse test cases. Your objective is to ensure the system is thoroughly validated across all types of scenarios, including normal workflows, edge cases, and stress tests. You anticipate potential system weaknesses by generating test cases that reflect both typical and extreme usage patterns.

    Objective:

    Generate detailed test cases covering normal, edge, and stress conditions for each persona or service.
    Simulate realistic API interactions while ensuring exhaustive coverage of potential failure points.
    Classify each test case by case type and expected outcome (e.g., success, failure, error handling).
    Instructions:

    Input Interpretation:

    Take high-level personas and use cases as input (e.g., frontend developer fetching data, automated ETL pipelines).
    For each persona or service, consider typical paths and potential deviations that may cause failures or inefficiencies.
    Case Generation:

    Normal Cases: Generate standard, expected interactions where the API functions as intended.
    Edge Cases: Push boundaries by creating scenarios that test minimum/maximum values, invalid input formats, or unusual API sequences.
    Stress Cases: Simulate high loads, frequent requests, or massive datasets to test system scalability and reliability.
    Parameter Variation:

    Generate test cases that vary API parameters, payload sizes, and data types to ensure broad coverage.
    Account for dependency relationships between fields (e.g., date fields must follow logical order).
    Classification:

    Tag each test case with the appropriate category:
    Normal – Routine, everyday interactions.
    Edge – Boundary conditions or rare inputs.
    Stress – High loads, frequent requests, or massive datasets.
    """

In [44]:
# Keep the generated personas under a stable name.
# NOTE(review): `response` is rebound by other cells (the test case family loop
# below reuses the name), so this only captures the personas when it runs
# directly after the user modelling cell.
out_personas = response.data

In [47]:
out_test_case_families_per_persona: dict[UserPersona, list[TestCaseFamily]] = dict()

for user_persona in out_personas:
    response = await test_case_family_agent.run(
        "Generate the test case families for this user persona: " + str(user_persona),
        deps=TestCaseFamilyDeps(known_users=user_persona)
    )
    out_test_case_families_per_persona[str(user_persona)] = response.data
    pprint(response.data)



[TestCaseFamily(name='Explore Available Pets by Status - Normal Flow', description='The user searches for pets that are available for adoption using the specified status filter.', test_case_type='Normal', test_variations=['Search for pets available for adoption with filters (e.g., age, breed)', 'Search for pets that have been adopted', 'Search for pets with no filters applied']),
 TestCaseFamily(name='Explore Available Pets by Status - Edge Case', description='The user searches using extreme filters for pet availability using statuses that may not return any results.', test_case_type='Edge', test_variations=["Search for pets with a status that does not exist (e.g., 'lost')", 'Search for pets with a very narrow breed filter that may return few to no results', 'Search for pets with an invalid age range (e.g., age 0 or -1)']),
 TestCaseFamily(name='Explore Available Pets by Status - Stress Test', description='The user performs multiple rapid searches for pets to test system response under

In [61]:
# Display the persona -> test case families mapping built by the loop above.
out_test_case_families_per_persona

{"persona_type='Individual User' persona='Pet Adopter' primary_intentions='Explore available pets with specific statuses (e.g., available, adopted)' secondary_intentions='Learn about pet care and adoption processes'": [TestCaseFamily(name='Explore Available Pets by Status - Normal Flow', description='The user searches for pets that are available for adoption using the specified status filter.', test_case_type='Normal', test_variations=['Search for pets available for adoption with filters (e.g., age, breed)', 'Search for pets that have been adopted', 'Search for pets with no filters applied']),
  TestCaseFamily(name='Explore Available Pets by Status - Edge Case', description='The user searches using extreme filters for pet availability using statuses that may not return any results.', test_case_type='Edge', test_variations=["Search for pets with a status that does not exist (e.g., 'lost')", 'Search for pets with a very narrow breed filter that may return few to no results', 'Search for 

### 1.3 Expanded Test Cases

In [67]:
# Total number of test case families across all personas.
sum(map(len, out_test_case_families_per_persona.values()))

46

In [96]:
from dataclasses import dataclass

# Expanded test case produced by test_case_generator_agent.
# The nullable fields now default to None, as in the earlier `TestCase`
# definition in this notebook. Without a default, a `X | None` field is still
# required, so a reply that merely omits the key fails validation.
# NOTE(review): this class reuses the name `TestCase` and replaces the earlier
# definition once this cell has run.
# NOTE(review): `expected_output_prompt` and `expected_output_json` carry the
# same description text; consider telling them apart for the model.
class TestCase(BaseModel):
    name: str = Field(description="The name of the test case")
    description: str = Field(description="The description of the test case")
    path: str = Field(description="The path of the test case")
    method: str = Field(description="The method of the test case")
    input_json: dict[str, Any] | list[dict[str, Any]] | None = Field(default=None, description="The input values for the test case. Should strictly follow the api spec")
    expected_output_prompt: str | None = Field(default=None, description="The expected output/behavior of the test case")
    expected_output_json: dict[str, Any] | list[dict[str, Any]] | None = Field(default=None, description="The expected output/behavior of the test case")
    preconditions: str | None = Field(default=None, description="Any relevant preconditions for the test case")


# Agent whose reply is a structured list of expanded TestCase objects.
test_case_generator_agent = Agent(
    "openai:gpt-4o-mini",
    result_type=list[TestCase],
    retries=1,
    name="test_case_generator_agent",
)


@dataclass
class TestCaseGeneratorDeps:
    """Field-less dependency container used with ``test_case_generator_agent``."""


@test_case_generator_agent.system_prompt
def test_case_generator_prompt(ctx: RunContext[TestCaseGeneratorDeps]) -> str:
    """Return the system prompt for the test case expansion agent.

    The text is static; ``ctx`` is accepted but not read.

    Args:
        ctx: Run context of the current agent run (unused).

    Returns:
        str: The prompt text.
    """
    # The text has no placeholders, so a plain string yields the same value.
    prompt_text = """
    Role:
    You are a data refinement and expansion specialist, responsible for ensuring wide parameter coverage and generating realistic, constraint-aware data for API and ML/AI testing. Your task is to take high-level test cases and enrich them by expanding parameter ranges, exploring edge values, and ensuring the data reflects real-world patterns and constraints.

    Objective:

    Expand test cases by generating diverse parameter values, ensuring broad coverage of normal, edge, and extreme conditions.
    Apply realistic data constraints (e.g., date ranges, field dependencies) to avoid infeasible test scenarios.
    Maximize variability across inputs while adhering to domain-specific logic and operational limits.
    Instructions:

    Parameter Expansion:

    For each test case, vary key parameters (e.g., numeric ranges, string lengths, boolean flags).
    Generate values across full ranges, including minimum, maximum, and boundary values.
    Incorporate random sampling where appropriate to introduce variability.
    Constraint Application:

    Ensure expanded data respects logical dependencies (e.g., start_date must precede end_date).
    Reflect real-world limits (e.g., phone numbers must follow local formats, user IDs must be alphanumeric).
    Apply domain-specific constraints (e.g., healthcare data must pass validation rules, financial data must align with regulations).
    Data Types and Formats:

    Generate diverse formats for fields like dates, strings, and numerical values (e.g., ISO dates, various string encodings).
    Vary payload sizes, testing both minimal and maximal inputs.
    Edge and Adversarial Data:

    Create inputs that test unusual conditions (e.g., empty payloads, long strings, nested JSON structures).
    Ensure adversarial data (e.g., special characters, SQL injection patterns) is included to assess API security.
    """
    return prompt_text

NameError: name 'Field' is not defined

In [97]:
# only expand first family

out_test_case_families_per_persona[list(out_test_case_families_per_persona.keys())[0]]

out_test_cases_per_family: dict[TestCaseFamily, list[TestCase]] = dict()

for family in out_test_case_families_per_persona[list(out_test_case_families_per_persona.keys())[0]]:
    response = await test_case_generator_agent.run(
        f"Expand the test case family of tests: {family.name}. This possible tests variations are: {family.test_variations}. The api spec is: {api_spec}",
        deps=TestCaseGeneratorDeps()
    )
    out_test_cases_per_family[family.name] = response.data


In [98]:
# First expanded test case of the first family.
list(out_test_cases_per_family.values())[0][0]


TestCase(name='Search for pets available for adoption with filters (age and breed)', description='Retrieve pets that are available for adoption filtered by specific age and breed criteria.', input_json={'status': 'available', 'age': 2, 'breed': 'Labrador'}, expected_output_prompt='A list of available pets filtered by age and breed.', expected_output_json=None, preconditions='User has access to the pet adoption API and the filtering criteria are valid.')

## Mock executor

In [78]:
def mock_api(api_spec: dict):
    """
    Build an in-memory stand-in for the pet store API.

    Every call creates its own pet list, seeded with three pets, and returns
    handlers that read and modify that list.

    Args:
        api_spec (dict): OpenAPI specification of the API. The body never
            reads it; the handlers below are hard-coded.

    Returns:
        dict: ``{"endpoints": {...}}`` mapping "GET /pets", "POST /pets" and
        "POST /pets/adopt" to their handler callables.
    """
    # Private data store shared by the three handlers of this instance.
    store = {
        "pets": [
            {"id": "1", "name": "Buddy", "age": 3, "status": "available"},
            {"id": "2", "name": "Max", "age": 2, "status": "available"},
            {"id": "3", "name": "Luna", "age": 1, "status": "adopted"},
        ]
    }

    def get_pets(status: str = None):
        """Mock GET /pets: list pets, optionally filtered by exact status."""
        if not status:
            # No (or empty) status: return the live list itself, unfiltered.
            return {"pets": store["pets"]}
        matching = [entry for entry in store["pets"] if entry["status"] == status]
        return {"pets": matching}

    def add_pet(name: str, age: int):
        """Mock POST /pets: append a new available pet and return it."""
        next_id = str(len(store["pets"]) + 1)
        created = dict(id=next_id, name=name, age=age, status="available")
        store["pets"].append(created)
        return created

    def adopt_pet(pet_id: str):
        """Mock POST /pets/adopt: mark an available pet as adopted."""
        candidates = (
            entry
            for entry in store["pets"]
            if entry["id"] == pet_id and entry["status"] == "available"
        )
        chosen = next(candidates, None)
        if chosen is None:
            return {"error": "Pet not found or not available"}
        chosen["status"] = "adopted"
        return {
            "message": f"Successfully adopted pet {chosen['name']}",
            "adoptionId": f"ADOPT-{pet_id}",
        }

    handlers = {
        "GET /pets": get_pets,
        "POST /pets": add_pet,
        "POST /pets/adopt": adopt_pet,
    }
    return {"endpoints": handlers}

# Example usage: build the mock API. The returned dict exposes the handlers
# under "endpoints", keyed by "<METHOD> <path>".
mock_api_instance = mock_api(api_spec)


In [84]:
# Pretty-print the first expanded test case of the first family as a dict.
pprint(list(out_test_cases_per_family.values())[0][0].model_dump())


{'description': 'The user searches for pets available for adoption filtering '
                'by a specific age range (e.g., puppies under 1 year).',
 'expected_output_json': None,
 'expected_output_prompt': 'List of available pets under 1 year of age.',
 'input_json': None,
 'name': 'Normal Case - Search for available pets by age filter',
 'preconditions': 'User is logged in and has access to the pet search '
                  'function.'}


# Agent Inspector

In [100]:
class AgentInspector:
    """Helpers for inspecting the pydantic_ai ``Agent`` objects defined in this notebook."""

    @staticmethod
    def inspect_agent(agent: Agent) -> dict:
        """
        Inspects an Agent instance and returns its key attributes and configuration.

        Args:
            agent: An instance of Agent to inspect

        Returns:
            dict: Dictionary with the keys "name", "model", "retries",
            "result_type" (as a string) and "has_system_prompt".
        """
        # `system_prompt` is a decorator available on every Agent (this notebook
        # uses it as `@agent.system_prompt`), so `hasattr(agent, "system_prompt")`
        # is always True. Look at the prompts actually registered instead.
        # NOTE(review): `_system_prompts` / `_system_prompt_functions` are private
        # pydantic_ai attributes; confirm the names against the installed version.
        static_prompts = getattr(agent, "_system_prompts", ())
        prompt_functions = getattr(agent, "_system_prompt_functions", ())

        # NOTE(review): the Agent repr printed in this notebook shows only model,
        # name, end_strategy and model_settings, so `retries` and `result_type`
        # may not be public attributes. Read them defensively so the inspector
        # does not raise AttributeError; the values are unchanged when present.
        retries = getattr(agent, "retries", getattr(agent, "_default_retries", None))
        result_type = getattr(agent, "result_type", None)

        inspection_result = {
            "name": agent.name,
            "model": agent.model,
            "retries": retries,
            "result_type": str(result_type),
            "has_system_prompt": bool(static_prompts) or bool(prompt_functions),
        }
        return inspection_result

    @staticmethod
    def get_all_agents() -> list[Agent]:
        """
        Returns the Agent instances bound to global names of this module/notebook.

        Agents that are not reachable through a global name are not found. An
        agent referenced by several globals (an alias, or the notebook's
        `_` / `_N` output-history variables) is returned only once.

        Returns:
            list[Agent]: Distinct Agent instances, in order of first appearance
        """
        seen_ids: set[int] = set()
        agents: list[Agent] = []
        # Snapshot the values so the scan is unaffected by namespace changes.
        for obj in list(globals().values()):
            if isinstance(obj, Agent) and id(obj) not in seen_ids:
                seen_ids.add(id(obj))
                agents.append(obj)
        return agents
    
# List every Agent currently bound to a global name in this notebook.
AgentInspector.get_all_agents()

[Agent(model=OpenAIModel(model_name='gpt-4o-mini'), name='test_case_generator', end_strategy='early', model_settings=None),
 Agent(model=OpenAIModel(model_name='gpt-4o-mini'), name='user_modelling_agent', end_strategy='early', model_settings=None),
 Agent(model=OpenAIModel(model_name='gpt-4o-mini'), name='test_case_family_agent', end_strategy='early', model_settings=None),
 Agent(model=OpenAIModel(model_name='gpt-4o-mini'), name='test_case_generator_agent', end_strategy='early', model_settings=None)]