In [35]:
from model_factory import get_model, ModelName
# Model used later in the notebook for the use-case and test-generation prompts.
llm = get_model(ModelName.GPT41MINI)

# ORIGINAL CODE

In [None]:
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
# Chat model shared by both worker agents and the supervisor defined below.
model = ChatOpenAI(model="gpt-4o-mini")

# Create specialized agents

def add(a: float, b: float) -> float:
    """Add two numbers."""
    # Registered as a tool on the math agent. The docstring is left verbatim on
    # purpose: it presumably doubles as the tool description — TODO confirm.
    total = a + b
    return total

def multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    # Registered as a tool on the math agent. The docstring is left verbatim on
    # purpose: it presumably doubles as the tool description — TODO confirm.
    product = a * b
    return product

def web_search(query: str) -> str:
    """Search the web for information."""
    # Canned stand-in for a real search: `query` is ignored and the same
    # FAANG headcount summary is returned on every call.
    headcount_lines = [
        "Here are the headcounts for each of the FAANG companies in 2024:",
        "1. **Facebook (Meta)**: 67,317 employees.",
        "2. **Apple**: 164,000 employees.",
        "3. **Amazon**: 1,551,000 employees.",
        "4. **Netflix**: 14,000 employees.",
        "5. **Google (Alphabet)**: 181,269 employees.",
    ]
    return "\n".join(headcount_lines)

# Worker agent that only has the two arithmetic tools available.
math_agent = create_react_agent(
    name="math_expert",
    model=model,
    prompt="You are a math expert. Always use one tool at a time.",
    tools=[add, multiply],
)

# Worker agent that only has the (canned) web search tool available.
research_agent = create_react_agent(
    name="research_expert",
    model=model,
    prompt="You are a world class researcher with access to web search. Do not do any math.",
    tools=[web_search],
)

# Supervisor workflow that coordinates the two worker agents.
# NOTE(review): the prompt mentions "research_agent"/"math_agent", while the
# agents are registered under the names "research_expert"/"math_expert" —
# confirm whether the prompt should use the registered names.
workflow = create_supervisor(
    [research_agent, math_agent],
    prompt=(
        "You are a team supervisor managing a research expert and a math expert. "
        "For current events, use research_agent. "
        "For math problems, use math_agent."
    ),
    model=model,
)

# Compile the workflow, then run it once on the sample headcount question.
app = workflow.compile()
result = app.invoke(
    {
        "messages": [
            {"role": "user", "content": "what's the combined headcount of the FAANG companies in 2024?"},
        ],
    }
)

In [30]:
# Source of the supervisor example, kept as text so it can be embedded in the
# test-generation prompt. This must be a raw string: in a normal string literal
# the "\n" escapes inside the embedded string literals turn into real line
# breaks, leaving the embedded snippet with unterminated strings (it would no
# longer compile). Triple single quotes let the embedded docstrings stay as-is.
code = r'''
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
model = ChatOpenAI(model="gpt-4o-mini")

# Create specialized agents

def add(a: float, b: float) -> float:
    """Add two numbers."""
    return a + b

def multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    return a * b

def web_search(query: str) -> str:
    """Search the web for information."""
    return (
        "Here are the headcounts for each of the FAANG companies in 2024:\n"
        "1. **Facebook (Meta)**: 67,317 employees.\n"
        "2. **Apple**: 164,000 employees.\n"
        "3. **Amazon**: 1,551,000 employees.\n"
        "4. **Netflix**: 14,000 employees.\n"
        "5. **Google (Alphabet)**: 181,269 employees."
    )

math_agent = create_react_agent(
    model=model,
    tools=[add, multiply],
    name="math_expert",
    prompt="You are a math expert. Always use one tool at a time."
)

research_agent = create_react_agent(
    model=model,
    tools=[web_search],
    name="research_expert",
    prompt="You are a world class researcher with access to web search. Do not do any math."
)

# Create supervisor workflow
workflow = create_supervisor(
    [research_agent, math_agent],
    model=model,
    prompt=(
        "You are a team supervisor managing a research expert and a math expert. "
        "For current events, use research_agent. "
        "For math problems, use math_agent."
    )
)

# Compile and run
app = workflow.compile()
result = app.invoke({
    "messages": [
        {
            "role": "user",
            "content": "what's the combined headcount of the FAANG companies in 2024?"
        }
    ]
})
'''

# JSON

In [23]:
# Show the compiled graph's structure (nodes and edges) as a dict for inspection.
app.get_graph().to_json()

{'nodes': [{'id': '__start__',
   'type': 'runnable',
   'data': {'id': ['langchain', 'schema', 'runnable', 'RunnablePassthrough'],
    'name': '__start__'}},
  {'id': 'supervisor',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'graph', 'state', 'CompiledStateGraph'],
    'name': 'supervisor'}},
  {'id': 'research_expert',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'utils', 'runnable', 'RunnableCallable'],
    'name': 'research_expert'}},
  {'id': 'math_expert',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'utils', 'runnable', 'RunnableCallable'],
    'name': 'math_expert'}},
  {'id': '__end__'}],
 'edges': [{'source': '__start__', 'target': 'supervisor'},
  {'source': 'math_expert', 'target': 'supervisor'},
  {'source': 'research_expert', 'target': 'supervisor'},
  {'source': 'supervisor', 'target': 'math_expert', 'conditional': True},
  {'source': 'supervisor', 'target': 'research_expert', 'conditional': True},
  {'source': 'supervisor', 'target': '__en

In [None]:
# Text snapshot of the `app.get_graph().to_json()` output, for use in the
# prompts below. It is passed to the prompt templates as a *value* (not as
# template text), so its braces must not be doubled: doubled braces would be
# sent to the model verbatim. Note this is a Python dict repr (single quotes,
# `True`), not strict JSON.
json_str = """
{'nodes': [{'id': '__start__',
   'type': 'runnable',
   'data': {'id': ['langchain', 'schema', 'runnable', 'RunnablePassthrough'],
    'name': '__start__'}},
  {'id': 'supervisor',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'graph', 'state', 'CompiledStateGraph'],
    'name': 'supervisor'}},
  {'id': 'research_expert',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'utils', 'runnable', 'RunnableCallable'],
    'name': 'research_expert'}},
  {'id': 'math_expert',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'utils', 'runnable', 'RunnableCallable'],
    'name': 'math_expert'}},
  {'id': '__end__'}],
 'edges': [{'source': '__start__', 'target': 'supervisor'},
  {'source': 'math_expert', 'target': 'supervisor'},
  {'source': 'research_expert', 'target': 'supervisor'},
  {'source': 'supervisor', 'target': 'math_expert', 'conditional': True},
  {'source': 'supervisor', 'target': 'research_expert', 'conditional': True},
  {'source': 'supervisor', 'target': '__end__', 'conditional': True}]}"""

# USE CASE DRY RUNS

In [38]:
# Prompt asking the model to derive use cases, each with a dry run, from the
# graph description. `{json_str}` is the only template variable.
# NOTE(review): despite the SYS_ prefix this is built with `from_template`,
# which presumably yields a human-role message rather than a system one — confirm.
SYS_PROMPT= ChatPromptTemplate.from_template("""
You are given the json of a workflow graph below.
{json_str}
You are supposed to write use cases for the graph.
You will also do dry run of the graph with the use cases.
The use cases should be in the format of a list of dictionaries.
Each dictionary should have the following
keys:
- name: The name of the use case
- description: The description of the use case
- dry_run: The dry run of the use case
""")

In [39]:
# Ask the model for use cases and dry runs of the graph. `format_messages`
# hands the model the chat messages themselves; `format` would flatten the
# prompt into a single role-prefixed string first.
use_cases = llm.invoke(SYS_PROMPT.format_messages(json_str=json_str))

In [41]:
# Display the model's reply containing the generated use cases.
use_cases.pretty_print()


Here are some use cases for the given workflow graph along with their dry runs.

```json
[
  {
    "name": "Simple Math Query",
    "description": "A user asks a math-related question. The supervisor routes the query to the math_expert for processing, then returns to the supervisor and ends the workflow.",
    "dry_run": [
      "__start__ receives input",
      "Input passed to supervisor",
      "Supervisor evaluates input and routes to math_expert",
      "math_expert processes the math query",
      "Result returned to supervisor",
      "Supervisor decides to end workflow",
      "Workflow ends at __end__"
    ]
  },
  {
    "name": "Research Question",
    "description": "A user asks a research-related question. The supervisor routes the query to the research_expert for processing, then returns to the supervisor and ends the workflow.",
    "dry_run": [
      "__start__ receives input",
      "Input passed to supervisor",
      "Supervisor evaluates input and routes to research_

# GENERATING TEST CASES

In [58]:
from bs4 import BeautifulSoup
import html2text
import httpx

def fetch_documents(url: str) -> str:
    """Fetch a document from a URL and return the markdownified text.

    All ``<img>`` tags are removed, then the first matching container is
    converted to markdown: the ``theme-doc-markdown markdown`` div, else the
    ``<article>`` element, else the ``<html>`` element, else the whole parsed
    document.

    Args:
        url (str): The URL of the document to fetch.

    Returns:
        str: The markdownified text of the document. If the request fails or
        the server answers with an error status, a string starting with
        ``"Encountered an HTTP error: "`` is returned instead of raising.
    """
    try:
        # The context manager closes the client on every path, including
        # errors; previously the client was never closed.
        with httpx.Client(follow_redirects=True, timeout=10) as httpx_client:
            # The client-level timeout already applies to this request.
            response = httpx_client.get(url)
            response.raise_for_status()
            # Pass the response body to the parser, not the Response object.
            html_content = response.content
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        return f"Encountered an HTTP error: {str(e)}"

    soup = BeautifulSoup(html_content, 'html.parser')

    # Images carry no useful text for the prompt; drop them before conversion.
    for img_tag in soup.find_all('img'):
        img_tag.decompose()

    # Most specific container first, falling back to the entire document.
    target = (
        soup.find('div', class_="theme-doc-markdown markdown")  # langchain docs layout
        or soup.find('article')  # langgraph docs layout
        or soup.find('html')
        or soup
    )
    return html2text.html2text(str(target))

In [70]:
# Prompt asking the model to write test cases for the graph, modeled on the
# supplied reference documents.
# Template variables: json_str, code, use_cases, documents1, documents2.
TEST_GEN_PROMPT = ChatPromptTemplate.from_template("""
You are given the json of a workflow graph below.
<JSON>
{json_str}
</JSON>
You are also given the code of the graph below.
<CODE>
{code}
</CODE>
You are given the use cases for a workflow graph along with dry runs.
<USE_CASES>
{use_cases}
</USE_CASES>

Below are documents which contain information about how to write test cases for the graph.
<DOCUMENTS>
{documents1}
{documents2}
</DOCUMENTS>

You are supposed to write test cases for the graph in the <CODE> section, use the <JSON> section to understand the graph and the <USE_CASES> for generating test case inputs.

You are supposed to see the kind of tests that are being written in the <DOCUMENTS> section and write your own test cases in the same format.
The tests should cover the following:
Final response: The inputs are a prompt and an optional list of tools. The output is the final agent response.
Trajectory: As before, the inputs are a prompt and an optional list of tools. The output is the list of tool calls
Single step: As before, the inputs are a prompt and an optional list of tools. The output is the tool call.

Use pytest parametrize when possible.
Also include the code in the output at the top""")

In [71]:
# Generate the test file from the graph description, its source, the use
# cases, and two reference documents fetched at call time.
# The use cases are passed as the reply's text (`.content`): interpolating the
# message object itself would put its repr, metadata included, into the prompt.
# `format_messages` hands the model chat messages instead of a flattened string.
final_file = llm.invoke(TEST_GEN_PROMPT.format_messages(
    json_str=json_str,
    code=code,
    use_cases=use_cases.content,
    documents1=fetch_documents("https://docs.smith.langchain.com/evaluation/tutorials/testing"),
    documents2=fetch_documents("https://docs.smith.langchain.com/evaluation/tutorials/agents"),
))

In [72]:
# Display the model's reply containing the generated test file.
final_file.pretty_print()


```python
import pytest
from langsmith import testing as t
from typing_extensions import Annotated, TypedDict

# Assuming the workflow app is imported as `app` from the compiled workflow in the <CODE> section
# For example:
# from my_workflow_module import app

# Since the graph has nodes: __start__, supervisor, math_expert, research_expert, __end__
# and the supervisor routes queries conditionally to math_expert or research_expert or ends directly,
# we will write tests for:
# - Final response correctness
# - Trajectory of tool calls (which expert was called)
# - Single step routing decision by supervisor

# We define a TypedDict for structured response if needed
class StructuredResponse(TypedDict):
    numeric_answer: Annotated[float | None, ...]
    text_answer: Annotated[str | None, ...]
    reasoning: str

# --- Final Response Tests ---

@pytest.mark.langsmith
@pytest.mark.parametrize(
    "query, expected_in_response",
    [
        (
            "what's the combined headcount o