In [35]:
from model_factory import get_model, ModelName
# Model handle from the project-local model_factory; the use-case and
# test-generation cells further down call `llm.invoke(...)` on it.
llm = get_model(ModelName.GPT41MINI)

# ORIGINAL CODE

In [None]:
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
# Single model instance passed to both worker agents and to the supervisor below.
model = ChatOpenAI(model="gpt-4o-mini")

# Create specialized agents

def add(a: float, b: float) -> float:
    """Add two numbers."""
    # The docstring above is left untouched: it is handed to the agent as the tool description.
    total = a + b
    return total

def multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    # The docstring above is left untouched: it is handed to the agent as the tool description.
    product = a * b
    return product

def web_search(query: str) -> str:
    """Search the web for information."""
    # Stubbed search: `query` is accepted but never consulted; the same canned
    # FAANG headcount summary is returned for every call.
    headcount_lines = (
        "Here are the headcounts for each of the FAANG companies in 2024:",
        "1. **Facebook (Meta)**: 67,317 employees.",
        "2. **Apple**: 164,000 employees.",
        "3. **Amazon**: 1,551,000 employees.",
        "4. **Netflix**: 14,000 employees.",
        "5. **Google (Alphabet)**: 181,269 employees.",
    )
    return "\n".join(headcount_lines)

# Worker agent whose only tools are the two arithmetic helpers.
math_agent = create_react_agent(
    name="math_expert",
    prompt="You are a math expert. Always use one tool at a time.",
    tools=[add, multiply],
    model=model,
)

# Worker agent whose only tool is the (stubbed) web search.
research_agent = create_react_agent(
    name="research_expert",
    prompt="You are a world class researcher with access to web search. Do not do any math.",
    tools=[web_search],
    model=model,
)

# Supervisor graph that hands work to the research and math agents.
# NOTE(review): the prompt names "research_agent"/"math_agent", whereas the
# agents are registered as "research_expert"/"math_expert" — confirm intended.
workflow = create_supervisor(
    [research_agent, math_agent],
    prompt=(
        "You are a team supervisor managing a research expert and a math expert. "
        "For current events, use research_agent. For math problems, use math_agent."
    ),
    model=model,
)

# Compile the supervisor workflow, then send it one user question.
app = workflow.compile()
result = app.invoke(
    {
        "messages": [
            dict(
                role="user",
                content="what's the combined headcount of the FAANG companies in 2024?",
            )
        ]
    }
)

In [30]:
# Source of the supervisor workflow kept as text so it can be embedded in the
# test-generation prompt. The newline escapes inside the embedded string
# literals are written with a doubled backslash: with a single backslash the
# outer triple-quoted string would turn them into real line breaks, splitting
# each embedded literal across two lines and leaving the text invalid Python.
code = """ 
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
model = ChatOpenAI(model="gpt-4o-mini")

# Create specialized agents

def add(a: float, b: float) -> float:
    \"\"\"Add two numbers.\"\"\"
    return a + b

def multiply(a: float, b: float) -> float:
    \"\"\"Multiply two numbers.\"\"\"
    return a * b

def web_search(query: str) -> str:
    \"\"\"Search the web for information.\"\"\"
    return (
        "Here are the headcounts for each of the FAANG companies in 2024:\\n"
        "1. **Facebook (Meta)**: 67,317 employees.\\n"
        "2. **Apple**: 164,000 employees.\\n"
        "3. **Amazon**: 1,551,000 employees.\\n"
        "4. **Netflix**: 14,000 employees.\\n"
        "5. **Google (Alphabet)**: 181,269 employees."
    )

math_agent = create_react_agent(
    model=model,
    tools=[add, multiply],
    name="math_expert",
    prompt="You are a math expert. Always use one tool at a time."
)

research_agent = create_react_agent(
    model=model,
    tools=[web_search],
    name="research_expert",
    prompt="You are a world class researcher with access to web search. Do not do any math."
)

# Create supervisor workflow
workflow = create_supervisor(
    [research_agent, math_agent],
    model=model,
    prompt=(
        "You are a team supervisor managing a research expert and a math expert. "
        "For current events, use research_agent. "
        "For math problems, use math_agent."
    )
)

# Compile and run
app = workflow.compile()
result = app.invoke({
    "messages": [
        {
            "role": "user",
            "content": "what's the combined headcount of the FAANG companies in 2024?"
        }
    ]
})
"""

# JSON

In [23]:
# Show the compiled graph's structure (its nodes and edges) as the cell output.
app.get_graph().to_json()

{'nodes': [{'id': '__start__',
   'type': 'runnable',
   'data': {'id': ['langchain', 'schema', 'runnable', 'RunnablePassthrough'],
    'name': '__start__'}},
  {'id': 'supervisor',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'graph', 'state', 'CompiledStateGraph'],
    'name': 'supervisor'}},
  {'id': 'research_expert',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'utils', 'runnable', 'RunnableCallable'],
    'name': 'research_expert'}},
  {'id': 'math_expert',
   'type': 'runnable',
   'data': {'id': ['langgraph', 'utils', 'runnable', 'RunnableCallable'],
    'name': 'math_expert'}},
  {'id': '__end__'}],
 'edges': [{'source': '__start__', 'target': 'supervisor'},
  {'source': 'math_expert', 'target': 'supervisor'},
  {'source': 'research_expert', 'target': 'supervisor'},
  {'source': 'supervisor', 'target': 'math_expert', 'conditional': True},
  {'source': 'supervisor', 'target': 'research_expert', 'conditional': True},
  {'source': 'supervisor', 'target': '__en

In [None]:
# Graph structure (nodes and edges) as a JSON document, for substitution into
# the `{json_str}` placeholder of the prompts below.
# This text is passed as a format *value*, not as a template, so braces must
# not be doubled: doubled braces would reach the model literally. The content
# is strict JSON (double quotes, lowercase `true`) so that it matches what the
# prompts call it and can be parsed with `json.loads`.
json_str = """
{"nodes": [{"id": "__start__",
   "type": "runnable",
   "data": {"id": ["langchain", "schema", "runnable", "RunnablePassthrough"],
    "name": "__start__"}},
  {"id": "supervisor",
   "type": "runnable",
   "data": {"id": ["langgraph", "graph", "state", "CompiledStateGraph"],
    "name": "supervisor"}},
  {"id": "research_expert",
   "type": "runnable",
   "data": {"id": ["langgraph", "utils", "runnable", "RunnableCallable"],
    "name": "research_expert"}},
  {"id": "math_expert",
   "type": "runnable",
   "data": {"id": ["langgraph", "utils", "runnable", "RunnableCallable"],
    "name": "math_expert"}},
  {"id": "__end__"}],
 "edges": [{"source": "__start__", "target": "supervisor"},
  {"source": "math_expert", "target": "supervisor"},
  {"source": "research_expert", "target": "supervisor"},
  {"source": "supervisor", "target": "math_expert", "conditional": true},
  {"source": "supervisor", "target": "research_expert", "conditional": true},
  {"source": "supervisor", "target": "__end__", "conditional": true}]}"""

# USE CASE DRY RUNS

In [38]:
# Prompt template that asks the model to write use cases, each with a dry run,
# for the workflow graph. `json_str` is its only placeholder.
SYS_PROMPT= ChatPromptTemplate.from_template("""
You are given the json of a workflow graph below.
{json_str}
You are supposed to write use cases for the graph.
You will also do dry run of the graph with the use cases.
The use cases should be in the format of a list of dictionaries.
Each dictionary should have the following
keys:
- name: The name of the use case
- description: The description of the use case
- dry_run: The dry run of the use case
""")

In [39]:
# Render the template into chat messages and ask the model for use cases.
# `format_messages` is used instead of `format` because `format` on a
# ChatPromptTemplate flattens the prompt into one string prefixed with the
# role label ("Human: "), which would then be sent as part of the content.
use_cases = llm.invoke(SYS_PROMPT.format_messages(json_str=json_str))

In [41]:
# Print the model's reply containing the generated use cases and dry runs.
use_cases.pretty_print()


Here are some use cases for the given workflow graph along with their dry runs.

```json
[
  {
    "name": "Simple Math Query",
    "description": "A user asks a math-related question. The supervisor routes the query to the math_expert for processing, then returns to the supervisor and ends the workflow.",
    "dry_run": [
      "__start__ receives input",
      "Input passed to supervisor",
      "Supervisor evaluates input and routes to math_expert",
      "math_expert processes the math query",
      "Result returned to supervisor",
      "Supervisor decides to end workflow",
      "Workflow ends at __end__"
    ]
  },
  {
    "name": "Research Question",
    "description": "A user asks a research-related question. The supervisor routes the query to the research_expert for processing, then returns to the supervisor and ends the workflow.",
    "dry_run": [
      "__start__ receives input",
      "Input passed to supervisor",
      "Supervisor evaluates input and routes to research_

# EXAMPLE CODE FILE

In [42]:
# Example module kept as text: a small tool-calling graph followed by
# LangSmith-marked pytest cases. It is substituted into the `{pytest_infra}`
# placeholder of the test-generation prompt as a formatting reference.
# NOTE(review): the example logs reference outputs under keys such as "sql" and
# "ticker", which look like leftovers from another example — confirm whether
# they should be renamed before the model imitates them.
pytest_infra= """
from langsmith import testing as t
from langchain_openai import ChatOpenAI
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.messages import HumanMessage, ToolMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool

@tool
def increment_by1_tool(x: str) -> str:
    \"\"\"This is called when user query pertains to ading by 1\"\"\"
    try:
        x = int(x)
        return str(x + 1)
    except ValueError:
        return "Invalid input, please provide a number."

SYSTEM_PROMPT = ChatPromptTemplate.from_template(
    \"\"\"
You are a helpful assistant tasked with answering user queries.

You are given a list of messages. Your job is to analyze the full message history, focusing especially on the last message in the list.

Instructions:

Determine if a tool needs to be called to answer the user's question.
Only call a tool if it is required to answer the question.
Do not call any tool more than once per question.
If a tool has already been called and provided a response, use that tool response to answer the question.
If no tool is needed, answer the user directly.
Available tools:

increment_by1_tool: Adds 1 to a number. Input and output are strings.
Be efficient and avoid redundant tool usage.\"\"\"
)

def node_a(state: MessagesState):
    llm = ChatOpenAI(model= "gpt-4o-mini", temperature=0)
    llm_with_tools = llm.bind_tools([increment_by1_tool])
    response = llm_with_tools.invoke(
    [SystemMessage(content=SYSTEM_PROMPT.format())] + state["messages"])
    return {
        "messages": [response]
    }

workflow = StateGraph(MessagesState)
workflow.add_node("node_a", node_a)
workflow.add_node("tools", ToolNode([increment_by1_tool]))

workflow.add_edge(START, "node_a")
workflow.add_conditional_edges("node_a", tools_condition, ["tools", END])
workflow.add_edge("tools", "node_a")
graph = workflow.compile()

###################### tests/test_my_app.py ######################
import pytest
from langsmith import testing as t

# adding a LLM judge on the final response
@pytest.mark.langsmith  # <-- Mark as a LangSmith test case
def test_langgraph_response() -> None:
    user_query = "How are you"
    t.log_inputs({"user_query": user_query})  # <-- Log example inputs, optional

    expected = "I'm just a computer program, so I don't have feelings, but I'm here and ready to help you! How can I assist you today?"
    t.log_reference_outputs({"sql": expected})  # <-- Log example reference outputs, optional

    response = graph.invoke({"messages": [HumanMessage(content=user_query)]})
    t.log_outputs({"response": response})  # <-- Log run outputs, optional
    
    # t.log_feedback(key="valid_sql", score=is_valid_sql(sql))  # <-- Log feedback, optional

    assert response["messages"][-1].content == expected  # <-- Test pass/fail status automatically logged to LangSmith under 'pass' feedback key

def run_graph(inputs: dict) -> dict:
        \"\"\"Run graph and track the trajectory it takes along with the final response.\"\"\"
        trajectory = []
        # Set subgraph=True to stream events from subgraphs of the main graph: https://langchain-ai.github.io/langgraph/how-tos/streaming-subgraphs/
        # Set stream_mode="debug" to stream all possible events: https://langchain-ai.github.io/langgraph/concepts/streaming
        for namespace, chunk in graph.stream({"messages": [
                {
                    "role": "user",
                    "content": inputs['user_query'],
                }
            ]}, subgraphs=True, stream_mode="debug"):
            # Event type for entering a node
            if chunk['type'] == 'task':
                # Record the node name
                trajectory.append(chunk['payload']['name'])
                # Given how we defined our dataset, we also need to track when specific tools are
                # called by our question answering ReACT agent. These tool calls can be found
                # when the ToolsNode (named "tools") is invoked by looking at the AIMessage.tool_calls
                # of the latest input message.
                if chunk['payload']['name'] == 'tools' and chunk['type'] == 'task':
                    for tc in chunk['payload']['input']['messages'][-1].tool_calls:
                        trajectory.append(tc['name'])

        return {"trajectory": trajectory}


@pytest.mark.langsmith  # <-- Mark as a LangSmith test case
# parametrize the test with different inputs and outputs
@pytest.mark.parametrize(
    "user_query, expected",
    [
        ("What is 1 added to 3", ["node_a", "tools", "increment_by1_tool", "node_a"]),
        ("How are you", ["node_a"]),
        ("What is your name?",["node_a"])
    ],
)
def test_graph_tract(user_query: str, expected: list) -> None:
    t.log_inputs({"user_query": user_query})
    trajectory = run_graph({"user_query": user_query})
    assert trajectory['trajectory'] == expected

@pytest.mark.langsmith
def test_searches_for_correct_ticker() -> None:
  \"\"\"Test that the model looks up the correct ticker on simple query.\"\"\"
  # Log the test example
  query = "What is 1 added to 3"
  t.log_inputs({"query": query})
  expected = "3"
  t.log_reference_outputs({"ticker": expected})

  # Call the agent's model node directly instead of running the full ReACT loop.
  result = graph.nodes["node_a"].invoke(
      {"messages": [{"role": "user", "content": query}]}
  )
  tool_calls = result["messages"][0].tool_calls
  if tool_calls[0]["name"] == increment_by1_tool.name:
      actual = tool_calls[0]["args"]["x"]
  else:
      actual = None
  t.log_outputs({"ticker": actual})

  # Check that the right ticker was queried
  assert actual == expected"""

# GENERATING TEST CASES

In [54]:
# Prompt template for generating a pytest file for the workflow graph.
# Placeholders: `json_str` (graph structure), `code` (graph source),
# `use_cases` (use cases with dry runs) and `pytest_infra` (example file whose
# test format should be imitated).
TEST_GEN_PROMPT = ChatPromptTemplate.from_template("""
You are given the json of a workflow graph below.
<JSON>
{json_str}
</JSON>
You are also given the code of the graph below.
<CODE>
{code}
</CODE>
You are given the use cases for a workflow graph along with dry runs.
<USE_CASES>
{use_cases}
</USE_CASES>
                                                   
Below is an example of a python file with the graph code and the accompanying pytest test cases.
<PYTEST_EXAMPLE>
{pytest_infra}
</PYTEST_EXAMPLE>
                                                   
You are supposed to write test cases for the graph in the <CODE> section, use the <JSON> section to understand the graph and the <USE_CASES> for generating test case inputs.
                                              
You are supposed to see the kind of tests that are being written in the <PYTEST_EXAMPLE> section and write your own test cases in the same format.
Also include the code in the output at the top""")

In [55]:
# Ask the model to generate the test file.
# - `use_cases.content` passes only the text of the earlier reply; passing the
#   message object itself would embed its full repr (metadata included) in the
#   <USE_CASES> section of the prompt.
# - `format_messages` keeps the prompt as chat messages; `format` would flatten
#   it into one string prefixed with the role label ("Human: ").
final_file = llm.invoke(
    TEST_GEN_PROMPT.format_messages(
        json_str=json_str,
        code=code,
        use_cases=use_cases.content,
        pytest_infra=pytest_infra,
    )
)

In [56]:
# Print the model's reply containing the generated code and test cases.
final_file.pretty_print()


```python
from langsmith import testing as t
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_core.prompts import ChatPromptTemplate
import pytest

# Re-define the functions and agents from the original code for testing

def add(a: float, b: float) -> float:
    """Add two numbers."""
    return a + b

def multiply(a: float, b: float) -> float:
    """Multiply two numbers."""
    return a * b

def web_search(query: str) -> str:
    """Search the web for information."""
    return (
        "Here are the headcounts for each of the FAANG companies in 2024:\n"
        "1. **Facebook (Meta)**: 67,317 employees.\n"
        "2. **Apple**: 164,000 employees.\n"
        "3. **Amazon**: 1,551,000 employees.\n"
        "4. **Netflix**: 14,000 employees.\n"
        "5. **Google (Alphabet)**: 181,269 employees."
    )

model = ChatOpenAI(model="gpt-4o-mini")

math_agent = create_react_agent(