# Objective

To present an overview of the four important Agentic AI patterns - Reflection, Tool Use, Planning and Multi-Agent Collaboration.

# Setup

## Installation

In [1]:
! pip install -q openai==1.55.3 \
                 langchain==0.3.7 \
                 langchain-openai==0.2.9 \
                 langchain-experimental==0.3.3

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.6/389.6 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.0/209.0 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.8/311.8 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
! pip install -q git+https://github.com/openai/swarm.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.4/71.4 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.7/218.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.1/99.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m31.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m469.0/469.0 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for swarm (pyproject.toml) ... [?25l[?25hdone


## Imports

In [3]:
import json

from openai import AzureOpenAI
from langchain import hub

from langchain.agents import create_react_agent, Tool, AgentExecutor
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_openai import AzureChatOpenAI
from langchain_experimental.utilities import PythonREPL

from swarm import Swarm, Agent

In [None]:
# Read the Azure OpenAI settings file as raw text; the JSON is parsed separately.
with open('config-azure.json') as config_file:
    configs = config_file.read()

In [None]:
# Parse the raw JSON text into a dict; the 'AZURE_OPENAI_KEY' and
# 'AZURE_OPENAI_ENDPOINT' entries are read from it when the clients are built.
creds = json.loads(configs)

In [None]:
# Azure OpenAI SDK client: used directly for the reflection example and
# wrapped by Swarm for the multi-agent example.
client = AzureOpenAI(
    azure_endpoint=creds['AZURE_OPENAI_ENDPOINT'],
    api_key=creds['AZURE_OPENAI_KEY'],
    api_version='2024-02-01',
)

In [None]:
# LangChain chat model used by the tool-use and planning (ReAct) examples.
# temperature=0 is set explicitly for this model.
llm = AzureChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    api_version="2024-02-01",
    api_key=creds['AZURE_OPENAI_KEY'],
    azure_endpoint=creds['AZURE_OPENAI_ENDPOINT'],
)

In [38]:
# Swarm runner built on the Azure OpenAI client; used by the multi-agent
# collaboration example (Pattern 4).
swarm_client = Swarm(client)

# Pattern 1: Reflection

Self-reflection - Ask the LLM to reflect on its own work to improve its answer.

Consider the following use case where the LLM is tasked to extract structured information from medical notes. However, instead of asking the LLM to directly provide the answer, we present the output from the generator LLM to a reflector LLM (in this case the same model). The feedback from the reflector is used by the generator to improve its answer.

In [7]:
# Generator and reflector deliberately use the same model in this example.
main_model = 'gpt-4o-mini'
critique_model = main_model

In [8]:
# Sample medical note used as the user input for the reflection example.
# Note that it records no weight and that the only condition mentioned is IBS,
# so the `weight` and `diagnosis` schema fields cannot be filled from the note.
medical_note_data = """
Medical Notes:
---
Patient Name: Ms. Krishnaveni
Age: 45 years
Gender: Female

Chief Complaint:
Ms. Krishnaveni presented with complaints of persistent abdominal pain, bloating, and changes in bowel habits over the past two months.

History of Present Illness:
Ms. Krishnaveni reports experiencing intermittent abdominal pain, predominantly in the lower abdomen, accompanied by bloating and alternating episodes of diarrhea and constipation. She describes the pain as crampy in nature, relieved partially by defecation but worsening after meals. There is no association with specific food items. She denies any rectal bleeding, unintended weight loss, or fever.

Past Medical History:
Ms. Krishnaveni has a history of irritable bowel syndrome (IBS), diagnosed five years ago, managed with dietary modifications and occasional use of over-the-counter antispasmodics.

Medications:
She occasionally takes over-the-counter antispasmodics for symptomatic relief of abdominal discomfort related to IBS.

Family History:
There is no significant family history of gastrointestinal disorders or malignancies.

Social History:
Ms. Krishnaveni is a non-smoker and does not consume alcohol. She works as a teacher in a local school.
"""

The system message for the generator is below:

In [9]:
# System prompt for the generator: defines the extraction schema and tells the
# model to revise its answer when the user turn contains feedback.
system_message = """
You are an expert assistant to a hospital administration team working on extracting important information from medical notes made by doctors.
Extract relevant information from the note presented by the user with the following schema.
- age: integer, age of the patient
- gender: string, can be one of male, female or other
- diagnosis: string, can be one of migraine, diabetes, arthritis and acne
- weight: integer, weight of the patient
- smoking: string, can be one of yes or no
Use information ONLY from the medical note to come up with the JSON output.

If you receive feedback from the user, use it to provide a revised version of your answer.
"""

Notice how the system message here explicitly acknowledges that feedback might be provided and should be used to improve the answer.

In [10]:
# System prompt for the reflector. This must be an f-string: the prompt promises
# that the medical note is "presented further below", so the note text has to be
# interpolated here. With a plain string the model would receive the literal
# text "{medical_note_data}" and critique the JSON without seeing the note.
reflection_system_message = f"""
You are an expert assistant to a hospital administration team who is tasked to generate critique and recommendations for output from an LLM.
The input will contain an attempt by an LLM to extract relevant information in a JSON format of a medical note presented further below
The LLM was instructed that the JSON output needs to be extracted according to the following schema.
- age: integer, age of the patient
- gender: string, can be one of male, female or other
- diagnosis: string, can be one of migraine, diabetes, arthritis and acne
- weight: integer, weight of the patient
- smoking: string, can be one of yes or no

When you review the LLM attempt ensure that your critique is in accordance with the above schema.
While you are checking the input entered by the user, check if the input contains only the JSON and no additional information.
Provide explicit feedback if you notice additional information apart from the JSON.
Do not provide any suggestions for the output; restrict yourself to feedback.
---
{medical_note_data}
"""

Now let us run the first generation.

In [11]:
# First pass: the generator extracts the schema fields from the medical note.
first_completion = client.chat.completions.create(
    model=main_model,
    temperature=0.2,
    messages=[
        {'role': 'system', 'content': system_message},
        {'role': 'user', 'content': medical_note_data},
    ],
)
first_response = first_completion.choices[0].message.content

In [12]:
print(first_response)

```json
{
  "age": 45,
  "gender": "female",
  "diagnosis": "other",
  "weight": null,
  "smoking": "no"
}
```


We will now present this output to the reflector that will present a critique according to our instructions above.

In [13]:
# Reflection pass: the reflector receives the generator's output as the user
# turn and returns a critique of it.
critique_completion = client.chat.completions.create(
    model=critique_model,
    temperature=0,
    messages=[
        {'role': 'system', 'content': reflection_system_message},
        {'role': 'user', 'content': first_response},
    ],
)
first_critique = critique_completion.choices[0].message.content

In [14]:
print(first_critique)

The JSON output provided contains several issues according to the specified schema:

1. **Diagnosis**: The value for "diagnosis" is "other," which is not one of the allowed options (migraine, diabetes, arthritis, or acne). This needs to be corrected to one of the specified diagnoses.

2. **Weight**: The value for "weight" is null. According to the schema, this should be an integer representing the patient's weight. A valid integer value needs to be provided.

Additionally, I noticed that the JSON output is presented correctly without any additional information, which is in line with the requirements. 

Please address the issues mentioned above to ensure compliance with the schema.


As can be seen from the above output, the reflector identified several issues with the output. We can now present this critique as feedback to the original generator so it can amend its response.

In [15]:
# Revision pass: replay the original exchange (note -> first answer) and append
# the critique as a new user turn so the generator can revise its answer.
revision_messages = [
    {'role': 'system', 'content': system_message},
    {'role': 'user', 'content': medical_note_data},
    {'role': 'assistant', 'content': first_response},
    {'role': 'user', 'content': first_critique},
]
second_completion = client.chat.completions.create(
    model=main_model,
    messages=revision_messages,
    temperature=0.2,
)
second_response = second_completion.choices[0].message.content

In [16]:
print(second_response)

```json
{
  "age": 45,
  "gender": "female",
  "diagnosis": "other",
  "weight": 0,
  "smoking": "no"
}
```


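As we can see from the output above, the generator revises its answer in response to the critique: `weight` changed from `null` to `0`, while `diagnosis` was left unchanged. Repeating this generate-and-critique loop allows the response to be refined over a series of reflective interventions. Note that a revision is not automatically more accurate: the medical note does not record a weight, so `0` is a placeholder rather than an extracted value, which is why the quality of the critique matters.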

# Pattern 2: Tool Use

Let us see how tool use can augment LLM capabilities with a simple example. First, we begin by defining a series of Python functions that we then wrap as LangChain tools using the `@tool` decorator.

In [17]:
# Tool: integer addition. The docstring is left exactly as written because it is
# the description the tool-calling machinery relies on to select this function.
@tool
def add(a: int, b: int) -> int:
    """Adds a and b.

    Args:
        a: first int
        b: second int
    """
    return a + b

# Tool: integer multiplication. The docstring is left exactly as written because
# it is the description the tool-calling machinery relies on to select this function.
@tool
def multiply(a: int, b: int) -> int:
    """Multiplies a and b.

    Args:
        a: first int
        b: second int
    """

    return a * b

# Tool: integer subtraction (a - b; argument order matters). The docstring is
# left exactly as written because it is the description the tool-calling
# machinery relies on to select this function.
@tool
def subtract(a: int, b: int) -> int:
    """Subtract b from a

    Args:
        a: first int
        b: second int
    """

    return a - b

Note that the docstrings describing what each function does are a critical input: they are parsed by the `@tool` decorator and used to decide which function needs to be called when a user input is received.

We then collect these tools into a dictionary with function names as the keys like so:

In [18]:
# Registry of the tools, keyed by function name.
available_tools = dict(
    add=add,
    multiply=multiply,
    subtract=subtract,
)

We can now create a basic agent by binding the LLM with these three tools.

In [19]:
# Bind the registered tools to the chat model so that it can emit tool calls.
tool_list = list(available_tools.values())
agent = llm.bind_tools(tool_list)

Now our agent is capable of answering questions that can be resolved as evaluations of the three functions available to it as tools. Consider the following user query.

In [20]:
# A question that needs two of the tools (multiply and add) before it can be answered.
query = "Which is bigger: 3 * 12 or 11 + 49? what is the difference between the two"

# Conversation so far: a single human turn carrying the query.
messages = [HumanMessage(content=query)]

In [21]:
agent.invoke(messages).tool_calls

[{'name': 'multiply',
  'args': {'a': 3, 'b': 12},
  'id': 'call_kcsuaZejMqDtsKwTR2O7hSi0',
  'type': 'tool_call'},
 {'name': 'add',
  'args': {'a': 11, 'b': 49},
  'id': 'call_U7q09AM2e1R5sc7YUJXcR7ls',
  'type': 'tool_call'}]

Notice how the LLM behavior changed. Instead of answering the question directly, it has composed tool calls that are an intermediate step towards answering the question from the user.

Specifically, it has correctly recognized that it has to call the functions `multiply` and `add` with the correct arguments in order to answer the user question.

Note that this is still a partial execution of a tool-calling agent. We will see an end-to-end execution of the tool-calling agent in a future session.

# Pattern 3: Planning

Planning agents utilize a specified algorithm to plan/structure their efforts to achieve a business objective. Let us see an example of a Reasoning and Acting (ReAct) agent. We will take a much deeper look into ReAct agents in upcoming sessions.

In [22]:
# Fetch the "hwchase17/react" prompt template from the LangChain Hub.
# NOTE(review): presumably needs network access and returns the latest
# revision of the prompt - confirm, and pin a revision if reproducibility matters.
react_prompt = hub.pull("hwchase17/react")



In [23]:
print(react_prompt.template)

Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}


As the above prompt template indicates, the LLM is asked to 'think' through before answering in a Thought/Action/Observation sequence till a final answer is reached.

With this prompt, let us now create a simple Python agent that will always use the Python interpreter to answer user queries.

In [24]:
# SECURITY: PythonREPL executes whatever code the model sends to it. Only run
# this notebook in a sandboxed / throw-away environment.
python_repl = PythonREPL()
repl_tool = Tool(
    name="python_repl",
    # The REPL reports only what the executed code prints, so the description
    # must tell the model to print() its results. Without that hint the
    # observations can come back empty and the agent may keep retrying until it
    # hits the iteration limit.
    description=(
        "A Python shell used to execute python commands. "
        "Input should be a valid python command. "
        "If you want to see the output of a value, you should print it out with `print(...)`."
    ),
    func=python_repl.run,
)

As we have seen in previous examples, the `repl_tool` is basically a wrapper around the `python_repl.run` method. We can now create the ReAct agent by passing this `repl_tool`, the LLM and the ReAct prompt to `create_react_agent` like so:

In [25]:
# Combine the ReAct prompt, the chat model and the single Python REPL tool into
# a ReAct agent.
react_agent = create_react_agent(
    prompt=react_prompt,
    tools=[repl_tool],
    llm=llm,
)

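In `LangChain`, agent execution is handled by executors: an executor repeatedly calls the agent, runs the tool the agent selects, and feeds the resulting observation back to the agent until a final answer is produced or an iteration/time limit is reached.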

In [26]:
# Executor that drives the ReAct agent; verbose=True echoes the intermediate
# Thought/Action/Observation steps into the cell output.
react_agent_executor = AgentExecutor(
    verbose=True,
    tools=[repl_tool],
    agent=react_agent,
)

Let us now test our Python tool-calling agent with a non-trivial math problem.

In [27]:
# Word problem for the Python agent.
# NOTE(review): the question does not say whether the interest is simple or
# compound, so the expected answer is ambiguous (USD 510 under simple interest).
user_input = "If USD 450 amounts to USD 630 in 6 years, what will it amount to in 2 years at the same interest rate?"

In [29]:
react_agent_executor.invoke(
    {
        'input': user_input
    }
)



[1m> Entering new AgentExecutor chain...[0m




[32;1m[1;3mTo solve this problem, we first need to determine the interest rate based on the information given. We know that USD 450 grows to USD 630 in 6 years. We can use the formula for compound interest or simple interest to find the rate. 

Let's calculate the interest rate first. The formula for simple interest is:

\[ A = P(1 + rt) \]

Where:
- \( A \) is the amount of money accumulated after n years, including interest.
- \( P \) is the principal amount (the initial amount of money).
- \( r \) is the annual interest rate (in decimal).
- \( t \) is the time the money is invested or borrowed for, in years.

From the problem:
- \( A = 630 \)
- \( P = 450 \)
- \( t = 6 \)

We can rearrange the formula to solve for \( r \):

\[ r = \frac{A - P}{Pt} \]

After finding the interest rate, we can then use it to find out how much USD 450 will amount to in 2 years using the same formula.

Let's perform the calculations. 

Action: python_repl
Action Input: "P = 450; A = 630; t = 6; r = (A 

{'input': 'If USD 450 amounts to USD 630 in 6 years, what will it amount to in 2 years at the same interest rate?',
 'output': 'Agent stopped due to iteration limit or time limit.'}

# Pattern 4: Multi-agent Collaboration

In one pattern of multi-agent collaboration, called the `Triage` mode, a focal agent is tasked with handing off tasks to the appropriate agents. As an example, consider the following scenario with two agents, A and B. Agent A is the main agent and has two tools: a function to greet customers and a function to transfer control to Agent B. Agent B speaks only in Hindi and can be reached only when Agent A hands off control to it.

Whether the control needs to reach Agent B or not is decided by Agent A depending on the user query.

We will look at many more patterns of multi-agent collaborations in an upcoming session.

In [30]:
# Hand-off function listed in Agent A's `functions`: returns the Agent B object.
# `agent_b` is a module-level name assigned in a later cell, so it is looked up
# when the function is called, not when it is defined.
# NOTE(review): kept as `#` comments rather than a docstring - presumably the
# framework derives tool metadata from the function itself; confirm before
# adding a docstring.
def transfer_to_agent_b():
    return agent_b

In [31]:
# Hand-off function listed in Agent B's `functions`: returns the Agent A object.
# `agent_a` is a module-level name assigned in a later cell, so it is looked up
# when the function is called, not when it is defined.
# NOTE(review): kept as `#` comments rather than a docstring - presumably the
# framework derives tool metadata from the function itself; confirm before
# adding a docstring.
def transfer_to_agent_a():
    return agent_a

In [32]:
# Function listed in Agent A's `functions`: returns a fixed English greeting.
# NOTE(review): kept as a `#` comment rather than a docstring - presumably the
# framework derives tool metadata from the function itself; confirm before
# adding a docstring.
def greet_customer():
    return "Hello, how can I help you?"

In [42]:
# Agent A: the entry-point agent. It can greet the customer or hand the
# conversation off to Agent B.
agent_a = Agent(
    name="Agent A",
    model='gpt-4o-mini',
    instructions="You are a helpful agent.",
    functions=[transfer_to_agent_b, greet_customer],
)

# Agent B: instructed to reply in Hindi only; it can hand the conversation back
# to Agent A.
agent_b = Agent(
    name="Agent B",
    model='gpt-4o-mini',
    instructions="Only speak in Hindi.",
    functions=[transfer_to_agent_a],
)

In [43]:
# Start the conversation with Agent A and ask to be handed over to Agent B.
handoff_request = {"role": "user", "content": "I need to speak to Agent B"}
response = swarm_client.run(
    messages=[handoff_request],
    agent=agent_a,
)

In [44]:
print(response.messages[-1]["content"])

आप एजन्ट बी से बात कर रहे हैं। कैसे मदद कर सकता हूँ?


In [45]:
len(response.messages)

3

In [46]:
# Dump every message produced during the run, each followed by a '----' line.
for turn in response.messages:
    print(turn, '----', sep='\n')

{'content': None, 'refusal': None, 'role': 'assistant', 'audio': None, 'function_call': None, 'tool_calls': [{'id': 'call_Vy7tQ7hcI6wWXQi38w8jwQRC', 'function': {'arguments': '{}', 'name': 'transfer_to_agent_b'}, 'type': 'function'}], 'sender': 'Agent A'}
----
{'role': 'tool', 'tool_call_id': 'call_Vy7tQ7hcI6wWXQi38w8jwQRC', 'tool_name': 'transfer_to_agent_b', 'content': '{"assistant": "Agent B"}'}
----
{'content': 'आप एजन्ट बी से बात कर रहे हैं। कैसे मदद कर सकता हूँ?', 'refusal': None, 'role': 'assistant', 'audio': None, 'function_call': None, 'tool_calls': None, 'sender': 'Agent B'}
----
