# LLM-powered AI Agents

Table of contents
1. Understanding LLMs
2. Tools
3. Chat-based AI Agents
4. Service-based AI Agents

In [1]:
from language_models.proxy_client import BTPProxyClient
from language_models.settings import settings

# Build the proxy client from the project settings; it is passed to every
# language model created in this notebook.
proxy_client = BTPProxyClient(
    client_id=settings.CLIENT_ID,
    client_secret=settings.CLIENT_SECRET,
    auth_url=settings.AUTH_URL,
    api_base=settings.API_BASE,
)

## 1. Understanding LLMs

In [2]:
from language_models.models.llm import OpenAILanguageModel, ChatMessage, ChatMessageRole

In [3]:
# Language model used by the prompting and tool examples in sections 1 and 2
# (the name is rebound to other models in later sections).
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-35-turbo",
    max_tokens=256,
    temperature=0.0,
)

In [4]:
# Baseline: ask for the sentiment of a review in a single user message,
# without constraining the format of the answer.
prompt = """Take the following movie review and determine the sentiment of the review.

Movie review:
Wow! This movie was incredible. The acting was superb, and
the plot kept me on the edge of my seat. I highly recommend it!
"""

response = llm.get_completion([ChatMessage(role=ChatMessageRole.USER, content=prompt)])
print(response)

Sentiment: Positive


In [5]:
# Same review as before, but the prompt now restricts the answer to the words
# "positive" or "negative".
prompt = """Take the following movie review and determine the sentiment of the review.

Movie review:
Wow! This movie was incredible. The acting was superb, and
the plot kept me on the edge of my seat. I highly recommend it!

Respond with positive or negative.
"""

response = llm.get_completion([ChatMessage(role=ChatMessageRole.USER, content=prompt)])
print(response)

positive


In [6]:
# Move the instructions into a system message and ask for a numeric label;
# the user message now carries only the review text.
system_prompt = "Take the following movie review and determine the sentiment of the review. Respond with 1 (positive) or 0 (negative)."

prompt = "Wow! This movie was incredible. The acting was superb, and the plot kept me on the edge of my seat. I highly recommend it!"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt), 
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
print(response)

1


In [7]:
# Same system prompt as the previous example, so that the only thing that
# changes in this cell is the user message (which is not a movie review).
system_prompt = "Take the following movie review and determine the sentiment of the review. Respond with 1 (positive) or 0 (negative)."

prompt = "Will it rain in Seattle today?"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt), 
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
print(response)

I'm sorry, I am an AI language model and I do not have access to real-time weather information. I recommend checking a reliable weather website or using a weather app to get the most accurate and up-to-date forecast for Seattle.


In [8]:
# Extend the system prompt with an explicit fallback label (-1) for input that
# is not a movie review, then send the same off-topic question again.
system_prompt = """Take the following movie review and determine the sentiment of the review. 

Respond with 1 (positive) or 0 (negative).

If you don't receive a movie review, respond with -1.
"""

prompt = "Will it rain in Seattle today?"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt), 
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
print(response)

-1


## 2. Tools

In [9]:
import json
from language_models.tools.tool import Tool
from pydantic import BaseModel, Field
from typing import Any

In [10]:
# Ask the model to add the amounts itself, without any tool; the expected
# total is noted in the trailing comment for comparison with the reply.
prompt = "Total Raw Cost = $549.72 + $6.98 + $41.00 + $35.00 + $552.00 + $76.16 + $29.12" # answer: $1,289.98

response = llm.get_completion([ChatMessage(role=ChatMessageRole.USER, content=prompt)])
print(response)

Total Raw Cost = $1,290.98


In [11]:
def calculator(expression: str) -> Any:
    """Evaluate an arithmetic expression and return its numeric value.

    SECURITY NOTE: this function used to call ``eval`` on the expression.
    Because the expression is written by the language model, it is now parsed
    with ``ast`` and only numeric literals, parentheses and arithmetic
    operators are evaluated; everything else is rejected.

    Args:
        expression: Arithmetic expression such as ``"549.72 + 6.98"``.

    Returns:
        The value of the expression as an ``int`` or ``float``.

    Raises:
        ValueError: If the expression is not valid syntax or contains anything
            other than numbers and the operators ``+ - * / // % **``.
        ZeroDivisionError: If the expression divides by zero.
    """
    # Local imports keep the cell self-contained without touching the
    # notebook's import cell.
    import ast
    import operator

    binary_operators = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_operators = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }

    def evaluate(node: ast.AST) -> Any:
        # Only plain int/float literals are accepted (bool, str, ... are not).
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_operators:
            apply = binary_operators[type(node.op)]
            return apply(evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_operators:
            return unary_operators[type(node.op)](evaluate(node.operand))
        raise ValueError(f"Unsupported expression: {expression!r}")

    try:
        # strip() mirrors eval's tolerance for surrounding whitespace.
        tree = ast.parse(expression.strip(), mode="eval")
    except SyntaxError as err:
        raise ValueError(f"Invalid expression: {expression!r}") from err
    return evaluate(tree.body)

# Argument schema for the calculator tool (passed to Tool as args_schema):
# a single string field holding the expression to evaluate.
class Calculator(BaseModel):
    expression: str = Field(description="A math expression.")

# Wrap the calculator function as a Tool. Printing the tool shows the text
# that the following cells interpolate into their system prompts.
calculator_tool = Tool(
    func=calculator,
    name="Calculator",
    description="Use this tool when you want to do calculations.",
    args_schema=Calculator
)
print(calculator_tool)

tool name: Calculator, tool description: Use this tool when you want to do calculations., tool input: {{'expression': {{'description': 'A math expression.', 'title': 'Expression', 'type': 'string'}}}}


In [12]:
# Describe the calculator tool in the system prompt and ask for a JSON reply
# that names the tool and its input; the reply is then parsed with json.loads.
# NOTE(review): the prose asks for "action"/"action_input" while the JSON
# template uses "tool"/"tool_input" — consider aligning the wording.
system_prompt = f"""Take the following prompt and calculate the result.

Respond to the user as helpfully and accurately as possible. You have access to the following tools:
{calculator_tool}

Use a json blob to specify a tool by providing an action (tool name) and an action_input (tool input).

Always use the following JSON response format:
{{
    "thought": You should always think about what to do consider previous and subsequent steps,
    "tool": The tool to use,
    "tool_input": A valid dictionary in this format {{"<key>": <value>, ...}},
}}
"""

prompt = "Total Raw Cost = $549.72 + $6.98 + $41.00 + $35.00 + $552.00 + $76.16 + $29.12"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt), 
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
# strict=False lets json.loads accept control characters inside strings.
response = json.loads(response, strict=False)
print(json.dumps(response, indent=4))

{
    "thought": "To calculate the total raw cost, you need to add up all the individual costs.",
    "tool": "Calculator",
    "tool_input": {
        "expression": "549.72 + 6.98 + 41.00 + 35.00 + 552.00 + 76.16 + 29.12"
    }
}


In [13]:
print(calculator(**response["tool_input"]))

1289.98


In [14]:
# Second round of the tool loop: the system prompt additionally describes the
# "Final Answer" format, and the previous tool request plus the calculator
# result are replayed as conversation history.
# This cell relies on `response` still holding the parsed reply of the
# previous tool-request cell.
# NOTE(review): the prose asks for "action"/"action_input" while the JSON
# templates use "tool"/"tool_input" — consider aligning the wording.
# NOTE(review): the calculator result is sent with the ASSISTANT role —
# confirm that this is the intended role for tool output.
system_prompt = f"""Take the following prompt and calculate the result.

Respond to the user as helpfully and accurately as possible. You have access to the following tools:
{calculator_tool}

Use a json blob to specify a tool by providing an action (tool name) and an action_input (tool input).

Always use the following JSON response format:
{{
    "thought": You should always think about what to do consider previous and subsequent steps,
    "tool": The tool to use,
    "tool_input": A valid dictionary in this format {{"<key>": <value>, ...}},
}}
... (this Thought/Action/Observation can repeat N times)
When you know the final answer, use the following JSON response format:
{{
    "thought": I know the final answer,
    "tool": Final Answer,
    "tool_input": The final answer to the question,
}}
"""

prompt = "Total Raw Cost = $549.72 + $6.98 + $41.00 + $35.00 + $552.00 + $76.16 + $29.12"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt), 
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
    ChatMessage(role=ChatMessageRole.ASSISTANT, content=json.dumps(response)),
    ChatMessage(role=ChatMessageRole.ASSISTANT, content=f"Response of Calculator tool: {calculator(**response['tool_input'])}"),
])
response = json.loads(response, strict=False)
print(json.dumps(response, indent=4))

{
    "thought": "I know the final answer",
    "tool": "Final Answer",
    "tool_input": 1289.98
}


## 3. Chat-based AI Agents

In [15]:
from language_models.agents.react import ReActAgent
from language_models.tools.earthquake import earthquake_tools
from language_models.tools.current_date import current_date_tool

### Earthquake

In [16]:
# System prompt for the earthquake agent; it tells the model to call the
# current date tool before using any other tool.
system_prompt = """You are an United States Geological Survey expert who can answer questions regarding earthquakes and can run forecasts.

Use the current date tool to access the local date and time before using other tools.

Take the following question and answer it as accurately as possible.
"""

# Model used by the earthquake agent. The sampling temperature is passed as
# `temperature`, the same keyword used for the model in section 1.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4-32k',
    max_tokens=1024,
    temperature=0.0,
)

# Structured final answer requested from the earthquake agent (passed to the
# agent as output_format): a single text field.
class Output(BaseModel):
    content: str = Field(description="The final answer.")

# Agent for earthquake questions: it is given the earthquake tools plus the
# current date tool, takes the question through the {question} placeholder,
# answers in the Output format and is configured with iterations=5.
earthquake_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{question}",
    task_prompt_variables=["question"],
    tools=earthquake_tools + [current_date_tool],
    output_format=Output,
    iterations=5,
)

In [17]:
response = earthquake_agent.invoke({"question": "How many earthquakes have occurred for the past week with a magnitude of 5 or greater?"})

04/05/24 09:12:24 INFO Thought: First, I need to get the current date to calculate the start date for the past week.
04/05/24 09:12:24 INFO Tool: Current Date
04/05/24 09:12:24 INFO Tool input: {}
04/05/24 09:12:24 INFO Tool response: 2024-05-04 09:12:24.030399
04/05/24 09:12:28 INFO Thought: Now that I have the current date, I can calculate the start date for the past week. Then, I will use the 'Count' tool to find out how many earthquakes with a magnitude of 5 or greater have occurred during this period.
04/05/24 09:12:28 INFO Tool: Count
04/05/24 09:12:28 INFO Tool input: {'start_time': '2024-04-27T09:12:24.030399', 'min_magnitude': 5}
04/05/24 09:12:28 INFO Tool response: {'count': 28, 'maxAllowed': 20000}
04/05/24 09:12:32 INFO Thought: I now know the final answer
04/05/24 09:12:32 INFO Tool: Final Answer
04/05/24 09:12:32 INFO Tool input: {'content': 'There have been 28 earthquakes with a magnitude of 5 or greater in the past week.'}


In [18]:
print(response.final_answer["content"])

There have been 28 earthquakes with a magnitude of 5 or greater in the past week.


In [19]:
response = earthquake_agent.invoke({"question": "Query 10 earthquakes that occurred yesterday and have a magnitude > 3."})

04/05/24 09:12:48 INFO Thought: To answer the user's question, I need to query the earthquakes that occurred yesterday with a magnitude greater than 3. I will use the 'Query' tool for this. I will set the 'start_time' to the date of yesterday, 'end_time' to the date of today, 'min_magnitude' to 3, and 'limit' to 10.
04/05/24 09:12:48 INFO Tool: Query
04/05/24 09:12:48 INFO Tool input: {'start_time': '2024-05-03T00:00:00', 'end_time': '2024-05-04T00:00:00', 'min_magnitude': 3, 'limit': 10}
04/05/24 09:12:48 INFO Tool response: {'type': 'FeatureCollection', 'metadata': {'generated': 1714806768000, 'url': 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-05-03T00%3A00%3A00&endtime=2024-05-04T00%3A00%3A00&limit=10&mindepth=-100&maxdepth=1000&minmagnitude=3', 'title': 'USGS Earthquakes', 'status': 200, 'api': '1.14.1', 'limit': 10, 'offset': 1, 'count': 10}, 'features': [{'type': 'Feature', 'properties': {'mag': 5.1, 'place': '153 km WNW of Panguna, Papua New G

In [20]:
print(response.final_answer["content"])

Here are 10 earthquakes that occurred yesterday with a magnitude greater than 3:

1. [M 5.1 - 153 km WNW of Panguna, Papua New Guinea](https://earthquake.usgs.gov/earthquakes/eventpage/us6000mw86)
2. [M 3.4 - 27 km NNW of Charlotte Amalie, U.S. Virgin Islands](https://earthquake.usgs.gov/earthquakes/eventpage/pr71447623)
3. [M 3.2 - 61 km S of Whites City, New Mexico](https://earthquake.usgs.gov/earthquakes/eventpage/tx2024isgw)
4. [M 3.3 - 5 km SSE of Maria Antonia, Puerto Rico](https://earthquake.usgs.gov/earthquakes/eventpage/pr71447608)
5. [M 4.2 - 5 km NNW of Camiña, Chile](https://earthquake.usgs.gov/earthquakes/eventpage/us6000mw6i)
6. [M 5.1 - South Sandwich Islands region](https://earthquake.usgs.gov/earthquakes/eventpage/us6000mw61)
7. [M 4.6 - 58 km N of Claveria, Philippines](https://earthquake.usgs.gov/earthquakes/eventpage/us6000mw1d)
8. [M 3.0 - 60 km NE of Teller, Alaska](https://earthquake.usgs.gov/earthquakes/eventpage/us6000mvzs)
9. [M 3.4 - 32 km N of San Antonio, P

In [21]:
response = earthquake_agent.invoke({"question": "Can MegaQuakes really happen? Like a magnitude 10 or larger?"})

04/05/24 09:13:18 INFO Thought: The magnitude of an earthquake is related to the area of the fault on which it occurs - the larger the fault area, the larger the earthquake. Theoretically, there is no limit to the magnitude of an earthquake, but in practice, the size of earthquakes is limited by the size of the fault. The largest earthquake ever recorded was a magnitude 9.5 in Chile in 1960. A magnitude 10 earthquake would require a fault that is 1000 km long and 150 km wide, which does not exist on Earth. Therefore, a magnitude 10 or larger earthquake is not considered possible.
04/05/24 09:13:18 INFO Tool: Final Answer
04/05/24 09:13:18 INFO Tool input: {'content': 'The magnitude of an earthquake is related to the area of the fault on which it occurs - the larger the fault area, the larger the earthquake. Theoretically, there is no limit to the magnitude of an earthquake, but in practice, the size of earthquakes is limited by the size of the fault. The largest earthquake ever recorde

In [22]:
print(response.final_answer["content"])

The magnitude of an earthquake is related to the area of the fault on which it occurs - the larger the fault area, the larger the earthquake. Theoretically, there is no limit to the magnitude of an earthquake, but in practice, the size of earthquakes is limited by the size of the fault. The largest earthquake ever recorded was a magnitude 9.5 in Chile in 1960. A magnitude 10 earthquake would require a fault that is 1000 km long and 150 km wide, which does not exist on Earth. Therefore, a magnitude 10 or larger earthquake is not considered possible.


## 4. Service-based AI Agents

In [73]:
import pandas as pd
from sklearn.metrics import accuracy_score
from language_models.agents.chain import AgentChain

### Sentiment Analysis

In [74]:
# Load the gzip-compressed tweets file; the column names are supplied
# explicitly through `names`.
df = pd.read_csv("./data/tweets.csv.gz", compression="gzip", encoding="latin-1", names=["sentiment", "id", "date", "query", "user", "tweet"])
df = df.dropna()
# Remove the rows labelled 2. Boolean indexing drops them, whereas
# DataFrame.where would keep them with every column set to NaN (and, coming
# after dropna, those NaN rows would reach the sampling and the classifier).
# .copy() makes the filtered frame independent so the column assignment below
# does not write into a slice.
df = df[df.sentiment != 2].copy()
# Recode the labels: 4 becomes 1, 0 stays 0.
df["sentiment"] = df["sentiment"].map({4: 1, 0: 0})
# Work on a random sample of 10 tweets (no seed, so it differs per run).
df = df.sample(n=10)
df.head()

Unnamed: 0,sentiment,id,date,query,user,tweet
205552,0,1972907325,Sat May 30 10:06:39 PDT 2009,NO_QUERY,rodsterace,is late for his dentist appt. They said it was...
1374667,4,2051470995,Fri Jun 05 22:06:33 PDT 2009,NO_QUERY,tweeteradder1,@karl09 Get 100 followers a day using www.twee...
1185751,4,1982834475,Sun May 31 11:59:49 PDT 2009,NO_QUERY,spunky2778,@cgoodale oh but u can.the sounds of the ocean...
640850,0,2235095141,Fri Jun 19 00:24:58 PDT 2009,NO_QUERY,mikedestef,Whyyy are drums so expensiiive? Anyone wanna ...
181966,0,1967021166,Fri May 29 18:49:47 PDT 2009,NO_QUERY,becca2anne0,# number times I bottomed out just in our driv...


In [75]:
# System prompt for the tweet sentiment agent. It refers to the input as a
# tweet throughout, matching the "Tweet:" task prompt the agent is given.
system_prompt = """Take the following tweet and determine the sentiment of the tweet. 

Respond with 1 (positive) or 0 (negative).

If you don't receive a tweet, respond with -1.
"""

# Model used by the sentiment agent. The sampling temperature is passed as
# `temperature`, the same keyword used for the model in section 1.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=256,
    temperature=0.0,
)

# Structured final answer for the sentiment agent: a single integer label.
# This rebinds the name Output used by the earlier earthquake agent.
class Output(BaseModel):
    sentiment: int = Field(description="The sentiment of the tweet.")

# Sentiment agent without tools (tools=None) and with iterations=1; the tweet
# is inserted through the {tweet} placeholder of the task prompt and the
# answer is requested in the Output format.
sentiment_analysis_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Tweet:\n{tweet}",
    task_prompt_variables=["tweet"],
    tools=None,
    output_format=Output,
    iterations=1,
)

In [76]:
def classify_sentiment(tweet: str) -> int:
    """Return the sentiment label the sentiment agent gives for ``tweet``.

    Args:
        tweet: Text of the tweet to classify.

    Returns:
        The ``sentiment`` value of the agent's final answer, or ``0`` when
        that value is falsy (for example ``None`` or ``0``).
    """
    result = sentiment_analysis_agent.invoke({"tweet": tweet})
    label = result.final_answer["sentiment"]
    if label:
        return label
    return 0

In [77]:
df["prediction"] = [classify_sentiment(tweet) for tweet in df.tweet]

04/05/24 09:34:04 INFO Thought: The tweet seems to express a neutral sentiment. The user is late for their dentist appointment and anticipates some negative consequences, but they also mention that the dentist's office said it was okay. There's a mix of negative and positive sentiments, which makes it neutral overall.
04/05/24 09:34:04 INFO Tool: Final Answer
04/05/24 09:34:04 INFO Tool input: {'sentiment': 2}
04/05/24 09:34:07 INFO Thought: The tweet seems to be promoting a service, it doesn't express a positive or negative sentiment towards it. It's more of an informative tweet.
04/05/24 09:34:07 INFO Tool: Final Answer
04/05/24 09:34:07 INFO Tool input: {'sentiment': 2}
04/05/24 09:34:10 INFO Thought: The tweet is neutral. It's just a statement about the difference between the sounds of the ocean and a lake.
04/05/24 09:34:10 INFO Tool: Final Answer
04/05/24 09:34:10 INFO Tool input: {'sentiment': 2}
04/05/24 09:34:14 INFO Thought: The tweet expresses a negative sentiment because th

In [78]:
print(f"Accuracy: {accuracy_score(df.sentiment, df.prediction)}")

Accuracy: 0.3


### Auto ML

In [None]:
# NOTE(review): this is the tweet sentiment prompt, but the agent built from
# it in this cell returns a `problem` field ("The machine learning problem.")
# — looks like a copy-paste placeholder; confirm the intended prompt.
system_prompt = """Take the following tweet and determine the sentiment of the review. 

Respond with 1 (positive) or 0 (negative).

If you don't receive a tweet, respond with -1.
"""

# Model used by the problem finder agent. The sampling temperature is passed
# as `temperature`, the same keyword used for the model in section 1.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=128,
    temperature=0.0,
)

# Structured final answer for the problem finder agent: a single text field.
# This rebinds the name Output used by the earlier agents.
class Output(BaseModel):
    problem: str = Field(description="The machine learning problem.")

# First agent of the Auto ML chain; no tools, iterations=5, answer requested
# in the Output format.
# NOTE(review): the task prompt and its variable still refer to a tweet —
# confirm they fit an agent whose output is a machine learning problem.
problem_finder_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Tweet content: {tweet}",
    task_prompt_variables=["tweet"],
    tools=None,
    output_format=Output,
    iterations=5,
)

In [None]:
# NOTE(review): this is the tweet sentiment prompt, but the agent built from
# it in this cell returns a `code` field ("The auto ML code.") — looks like a
# copy-paste placeholder; confirm the intended prompt.
system_prompt = """Take the following tweet and determine the sentiment of the review. 

Respond with 1 (positive) or 0 (negative).

If you don't receive a tweet, respond with -1.
"""

# Model used by the Auto ML agent. The sampling temperature is passed as
# `temperature`, the same keyword used for the model in section 1.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=128,
    temperature=0.0,
)

# Structured final answer for the Auto ML agent: a single text field holding
# the code. This rebinds the name Output used by the earlier agents.
class Output(BaseModel):
    code: str = Field(description="The auto ML code.")

# Second agent of the Auto ML chain; no tools, iterations=5, answer requested
# in the Output format.
# NOTE(review): the task prompt and its variable still refer to a tweet —
# confirm they fit an agent whose output is code.
auto_ml_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Tweet content: {tweet}",
    task_prompt_variables=["tweet"],
    tools=None,
    output_format=Output,
    iterations=5,
)

In [None]:
chain = AgentChain.create(chain=[problem_finder_agent, auto_ml_agent])