# LLM-powered AI Agents

Table of contents
1. Understanding LLMs
2. Tools
3. Chat-based AI Agents
4. Service-based AI Agents
5. Multi-Agents

In [1]:
import json
import pandas as pd
import random
from pathlib import Path
from datetime import datetime
from pydantic import BaseModel, Field
from typing import Any
from io import StringIO
from language_models.agents.chain import AgentChain
from language_models.models.llm import OpenAILanguageModel, ChatMessage, ChatMessageRole
from language_models.tools.tool import Tool
from language_models.proxy_client import BTPProxyClient
from language_models.agents.react import ReActAgent
from language_models.tools.earthquake import earthquake_tools
from language_models.tools.current_date import current_date_tool
from language_models.settings import settings
from pprint import pprint

In [2]:
# Client for the LLM proxy. Credentials and endpoints are read from `settings`
# so that no secrets are hardcoded in the notebook.
proxy_client = BTPProxyClient(
    client_id=settings.CLIENT_ID,
    client_secret=settings.CLIENT_SECRET,
    auth_url=settings.AUTH_URL,
    api_base=settings.API_BASE,
)

## 1. Understanding LLMs

In [3]:
# Language model used by the prompting and tool examples that follow.
# NOTE(review): temperature=0.0 is presumably chosen for near-deterministic replies — confirm.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-35-turbo",
    max_tokens=256,
    temperature=0.0,
)

In [4]:
# Single user message with no instruction about the answer format:
# the shape of the reply is left entirely to the model.
prompt = """Take the following movie review and determine the sentiment of the review.

Movie review:
Wow! This movie was incredible. The acting was superb, and
the plot kept me on the edge of my seat. I highly recommend it!"""

response = llm.get_completion([ChatMessage(role=ChatMessageRole.USER, content=prompt)])
print(response)

Sentiment: Positive


In [5]:
# Same review, but the prompt now restricts the answer to "positive" or "negative".
prompt = """Take the following movie review and determine the sentiment of the review.

Movie review:
Wow! This movie was incredible. The acting was superb, and
the plot kept me on the edge of my seat. I highly recommend it!

Respond with positive or negative."""

response = llm.get_completion([ChatMessage(role=ChatMessageRole.USER, content=prompt)])
print(response)

positive


In [6]:
# The instructions move into a SYSTEM message; the USER message carries only the review.
# The answer is requested as a numeric label: 1 (positive) or 0 (negative).
system_prompt = "Take the following movie review and determine the sentiment of the review. Respond with 1 (positive) or 0 (negative)."

prompt = "Wow! This movie was incredible. The acting was superb, and the plot kept me on the edge of my seat. I highly recommend it!"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt),
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
print(response)

1


In [7]:
# Same sentiment instructions as before, but the user message is not a movie
# review. The system prompt defines no behaviour for that case.
system_prompt = "Take the following movie review and determine the sentiment of the review. Respond with 1 (positive) or 0 (negative)."

prompt = "Will it rain in Seattle today?"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt),
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
pprint(response)

("I'm sorry, I am an AI language model and I do not have access to real-time "
 'weather information. I recommend checking a reliable weather website or '
 'using a weather app to get the most accurate and up-to-date forecast for '
 'Seattle.')


In [8]:
# The system prompt now specifies a fallback label (-1) for input that is not a movie review.
system_prompt = """Take the following movie review and determine the sentiment of the review.

Respond with 1 (positive) or 0 (negative).

If you don't receive a movie review, respond with -1."""

prompt = "Will it rain in Seattle today?"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt),
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
print(response)

-1


## 2. Tools

In [9]:
# Ask the model to add the amounts directly, without any tool.
# The expected sum is noted inline so the reply can be checked against it.
prompt = "Total Raw Cost = $549.72 + $6.98 + $41.00 + $35.00 + $552.00 + $76.16 + $29.12" # answer: $1,289.98

response = llm.get_completion([ChatMessage(role=ChatMessageRole.USER, content=prompt)])
print(response)

Total Raw Cost = $1,290.98


In [10]:
def calculator(expression: str) -> Any:
    """Evaluate an arithmetic expression and return its numeric value.

    The expression is written by the LLM, i.e. it is untrusted input. It is
    therefore parsed with ``ast`` and only a small arithmetic subset is
    evaluated, instead of handing the raw string to ``eval``.

    Supported syntax: int/float literals, ``+ - * / // % **``, unary ``+``/``-``,
    parentheses, and positional calls to ``abs``, ``round``, ``min``, ``max``
    and ``pow``.

    Args:
        expression: A math expression, e.g. ``"549.72 + 6.98"``.

    Returns:
        The value of the expression (``int`` or ``float``).

    Raises:
        SyntaxError: If ``expression`` is not a syntactically valid expression.
        ValueError: If it uses anything outside the supported subset.
        ZeroDivisionError: On division or modulo by zero.
    """
    # Imported locally so this cell does not depend on the import cell.
    import ast
    import operator

    binary_operators = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_operators = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    allowed_functions = {"abs": abs, "round": round, "min": min, "max": max, "pow": pow}

    def evaluate(node: ast.AST) -> Any:
        if isinstance(node, ast.Constant):
            # Exact type check: rejects bool (an int subclass), str, bytes, complex, None.
            if type(node.value) in (int, float):
                return node.value
        elif isinstance(node, ast.BinOp) and type(node.op) in binary_operators:
            return binary_operators[type(node.op)](evaluate(node.left), evaluate(node.right))
        elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_operators:
            return unary_operators[type(node.op)](evaluate(node.operand))
        elif (
            isinstance(node, ast.Call)
            and isinstance(node.func, ast.Name)
            and node.func.id in allowed_functions
            and not node.keywords
        ):
            return allowed_functions[node.func.id](*(evaluate(arg) for arg in node.args))
        raise ValueError(f"Unsupported element in math expression: {ast.dump(node)}")

    # NOTE(review): very large exponents (e.g. 9**9**9) are not bounded here.
    # eval() accepts leading whitespace; strip so ast.parse does too.
    return evaluate(ast.parse(expression.strip(), mode="eval").body)

# Argument schema for the calculator tool: a single string field holding the
# expression. It is passed to Tool(...) as `args_schema`.
class Calculator(BaseModel):
    expression: str = Field(description="A math expression.")

# Wrap the function as a Tool. The printed representation contains the tool's
# name, description and input schema; that text is what gets embedded in prompts.
calculator_tool = Tool(
    func=calculator,
    name="Calculator",
    description="Use this tool when you want to do calculations.",
    args_schema=Calculator
)

print(calculator_tool)

tool name: Calculator, tool description: Use this tool when you want to do calculations., tool input: {{'expression': {{'description': 'A math expression.', 'title': 'Expression', 'type': 'string'}}}}


In [11]:
# System prompt that asks the model to reply with a JSON "tool call".
# The example object is kept valid JSON (no trailing comma) because the reply is
# parsed with json.loads, and the model tends to mirror the example it is shown.
system_prompt = f"""Take the following prompt and calculate the result.

Respond to the user as helpfully and accurately as possible.

You have access to the following tools: {calculator_tool}

Please ALWAYS use the following JSON format:
{{
  "thought": "You should always think about what to do considering previous and subsequent steps",
  "tool": "The tool to use. Must be one of {calculator_tool.name}",
  "tool_input": "Valid keyword arguments"
}}"""

prompt = "Total Raw Cost = $549.72 + $6.98 + $41.00 + $35.00 + $552.00 + $76.16 + $29.12"

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt),
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
# strict=False lets json.loads accept control characters (e.g. raw newlines) inside strings.
response = json.loads(response, strict=False)
print(json.dumps(response, indent=4))

{
    "thought": "You should always think about what to do consider previous and subsequent steps",
    "tool": "Calculator",
    "tool_input": {
        "expression": "549.72 + 6.98 + 41.00 + 35.00 + 552.00 + 76.16 + 29.12"
    }
}


In [12]:
print(calculator(**response["tool_input"]))

1289.98


In [13]:
# Extended system prompt: besides the tool-call format it describes the
# observation loop and a dedicated "Final Answer" format.
# Both example objects are kept valid JSON (no trailing comma) because the reply
# is parsed with json.loads.
system_prompt = f"""Take the following prompt and calculate the result.

Respond to the user as helpfully and accurately as possible.

You have access to the following tools: {calculator_tool}

Please ALWAYS use the following JSON format:
{{
  "thought": "You should always think about what to do considering previous and subsequent steps",
  "tool": "The tool to use. Must be one of {calculator_tool.name}",
  "tool_input": "Valid keyword arguments"
}}

Observation: tool result
... (this Thought/Tool/Tool Input/Observation can repeat N times)

When you know the answer, you MUST use the following JSON format:
{{
  "thought": "I now know what to respond",
  "tool": "Final Answer",
  "tool_input": "The final answer to the question"
}}"""

# The user message replays the earlier tool call and its result as "previous work",
# so the model can go straight to the final answer.
prompt = """Total Raw Cost = $549.72 + $6.98 + $41.00 + $35.00 + $552.00 + $76.16 + $29.12

This was your previous work:
Thought: The user wants me to calculate the total raw cost. I will use the Calculator tool.
Tool: Calculator
Tool Input: {"expression": "549.72 + 6.98 + 41.00 + 35.00 + 552.00 + 76.16 + 29.12"}
Observation: Tool Response: 1289.98"""

response = llm.get_completion([
    ChatMessage(role=ChatMessageRole.SYSTEM, content=system_prompt),
    ChatMessage(role=ChatMessageRole.USER, content=prompt),
])
# strict=False lets json.loads accept control characters (e.g. raw newlines) inside strings.
response = json.loads(response, strict=False)
print(json.dumps(response, indent=4))

{
    "thought": "I now know what to respond",
    "tool": "Final Answer",
    "tool_input": "The total raw cost is $1289.98"
}


## 3. Chat-based AI Agents

### Earthquake

In [14]:
system_prompt = "You are a United States Geological Survey expert who can answer questions regarding earthquakes."

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=1024,
    temperature=0.0,
)

# Shape of the agent's final answer: a single text field.
class Output(BaseModel):
    content: str = Field(description="The final answer.")

# The agent gets the earthquake tools plus a current-date tool; the task prompt
# is just the user's question.
earthquake_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{question}",
    task_prompt_variables=["question"],
    tools=earthquake_tools + [current_date_tool],
    output_format=Output,
    iterations=10,
)

In [15]:
response = earthquake_agent.invoke({"question": "How many earthquakes occurred today?"})

25/05/24 08:58:57 INFO Prompt:
How many earthquakes occurred today?
25/05/24 08:58:59 INFO Raw response:
{
  "thought": "I need to get the current date first to determine the start and end times for the earthquake query.",
  "tool": "Current Date",
  "tool_input": {}
}
25/05/24 08:58:59 INFO Thought:
I need to get the current date first to determine the start and end times for the earthquake query.
25/05/24 08:58:59 INFO Tool:
Current Date
25/05/24 08:58:59 INFO Tool input:
{}
25/05/24 08:58:59 INFO Tool response:
2024-05-25 08:58:59.973582
25/05/24 08:59:09 INFO Raw response:
{
  "thought": "Now that I have the current date, I can use it to query the number of earthquakes that occurred today.",
  "tool": "Count Earthquakes",
  "tool_input": {
    "start_time": "2024-05-25T00:00:00",
    "end_time": "2024-05-25T23:59:59"
  }
}
25/05/24 08:59:09 INFO Thought:
Now that I have the current date, I can use it to query the number of earthquakes that occurred today.
25/05/24 08:59:09 INFO Too

In [16]:
print(response.final_answer["content"])

There were 51 earthquakes today.


In [17]:
response = earthquake_agent.invoke({"question": "Show me 3."})

25/05/24 08:59:15 INFO Prompt:
Show me 3.
25/05/24 08:59:20 INFO Raw response:
{
  "thought": "The user wants to see details of 3 earthquakes that occurred today. I will use the 'Query Earthquakes' tool to get this information.",
  "tool": "Query Earthquakes",
  "tool_input": {"start_time": "2024-05-25T00:00:00", "end_time": "2024-05-25T23:59:59", "limit": 3}
}
25/05/24 08:59:20 INFO Thought:
The user wants to see details of 3 earthquakes that occurred today. I will use the 'Query Earthquakes' tool to get this information.
25/05/24 08:59:20 INFO Tool:
Query Earthquakes
25/05/24 08:59:20 INFO Tool input:
{'start_time': '2024-05-25T00:00:00', 'end_time': '2024-05-25T23:59:59', 'limit': 3}
25/05/24 08:59:21 INFO Tool response:
{'type': 'FeatureCollection', 'metadata': {'generated': 1716620361000, 'url': 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2024-05-25T00%3A00%3A00&endtime=2024-05-25T23%3A59%3A59&limit=3&mindepth=-100&maxdepth=1000', 'title': 'USGS Eart

In [18]:
print(response.final_answer["content"])

Here are 3 earthquakes that occurred today:

1. A magnitude 1.5 earthquake occurred 56 km NNW of Yakutat, Alaska. [More details](https://earthquake.usgs.gov/earthquakes/eventpage/ak0246pkakya)

2. A magnitude 1.1 earthquake occurred 62 km W of Happy Valley, Alaska. [More details](https://earthquake.usgs.gov/earthquakes/eventpage/ak0246pk85jk)

3. A magnitude 1.4 earthquake occurred 4 km W of Point Possession, Alaska. [More details](https://earthquake.usgs.gov/earthquakes/eventpage/ak0246pk74e7)


In [19]:
response = earthquake_agent.invoke({"question": "Can MegaQuakes really happen? Like a magnitude 10 or larger?"})

25/05/24 08:59:30 INFO Prompt:
Can MegaQuakes really happen? Like a magnitude 10 or larger?
25/05/24 08:59:35 INFO Raw response:
{'thought': 'The user is asking about the possibility of a magnitude 10 or larger earthquake, often referred to as a "MegaQuake".', 'tool': 'Final Answer', 'tool_input': {'content': 'While theoretically possible, a magnitude 10 or larger earthquake, often referred to as a "MegaQuake", is extremely unlikely. The largest earthquake ever recorded was a magnitude 9.5 in Chile in 1960. An earthquake of magnitude 10 would release 31.6 times more energy than the magnitude 9.5 earthquake. Such an event would require a fault line significantly larger than any currently known on Earth.'}}
25/05/24 08:59:46 INFO Raw response:
{
  "thought": "A magnitude 10 or larger earthquake, often referred to as a 'megaquake', is theoretically possible but extremely unlikely. The magnitude of an earthquake is related to the length of the fault on which it occurs - the larger the faul

In [20]:
pprint(response.final_answer["content"])

("A magnitude 10 or larger earthquake, often referred to as a 'megaquake', is "
 'theoretically possible but extremely unlikely. The magnitude of an '
 'earthquake is related to the length of the fault on which it occurs - the '
 'larger the fault, the larger the earthquake. The biggest fault on Earth, the '
 'Pacific Ring of Fire, could theoretically produce an earthquake of a '
 'magnitude up to about 9.2. A magnitude 10 earthquake would require a fault '
 'three times the length of the Ring of Fire. There is no known fault capable '
 'of producing a magnitude 10 earthquake.')


## 4. Service-based AI Agents

### Contract Drafting

In [21]:
system_prompt = """You are a corporate lawyer. Take the following bullet points and generate a draft of a section for a contract. Make it lengthy."""

task_prompt = """Bullet points:
{section}"""

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=1024,
    temperature=0.0,
)

# Shape of the agent's final answer: a section title and its drafted text.
class ContractSection(BaseModel):
    title: str = Field(description="The title of the section.")
    content: str = Field(description="The content of the section.")

# Only {section} appears in the task prompt and is supplied on invoke, so it is
# the only declared prompt variable.
contract_drafting_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt=task_prompt,
    task_prompt_variables=["section"],
    tools=None,
    output_format=ContractSection,
    iterations=3,
)

In [22]:
def generate_contract(contract_sections: list[str]) -> str:
    """Draft each section with the contract agent and join the drafts into one text.

    Args:
        contract_sections: Bullet-point inputs, one string per contract section.

    Returns:
        All drafted sections in input order. Each section is its title, a blank
        line, and its content; sections are separated by a blank line.
    """

    def draft_section(bullet_points: str) -> str:
        answer = contract_drafting_agent.invoke({"section": bullet_points}).final_answer
        drafted = "\n\n".join((str(answer["title"]), str(answer["content"])))
        # NOTE(review): reset() presumably clears the agent's state so sections
        # do not influence each other — verify against ReActAgent.
        contract_drafting_agent.reset()
        return drafted

    return "\n\n".join([draft_section(bullet_points) for bullet_points in contract_sections])

In [23]:
definitions = "Capitalised terms, singular or plural, used in this Amendment, shall have the same meaning in the GMA."

amendment = """INVOICING AND PAYMENT TERMS
Clause 12.1(ii) of the GMA shall be cancelled and substituted as follow:
[*****]
[*****]
[*****]

Any other provision of Clause 12 shall remain in full force and effect.

PRICE CONDITIONS
(i) Clause 3.2 of the Exhibit 14 of the GMA shall be cancelled and substituted as follow:
“3.2 Technical conditions for prices adjustment
The prices set out in this Exhibit 14 shall be modified every [*****] at the occasion of the invoicing reconciliation pursuant to Clause 11
(“Reconciliation”) if the Standard Operations of the Aircraft, analyzed at the time of the adjustment (all calculations are made with figures corresponding to [*****], change by more or less
[*****] with respect to the estimated values of the same parameters, considered at the time of commencement of the Term.
As from the date this Agreement enters into force, the Parties agree to take into account the following basic operating parameters (the
“Standard Operations”) as a reference for the above calculation:
[*****]
[*****]
[*****]"""

effective_date_and_duration = "Amendment is effective starting on the date of its signature by both Parties."

confidentiality = """Confidential Information released by either of the Parties (the “Disclosing Party”) to the
other Party (the “Receiving Party”) shall not be released in whole or in part to any third party:
- Not to deliver, disclose or publish it to any third party including subsidiary companies and companies having an interest in its capital
- Use Confidential Information solely for the purpose of this Amendment
- Disclose the Confidential Information only to those of its direct employees
- Not to duplicate the Confidential Information nor to copy

Any Confidential Information shall remain the property of the Disclosing Party.

The Receiving Party hereby acknowledges and recognises that Confidential Information is protected by copyright Laws and related
international treaty provisions, as the case may be.

This shall survive termination or expiry of this Amendment for a period of five (5) years following such End Date."""

governing_law = """Pursuant to and in accordance with Section 5-1401 of the New York General Obligations Law.

Arbitration: in the event of a dispute arising out of or relating to this Amendment, including without limitation disputes regarding the
existence, validity or termination of this Amendment (a “Dispute”), either Party may notify such Dispute to the other through service of a
written notice (the “Notice of Dispute”).

Arbitration, and any proceedings, and meetings incidental to or related to the arbitration process, shall take place in New York.

Arbitration shall be kept confidential and the existence of the proceeding and any element.

During any period of negotiation or arbitration, the Parties shall continue to meet their respective obligations.

Notwithstanding any provision of this the Parties may, at any time, seek and decide to settle a Dispute.

Judgment upon any award may be entered in any court having jurisdiction.

Recourse to jurisdictions is expressly excluded except as provided for in the ICC Rules of Conciliation and Arbitration."""

miscellaneous = """Amendment contains the entire agreement between the Parties regarding the subject-matter.

Amendment shall not be varied or modified except by a written document duly signed."""

# Draft all six sections; the list order is the order of sections in the result.
contract = generate_contract(
    contract_sections=[
        definitions,
        amendment,
        effective_date_and_duration,
        confidentiality,
        governing_law,
        miscellaneous,
    ]
)

25/05/24 08:59:46 INFO Prompt:
Bullet points:
Capitalised terms, singular or plural, used in this Amendment, shall have the same meaning in the GMA.
25/05/24 08:59:58 INFO Raw response:
{
  "thought": "The bullet point provided refers to the definition and interpretation of terms used in the contract. This is typically included in a 'Definitions and Interpretations' section of a contract. I will draft a section based on this.",
  "tool": "Final Answer",
  "tool_input": {
    "title": "Definitions and Interpretations",
    "content": "For the purposes of this Amendment, all capitalised terms, whether used in singular or plural form, shall have the same meaning as defined in the General Master Agreement (GMA). This includes, but is not limited to, terms defined in the body of the GMA, in any schedules, annexes, exhibits, or appendices attached thereto, or in any documents incorporated by reference therein. The interpretation of these terms shall be consistent with the interpretation prov

In [24]:
pprint(contract)

('Definitions and Interpretations\n'
 '\n'
 'For the purposes of this Amendment, all capitalised terms, whether used in '
 'singular or plural form, shall have the same meaning as defined in the '
 'General Master Agreement (GMA). This includes, but is not limited to, terms '
 'defined in the body of the GMA, in any schedules, annexes, exhibits, or '
 'appendices attached thereto, or in any documents incorporated by reference '
 'therein. The interpretation of these terms shall be consistent with the '
 'interpretation provided in the GMA, unless expressly stated otherwise in '
 'this Amendment. Any ambiguity or uncertainty in the interpretation of these '
 'terms shall be resolved by referring to the interpretation provided in the '
 'GMA. This provision is intended to ensure consistency and coherence in the '
 'interpretation and application of the terms used in this Amendment and the '
 'GMA.\n'
 '\n'
 'Amendment to Invoicing, Payment Terms and Price Conditions\n'
 '\n'
 'INVOICING 

### Sentiment Analysis

In [25]:
# Load and clean the tweet corpus in one chain, so re-running the cell always
# starts from the raw file. Column names are supplied explicitly.
df_tweets = (
    pd.read_csv(
        "./data/tweets.csv.gz",
        compression="gzip",
        encoding="latin-1",
        names=["sentiment", "id", "date", "query", "user", "tweet"],
    )
    .dropna()
    # Boolean filtering removes the label-2 rows; DataFrame.where would keep
    # them as all-NaN rows (and they would survive, since dropna runs first).
    .loc[lambda frame: frame["sentiment"] != 2]
    # Recode the labels to the 1 (positive) / 0 (negative) scheme used by the prompts.
    .assign(sentiment=lambda frame: frame["sentiment"].map({4: 1, 0: 0}))
)
# Fixed seed so the same 10 tweets are drawn on every run.
df_tweets_sampled = df_tweets.sample(n=10, random_state=42)
df_tweets_sampled.head(10)

Unnamed: 0,sentiment,id,date,query,user,tweet
1356672,1,2047812115,Fri Jun 05 14:13:05 PDT 2009,NO_QUERY,wendy_uk,@AuntSay I miss him. I think I might be turni...
789758,0,2325475538,Thu Jun 25 05:31:09 PDT 2009,NO_QUERY,ccrover,I want a smart phone
114196,0,1826173653,Sun May 17 07:52:07 PDT 2009,NO_QUERY,kurtis252,Just had power but lost work on pc
496496,0,2185589446,Mon Jun 15 17:36:32 PDT 2009,NO_QUERY,thelindsayellis,"@Calavphin This is just for the summer really,..."
1585144,1,2190643974,Tue Jun 16 03:19:18 PDT 2009,NO_QUERY,Internetvirgin,Yes I do play on expert
511565,0,2189927010,Tue Jun 16 01:16:51 PDT 2009,NO_QUERY,goddessmaat,Just one little thing breaks the tenuous threa...
1381212,1,2052325382,Sat Jun 06 00:21:15 PDT 2009,NO_QUERY,bripearls294,Wondering if Jonesy actually went to bed when ...
1083197,1,1968822822,Fri May 29 22:18:11 PDT 2009,NO_QUERY,ACD93,@itsNICKJONAS seriously....where have ya bein?
1365057,1,2049894013,Fri Jun 05 17:41:25 PDT 2009,NO_QUERY,farahhdibs,Good luck to Nurul and Yanto for SI auditions!...
1057988,1,1962820383,Fri May 29 11:43:52 PDT 2009,NO_QUERY,Jago_X,@arnaldo42 That last tweet was meant for you o...


In [26]:
system_prompt = """Take the following tweet and determine the sentiment of the tweet.

Respond with 1 (positive) or 0 (negative).

If you don't receive a tweet, respond with -1."""

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=128,
    temperature=0.0,
)

# Shape of the agent's final answer: one integer label (1, 0 or -1 per the system prompt).
class Sentiment(BaseModel):
    sentiment: int = Field(description="The sentiment of the tweet.")

sentiment_analysis_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Tweet:\n{tweet}",
    task_prompt_variables=["tweet"],
    tools=None,
    output_format=Sentiment,
    iterations=3,
)

In [27]:
def classify_sentiment(tweet: str) -> int:
    """Classify one tweet with the sentiment agent.

    Args:
        tweet: The tweet text.

    Returns:
        The agent's ``sentiment`` value; any falsy value (e.g. ``None``) is
        returned as 0.
    """
    result = sentiment_analysis_agent.invoke({'tweet': tweet})
    # NOTE(review): reset() presumably clears agent state between tweets — verify.
    sentiment_analysis_agent.reset()
    label = result.final_answer['sentiment']
    if label:
        return label
    return 0

In [28]:
df_tweets_sampled["prediction"] = [classify_sentiment(tweet) for tweet in df_tweets_sampled.tweet]

25/05/24 09:02:29 INFO Prompt:
Tweet:
@AuntSay I miss him.  I think I might be turning into a Danny girl, he's so sweet 
25/05/24 09:02:35 INFO Raw response:
{
  "thought": "The tweet expresses a positive sentiment towards 'Danny', indicating that the user misses him and finds him sweet.",
  "tool": "Final Answer",
  "tool_input": {"sentiment": 1}
}
25/05/24 09:02:35 INFO Thought:
The tweet expresses a positive sentiment towards 'Danny', indicating that the user misses him and finds him sweet.
25/05/24 09:02:35 INFO Final answer:
{'sentiment': 1}
25/05/24 09:02:35 INFO Prompt:
Tweet:
I want a smart phone 
25/05/24 09:02:41 INFO Raw response:
{
  "thought": "The tweet doesn't express a positive or negative sentiment. It's a neutral statement expressing a desire or need.",
  "tool": "Final Answer",
  "tool_input": {"sentiment": -1}
}
25/05/24 09:02:41 INFO Thought:
The tweet doesn't express a positive or negative sentiment. It's a neutral statement expressing a desire or need.
25/05/24 0

In [29]:
df_tweets_sampled.head(10)

Unnamed: 0,sentiment,id,date,query,user,tweet,prediction
1356672,1,2047812115,Fri Jun 05 14:13:05 PDT 2009,NO_QUERY,wendy_uk,@AuntSay I miss him. I think I might be turni...,1
789758,0,2325475538,Thu Jun 25 05:31:09 PDT 2009,NO_QUERY,ccrover,I want a smart phone,-1
114196,0,1826173653,Sun May 17 07:52:07 PDT 2009,NO_QUERY,kurtis252,Just had power but lost work on pc,0
496496,0,2185589446,Mon Jun 15 17:36:32 PDT 2009,NO_QUERY,thelindsayellis,"@Calavphin This is just for the summer really,...",0
1585144,1,2190643974,Tue Jun 16 03:19:18 PDT 2009,NO_QUERY,Internetvirgin,Yes I do play on expert,0
511565,0,2189927010,Tue Jun 16 01:16:51 PDT 2009,NO_QUERY,goddessmaat,Just one little thing breaks the tenuous threa...,0
1381212,1,2052325382,Sat Jun 06 00:21:15 PDT 2009,NO_QUERY,bripearls294,Wondering if Jonesy actually went to bed when ...,0
1083197,1,1968822822,Fri May 29 22:18:11 PDT 2009,NO_QUERY,ACD93,@itsNICKJONAS seriously....where have ya bein?,0
1365057,1,2049894013,Fri Jun 05 17:41:25 PDT 2009,NO_QUERY,farahhdibs,Good luck to Nurul and Yanto for SI auditions!...,1
1057988,1,1962820383,Fri May 29 11:43:52 PDT 2009,NO_QUERY,Jago_X,@arnaldo42 That last tweet was meant for you o...,-1


### Structuring Unstructured Data

In [30]:
# Load up to 5 randomly chosen job postings as plain text.
path = Path("./data/jobs")
# iterdir() yields entries in arbitrary order; sorting plus a fixed seed makes
# the selection reproducible across runs and machines.
filenames = sorted(file.name for file in path.iterdir() if file.is_file())
# Cap the sample size so a directory with fewer than 5 files does not raise
# ValueError. A private Random instance leaves the global RNG state untouched.
filenames = random.Random(42).sample(filenames, min(5, len(filenames)))

# errors="replace" substitutes undecodable bytes instead of raising.
jobs = [
    (path / filename).read_text(encoding="utf-8", errors="replace")
    for filename in filenames
]

In [31]:
system_prompt = """Take the following job and extract data about the job.

Respond with the job information:
- job title: title of the job.
- job class no: class number.
- open date: when the position was created. Use DD-MM-YYYY.
- salary: the salary ranges.
- deadline: when the application deadline is. Use DD-MM-YYYY.
- application form: online or email or fax.
- where to apply: url or location."""

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=512,
    temperature=0.0,
)

# Structured record extracted from one job posting; one field per column of the
# resulting table.
class Job(BaseModel):
    job_title: str = Field(description="The job title.")
    job_class_no: int = Field(description="The job class code.")
    job_duties: str = Field(description="The duties of the job.")
    open_date: str = Field(description="When the position was opened. Format: DD-MM-YYYY.")
    salary: list[str] = Field(description="A list of salary ranges. Format: 'min salary to max salary'.")
    deadline: str = Field(description="The application deadline. Format: DD-MM-YYYY")
    application_form: str = Field(description="The form of the application (e.g. online, fax, email).")
    where_to_apply: str = Field(description="The url to apply at or location to send the fax or email address.")

job_data_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Job description:\n{job}",
    task_prompt_variables=["job"],
    tools=None,
    output_format=Job,
    iterations=3,
)

In [32]:
def extract_jobs(jobs: list[str]) -> pd.DataFrame:
    """Run the job-data agent on every posting and tabulate the answers.

    Args:
        jobs: Raw job posting texts.

    Returns:
        A DataFrame built from the agent's final answers, one row per posting,
        in input order.
    """

    def extract_one(posting: str) -> Any:
        answer = job_data_agent.invoke({"job": posting}).final_answer
        # NOTE(review): reset() presumably clears agent state between postings — verify.
        job_data_agent.reset()
        return answer

    return pd.DataFrame([extract_one(posting) for posting in jobs])

In [33]:
df_jobs = extract_jobs(jobs)

25/05/24 09:03:03 INFO Prompt:
Job description:
WATER BIOLOGIST

Class Code:       7856
Open Date:  12-02-16
(Exam Open to All, including Current City Employees)

ANNUAL SALARY

$61,491 to $87,508; $66,294 to $94,335; and $76,191 to $108,408
The salary in the Department of Water and Power is $73,142 to $90,869; $75,857 to $94,252; $78,132 to $97,092; and $93,772 to $116,489

NOTES:

1. For information regarding reciprocity between the City of Los Angeles departments and LADWP, go to http://per.lacity.org/Reciprocity_CityDepts_and_DWP.pdf.
2. The current salary range is subject to change. You may confirm the starting salary with the hiring department before accepting a job offer.
3. Candidates from the eligible list are normally appointed to vacancies in the lower pay grade positions.

DUTIES

A Water Biologist performs oceanographic, marine biological, estuarine, limnological, stormwater, freshwater, and wetland surveys; conducts analysis of marine, estuarine, and freshwater aquatic or

In [34]:
df_jobs.head()

Unnamed: 0,job_title,job_class_no,job_duties,open_date,salary,deadline,application_form,where_to_apply
0,Water Biologist,7856,"A Water Biologist performs oceanographic, mari...",12-02-2016,"[$61,491 to $87,508, $66,294 to $94,335, $76,1...",15-12-2016,online,https://www.governmentjobs.com/careers/lacity
1,ELECTRICAL REPAIR SUPERVISOR,3855,"An Electrical Repair Supervisor assigns, revie...",22-09-2017,"[$115,570 to $122,022]",05-10-2017,online,https://www.governmentjobs.com/careers/lacity/...
2,TREASURY ACCOUNTANT,1609,A Treasury Accountant performs professional tr...,28-10-2016,"[$59,904 to $87,591, $75,000 to $109,640]",10-11-2016,online,https://www.governmentjobs.com/careers/lacity
3,PRINCIPAL CIVIL ENGINEER,9489,A Principal Civil Engineer serves as administr...,23-02-2018,"[$123,067 to $179,944]",08-03-2018,online,https://www.governmentjobs.com/careers/lacity/...
4,AIRPORT POLICE LIEUTENANT,3227,"An Airport Police Lieutenant plans, supervises...",16-09-2016,"[$102,186 to $134,028]",29-09-2016,online,https://www.governmentjobs.com/careers/lacity/...


## 5. Multi-Agents

### Comparing Unstructured Data

In [35]:
def get_job(path: str) -> str:
    """Read one job posting from disk and return its text.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of raising,
    which matches how the job files are loaded elsewhere in this notebook.

    Args:
        path: Location of the job posting text file.

    Returns:
        The full file content as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, "r", encoding="utf-8", errors="replace") as file:
        return file.read()

# Two fixed postings that serve as the inputs of the comparison chain.
job1 = get_job("./data/jobs/ELECTRICAL ENGINEERING ASSOCIATE 7525 093016 REV 100416.txt")
job2 = get_job("./data/jobs/ELECTRICAL MECHANIC 3841 012017.txt")

In [36]:
system_prompt = "Take the following job and extract data about the job"

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=512,
    temperature=0.0,
)

# The two output models carry the same information but use distinct field names
# (job1_* / job2_*), matching the placeholders of the comparison prompt.
class Job1(BaseModel):
    job1_title: str = Field(description="The job title.")
    job1_duties: str = Field(description="The duties of the job.")
    salary1: list[str] = Field(description="A list of salary ranges. Format: 'min salary to max salary'.")

class Job2(BaseModel):
    job2_title: str = Field(description="The job title.")
    job2_duties: str = Field(description="The duties of the job.")
    salary2: list[str] = Field(description="A list of salary ranges. Format: 'min salary to max salary'.")

# One extraction agent per posting; they differ only in prompt variable and output model.
job_agent1 = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Job description:\n{job1}",
    task_prompt_variables=["job1"],
    tools=None,
    output_format=Job1,
    iterations=10,
)

job_agent2 = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="Job description:\n{job2}",
    task_prompt_variables=["job2"],
    tools=None,
    output_format=Job2,
    iterations=10,
)

In [37]:
system_prompt = "Take the following 2 job descriptions and respond with the similarities and differences of the jobs."

# The placeholders below use the field names of the Job1 / Job2 output models.
task_prompt = """Compare the 2 given job descriptions:

Job 1:
Job title: {job1_title}
Job duties:
{job1_duties}
Salary:
{salary1}


Job 2:
Job title: {job2_title}
Job duties:
{job2_duties}
Salary:
{salary2}"""

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=512,
    temperature=0.0,
)

# Shape of the comparison agent's final answer.
class JobComparison(BaseModel):
    similarities: str = Field(description="The job similarities.")
    differences: str = Field(description="The job differences.")

job_comparison_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt=task_prompt,
    task_prompt_variables=["job1_title", "job1_duties", "salary1", "job2_title", "job2_duties", "salary2"],
    tools=None,
    output_format=JobComparison,
    iterations=10,
)

In [38]:
# Chain the two extraction agents and the comparison agent, in that order.
# NOTE(review): presumably each agent's output fields become prompt variables for
# the agents after it — verify against AgentChain.
chain = AgentChain(
    chain=[job_agent1, job_agent2, job_comparison_agent],
    chain_variables=["job1", "job2"],
)

In [39]:
response = chain.invoke({"job1": job1, "job2": job2})

25/05/24 09:04:32 INFO Prompt:
Job description:
ELECTRICAL ENGINEERING ASSOCIATE
Class Code:       7525
Open Date:  09-30-16
REVISED: 10-04-16
 (Exam Open to All, including Current City Employees)
ANNUAL SALARY 

$66,231 to $94,252; $74,082 to $105,444; $82,497 to $117,346; and $89,638 to $127,556
The salary in the Department of Water and Power is $77,360 to $96,110; $91,934 to $114,213; $99,722 to $123,881; and $107,156 to 
$133,130

NOTES:

1. Candidates from the eligible list are normally appointed to vacancies in the lower pay grade positions.
2. For information regarding reciprocity between City of Los Angeles departments and LADWP, go to: http://per.lacity.org/Reciprocity_CityDepts_and_DWP.pdf.
3. The current salary range is subject to change. You may confirm the starting salary with the hiring department before accepting a job offer.

DUTIES

An Electrical Engineering Associate performs professional electrical engineering work in the preparation of designs, plans, specifications

In [40]:
pprint(response.final_answer["similarities"])

('Both jobs are related to electrical work, requiring knowledge and skills in '
 'handling electrical systems and equipment. Both roles also involve work in '
 'various facilities and buildings.')


In [41]:
pprint(response.final_answer["differences"])

('The Electrical Engineering Associate is more focused on design, planning, '
 'and quality assurance, while the Electrical Mechanic is more hands-on, '
 'dealing with installation and maintenance. The salary range for the '
 'Electrical Engineering Associate is wider and generally higher than that of '
 'the Electrical Mechanic.')


### Machine Learning Code Generation

In [42]:
system_prompt = """You are a Data Science agent, which helps the user solve machine learning problems.

Respond with 1 of the following machine learning problems:
- Classification
- Regression
- Clustering
- Time series forecasting"""

task_prompt = """Choose the machine learning problem best suited for the following problem and dataset.

Problem description:
{problem_description}

Dataset:
Number of rows: {dataset_size}
Schema:
{dataset_schema}"""

# `temperature` is the sampling keyword used by the first model definition in
# this notebook; `float` is not a model setting.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model='gpt-4',
    max_tokens=128,
    temperature=0.0,
)

# Shape of the agent's final answer: the name of the chosen problem type.
class ModelingProblem(BaseModel):
    modeling_problem: str = Field(description="The machine learning problem.")

problem_finder_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt=task_prompt,
    task_prompt_variables=["problem_description", "dataset_size", "dataset_schema"],
    tools=None,
    output_format=ModelingProblem,
    iterations=5,
)

In [43]:
# System prompt: lists the supported problem types and the libraries the
# generated code is allowed to use.
system_prompt = """You are a Data Science agent, which helps the user solve machine learning problems.

You can solve machine learning problems for:
- Classification
- Regression
- Clustering
- Time series forecasting

You have access to the following Python libraries:
- pandas
- numpy
- scikit-learn"""

# Task prompt template; its placeholders are the names listed in
# `task_prompt_variables` below.
task_prompt = """Given the following machine learning problem, respond with Python code.

Modeling problem: {modeling_problem}

Dataset:
Number of rows: {dataset_size}
Schema:
{dataset_schema}
First 10 rows of dataset:
{dataset_snippet}"""

# `temperature` is the sampling keyword OpenAILanguageModel is given in the first
# LLM cell of this notebook; 0.0 keeps the generated code as repeatable as possible.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    max_tokens=512,
    temperature=0.0,
)

# Structured output schema for the agent's final answer.
# (Kept as a `#` comment: a class docstring would become part of the pydantic schema.)
class AutoMLCode(BaseModel):
    code: str = Field(description="The Python machine learning code.")

# No tools: the agent writes the code directly from the prompt.
# NOTE(review): `iterations` presumably caps the number of ReAct steps — confirm
# against ReActAgent.create.
ml_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt=task_prompt,
    task_prompt_variables=["modeling_problem", "dataset_size", "dataset_schema", "dataset_snippet"],
    tools=None,
    output_format=AutoMLCode,
    iterations=10,
)

In [44]:
# Chain the two agents in the listed order: problem_finder_agent, then ml_agent.
# chain_variables names the inputs the caller supplies. "modeling_problem" (needed by
# ml_agent's prompt) is not among them — presumably it is taken from the first
# agent's output; confirm against AgentChain.
ml_chain = AgentChain(
    chain=[problem_finder_agent, ml_agent],
    chain_variables=["problem_description", "dataset_size", "dataset_schema", "dataset_snippet"]
)

In [45]:
# DataFrame.info() prints instead of returning a value, so route it into an
# in-memory buffer and keep the text for use in the prompts.
info_str = StringIO()
df_tweets.info(buf=info_str)
dataset_schema = info_str.getvalue()

In [46]:
# Run the chain; the keys match the `chain_variables` declared for ml_chain.
# The snippet is the first 10 rows rendered as a markdown table.
response = ml_chain.invoke({
    "problem_description": "I want to classify the sentiment of tweets.",
    "dataset_size": len(df_tweets),
    "dataset_schema": dataset_schema,
    "dataset_snippet": str(df_tweets.head(10).to_markdown())
})

25/05/24 09:05:13 INFO Prompt:
Choose the machine learning problem best suited for the following problem and dataset.

Problem description:
I want to classify the sentiment of tweets.

Dataset:
Number of rows: 1600000
Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   sentiment  1600000 non-null  int64 
 1   id         1600000 non-null  int64 
 2   date       1600000 non-null  object
 3   query      1600000 non-null  object
 4   user       1600000 non-null  object
 5   tweet      1600000 non-null  object
dtypes: int64(2), object(4)
memory usage: 73.2+ MB

25/05/24 09:05:18 INFO Raw response:
{
  "thought": "Given the problem description and the dataset, it seems like the user wants to predict the sentiment of tweets, which is a categorical variable. This is a typical example of a classification problem in machine learning.",
  "tool

In [47]:
# Show the generated code held under the "code" key of the final answer.
print(response.final_answer["code"])

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Assuming df is the DataFrame
X = df['tweet']
y = df['sentiment']

# Preprocessing steps can be added here

# Convert text into numerical features
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Logistic Regression model
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))


### Forecasting

In [48]:
from datetime import timedelta
from language_models.tools.forecasting import get_earthquakes_data, ml_model

In [49]:
# Argument schema for the forecasting tool. Both bounds default to None, in which
# case `forecast` substitutes its own default window.
# (Kept as a `#` comment: a class docstring would become part of the pydantic schema.)
class Forecast(BaseModel):
    start_time: str = Field(
        default=None,
        description="Limit to events on or after the specified start time. NOTE: All times use ISO8601 Date/Time format. Unless a timezone is specified, UTC is assumed.",
    )
    end_time: str = Field(
        default=None,
        description="Limit to events on or before the specified end time. NOTE: All times use ISO8601 Date/Time format. Unless a timezone is specified, UTC is assumed.",
    )

def forecast(start_time=None, end_time=None):
    """Run the forecasting model on earthquake events from a time window.

    Args:
        start_time: Start of the window, passed through to `get_earthquakes_data`.
            Defaults to 30 days before the current UTC date.
        end_time: End of the window, passed through to `get_earthquakes_data`.
            Defaults to the current UTC date.

    Returns:
        A dict with a single key, 'predictions', holding the result of
        `ml_model.predict` converted to a list of per-row records.
    """
    # Local import: the notebook only imports `datetime` and `timedelta` from this module.
    from datetime import timezone

    # Take "today" once, and in UTC: the tool's schema states that times without a
    # timezone are treated as UTC, so a local-clock date could be off by a day, and
    # two separate now() calls could disagree around midnight.
    today_utc = datetime.now(timezone.utc).date()
    if start_time is None:
        start_time = today_utc - timedelta(days=30)
    if end_time is None:
        end_time = today_utc

    df = get_earthquakes_data('https://earthquake.usgs.gov/fdsnws/event/1/query?', start_time, end_time)
    df_pred = ml_model.predict(df)
    return {'predictions': df_pred.to_dict(orient='records')}

In [50]:
# Wrap `forecast` as a tool named 'Forecast'; its arguments are described by the Forecast schema.
forecasting_tool = Tool(func=forecast, name='Forecast', description='Test forecast model on real-time events.', args_schema=Forecast)

In [51]:
# Task prompt template: the user's question followed by an instruction to resolve
# it into a concrete start/end time with the Current Date tool.
task_prompt = """{question}

Use the Current Date tool and respond with the start time and end time."""

# `temperature` is the sampling keyword OpenAILanguageModel is given in the first
# LLM cell of this notebook; 0.0 keeps the answers as repeatable as possible.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    max_tokens=256,
    temperature=0.0,
)

# Structured output schema for the agent's final answer. The field names match the
# parameters of `forecast`.
# (Kept as a `#` comment: a class docstring would become part of the pydantic schema.)
class DateRange(BaseModel):
    start_time: str = Field(description="The start date. NOTE: All times use ISO8601 Date/Time format. Unless a timezone is specified, UTC is assumed.")
    end_time: str = Field(description="The end date. NOTE: All times use ISO8601 Date/Time format. Unless a timezone is specified, UTC is assumed.")

# Empty system prompt: the task prompt carries all instructions.
# NOTE(review): `iterations` presumably caps the number of ReAct steps — confirm
# against ReActAgent.create.
time_wizard_agent = ReActAgent.create(
    llm=llm,
    system_prompt="",
    task_prompt=task_prompt,
    task_prompt_variables=["question"],
    tools=[current_date_tool],
    output_format=DateRange,
    iterations=5,
)

In [52]:
# Chain an agent with a plain tool: time_wizard_agent is listed first, forecasting_tool second.
# The agent's DateRange output has the same field names (start_time, end_time) as the
# tool's arguments — presumably AgentChain forwards them; confirm against AgentChain.
forecast_chain = AgentChain(
    chain=[time_wizard_agent, forecasting_tool],
    chain_variables=["question"],
)

In [53]:
# Run the chain; "question" is the only chain variable declared for forecast_chain.
response = forecast_chain.invoke({"question": "Run a forecast using the past week as data."})

25/05/24 09:05:40 INFO Prompt:
Run a forecast using the past week as data.

Use the Current Date tool and respond with the start time and end time.
25/05/24 09:05:43 INFO Raw response:
{
  "thought": "First, I need to get the current date to calculate the past week's dates.",
  "tool": "Current Date",
  "tool_input": {}
}
25/05/24 09:05:43 INFO Thought:
First, I need to get the current date to calculate the past week's dates.
25/05/24 09:05:43 INFO Tool:
Current Date
25/05/24 09:05:43 INFO Tool input:
{}
25/05/24 09:05:43 INFO Tool response:
2024-05-25 09:05:43.140494
25/05/24 09:05:47 INFO Raw response:
{
  "thought": "Now that I have the current date, I can calculate the start and end times for the past week. The start time would be 7 days before the current date and the end time would be the current date.",
  "tool": "Final Answer",
  "tool_input": {
    "start_time": "2024-05-18T09:05:43.140494",
    "end_time": "2024-05-25T09:05:43.140494"
  }
}
25/05/24 09:05:47 INFO Thought:
Now

In [54]:
# Show the entry stored under "Forecast" in the final answer ("Forecast" is also the
# name given to forecasting_tool).
print(response.final_answer["Forecast"])

{'predictions': [{'time': Timestamp('2024-05-18 09:08:05.450000+0000', tz='UTC'), 'prediction': 1.896597, 'latitude': 38.8376655578613, 'longitude': -122.798164367676, 'mag': 0.94, 'id': 'nc75008601', 'place': '7 km WNW of Cobb, CA', 'location': '7 km WNW of Cobb, CA'}, {'time': Timestamp('2024-05-18 09:08:41.080000+0000', tz='UTC'), 'prediction': 1.886283, 'latitude': 38.8240013122559, 'longitude': -122.852996826172, 'mag': 0.45, 'id': 'nc75008606', 'place': '10 km NW of The Geysers, CA', 'location': '10 km NW of The Geysers, CA'}, {'time': Timestamp('2024-05-18 09:10:06.760000+0000', tz='UTC'), 'prediction': 2.788089, 'latitude': 32.9258333, 'longitude': -115.5393333, 'mag': 1.95, 'id': 'ci40589335', 'place': '6 km S of Brawley, CA', 'location': '6 km S of Brawley, CA'}, {'time': Timestamp('2024-05-18 09:15:26.510000+0000', tz='UTC'), 'prediction': 2.788089, 'latitude': 32.9298333, 'longitude': -115.5395, 'mag': 1.21, 'id': 'ci40589343', 'place': '6 km S of Brawley, CA', 'location': 

### LLM-backed Tools

In [55]:
# Reset the agents that are reused behind tools in the following cells
# (problem_finder_agent and ml_agent are the two links of ml_chain).
# NOTE(review): reset() presumably clears each agent's accumulated conversation state — confirm.
earthquake_agent.reset()
problem_finder_agent.reset()
ml_agent.reset()

In [56]:
# Argument schema for the "Earthquake Agent" tool: a single free-text question.
class EarthquakeAgent(BaseModel):
    question: str = Field(description="The question regarding earthquakes.")

def answer_earthquake_questions(question: str) -> Any:
    """Forward a question to `earthquake_agent` and return that agent's final answer."""
    agent_inputs = {"question": question}
    return earthquake_agent.invoke(agent_inputs).final_answer

# Expose the earthquake agent as a tool so another agent can delegate to it.
# `name` matches the tool name referenced in the delegating agent's system prompt.
earthquake_agent_tool = Tool(
    func=answer_earthquake_questions,
    name="Earthquake Agent",
    description="Use this tool to answer questions about earthquakes.",
    args_schema=EarthquakeAgent,
)

# Argument schema for the "ML Agent" tool; the field names mirror the parameters of
# `generate_ml_code`.
# (Kept as a `#` comment: a class docstring would become part of the pydantic schema.)
class MLAgent(BaseModel):
    problem_description: str = Field(description="The user problem.")
    # No trailing comma after Field(...): `x: int = Field(...),` would assign a
    # one-element tuple as the default and drop the field's metadata.
    dataset_size: int = Field(description="The size of the dataset.")
    dataset_schema: str = Field(description="The dataset schema or information.")
    dataset_snippet: str = Field(description="The dataset snippet aka the first couple of rows of the dataset.")

def generate_ml_code(problem_description: str, dataset_size: int, dataset_schema: str, dataset_snippet: str) -> Any:
    """Run `ml_chain` on a problem statement plus dataset details and return its final answer."""
    chain_inputs = dict(
        problem_description=problem_description,
        dataset_size=dataset_size,
        dataset_schema=dataset_schema,
        dataset_snippet=dataset_snippet,
    )
    return ml_chain.invoke(chain_inputs).final_answer

# Expose the ML chain as a tool so another agent can delegate to it.
# `name` matches the tool name referenced in the delegating agent's system prompt.
ml_agent_tool = Tool(
    func=generate_ml_code,
    name="ML Agent",
    description="Use this tool to generate machine learning code given a problem.",
    args_schema=MLAgent,
)

In [57]:
# System prompt: routing rules that tell the agent which tool to delegate to.
# The tool names used here match the `name` of the two tools passed below.
system_prompt = """You are an Agent that delegates tasks to other Agents by using the appropriate tools.

Use the Earthquake Agent when the question is about earthquakes.

Use the ML Agent when the user wants you to generate machine learning code."""

# `temperature` is the sampling keyword OpenAILanguageModel is given in the first
# LLM cell of this notebook; 0.0 keeps the answers as repeatable as possible.
llm = OpenAILanguageModel(
    proxy_client=proxy_client,
    model="gpt-4",
    max_tokens=2048,
    temperature=0.0,
)

# Structured output schema for the agent's final answer.
# (Kept as a `#` comment: a class docstring would become part of the pydantic schema.)
class Output(BaseModel):
    content: str = Field(description="The final answer.")

# The task prompt is the user's prompt passed through verbatim.
# NOTE(review): `iterations` presumably caps the number of ReAct steps — confirm
# against ReActAgent.create.
almighty_agent = ReActAgent.create(
    llm=llm,
    system_prompt=system_prompt,
    task_prompt="{prompt}",
    task_prompt_variables=["prompt"],
    tools=[earthquake_agent_tool, ml_agent_tool],
    output_format=Output,
    iterations=10,
)



In [58]:
# Ask an earthquake question; the system prompt directs such questions to the Earthquake Agent tool.
response = almighty_agent.invoke({"prompt": "How many earthquakes happened today?"})

25/05/24 09:05:54 INFO Prompt:
How many earthquakes happened today?
25/05/24 09:05:57 INFO Raw response:
{
  "thought": "The user is asking about earthquakes. I should use the Earthquake Agent to get this information.",
  "tool": "Earthquake Agent",
  "tool_input": {"question": "How many earthquakes happened today?"}
}
25/05/24 09:05:57 INFO Thought:
The user is asking about earthquakes. I should use the Earthquake Agent to get this information.
25/05/24 09:05:57 INFO Tool:
Earthquake Agent
25/05/24 09:05:57 INFO Tool input:
{'question': 'How many earthquakes happened today?'}
25/05/24 09:05:57 INFO Prompt:
How many earthquakes happened today?
25/05/24 09:06:00 INFO Raw response:
{
  "thought": "I need to find out the current date first to determine the start and end times for the earthquake query.",
  "tool": "Current Date",
  "tool_input": {}
}
25/05/24 09:06:00 INFO Thought:
I need to find out the current date first to determine the start and end times for the earthquake query.
25/0

In [59]:
# Show the "content" field of the final answer (the only field of the Output schema).
print(response.final_answer["content"])

There have been 52 earthquakes today.


In [60]:
# Build a single prompt that carries the request plus the dataset size, the schema text
# captured earlier in `dataset_schema`, and the first 10 rows as a markdown table.
prompt = f"""Give me code to train a model that predicts the sentiment of tweet.

Dataset:
Number of rows: {len(df_tweets)}
Schema:
{dataset_schema}
First 10 rows of dataset:
{df_tweets.head(10).to_markdown()}"""

# The system prompt directs code-generation requests to the ML Agent tool.
response = almighty_agent.invoke({"prompt": prompt})

25/05/24 09:06:10 INFO Prompt:
Give me code to train a model that predicts the sentiment of tweet.

Dataset:
Number of rows: 1600000
Schema:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column     Non-Null Count    Dtype 
---  ------     --------------    ----- 
 0   sentiment  1600000 non-null  int64 
 1   id         1600000 non-null  int64 
 2   date       1600000 non-null  object
 3   query      1600000 non-null  object
 4   user       1600000 non-null  object
 5   tweet      1600000 non-null  object
dtypes: int64(2), object(4)
memory usage: 73.2+ MB

First 10 rows of dataset:
|    |   sentiment |         id | date                         | query    | user            | tweet                                                                                                               |
|---:|------------:|-----------:|:-----------------------------|:---------|:----------------|:-----------------------------------

In [61]:
# Show the "content" field of the final answer (the only field of the Output schema).
print(response.final_answer["content"])

Here is the Python code to train a model that predicts the sentiment of a tweet:

```python
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline

# Define the pipeline
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', SGDClassifier())
])

# Drop irrelevant columns
df = df.drop(columns=['id', 'date', 'query', 'user'])

# Split the data into features and target variable
X = df['tweet']
y = df['sentiment']

# Train the model
pipeline.fit(X, y)
```
