This notebook contains the code for a custom agent that combines SQL tools, ReAct prompting and a few-shot example strategy. The LLM used in this notebook is OpenAI's GPT-4o (`gpt-4o`).

Install the required libraries:

In [1]:
pip install --upgrade --quiet langchain langchain-community langchain-openai

[0mNote: you may need to restart the kernel to use updated packages.


Import the necessary modules:

In [2]:
from langchain_openai import ChatOpenAI

from langchain.agents import tool
from langchain.agents import AgentExecutor
from langchain.agents import create_react_agent
from langchain.agents import ZeroShotAgent

from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser

from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages

from langchain.agents.agent_toolkits import SQLDatabaseToolkit

from langchain.agents.agent_toolkits.sql.prompt import SQL_FUNCTIONS_SUFFIX
from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX
from langchain.agents.agent_toolkits.sql.prompt import SQL_SUFFIX

from langchain.agents.mrkl import prompt as react_prompt
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS


from langchain.sql_database import SQLDatabase

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import FewShotChatMessagePromptTemplate

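Provide your OpenAI API key in the cell below so that it is available to the notebook as the `OPENAI_API_KEY` environment variable. Never commit a real key, and clear any cell output that shows it before sharing the notebook.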

In [3]:
# Make the OpenAI API key (needed for gpt-4o) available as an environment variable.
# `%env OPENAI_API_KEY=<key>` echoes the key into the cell output, which is then
# saved with the notebook; prompting with getpass keeps the key out of the file.
import os
from getpass import getpass

# Only prompt when the key has not already been provided by the environment.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

env: OPENAI_API_KEY=<redacted>


Create the large language model that is going to be used:

In [4]:
# Chat model shared by the SQL toolkit and the agent. The temperature is pinned to 0
# (presumably to keep the answers as repeatable as possible between runs).
llm = ChatOpenAI(model="gpt-4o", temperature=0)

Set up the database that is going to be used:

In [5]:
# Connect to the SQLite database file. The URI holds a relative path, so
# `wijkpaspoort.sqlite3` is expected in the notebook's working directory.
db = SQLDatabase.from_uri("sqlite:///wijkpaspoort.sqlite3")

Set up the SQL-tools for the agent to use:

In [6]:
# Build the SQL toolkit on top of the database connection and the LLM, and
# collect the tools the agent is allowed to call.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
tools = toolkit.get_tools()

# Derive the comma-separated tool names from the tools themselves, so the
# `{tool_names}` prompt variable can never drift from what the toolkit provides.
tool_names = ", ".join(t.name for t in tools)

Create the prompt that is sent to the agent:

Some examples that are included in the prompt:

In [7]:
# Few-shot examples: each entry pairs a natural-language question ("input") with
# the SQL query ("query") that answers it. Aggregated rankings always order by the
# aggregate expression itself, never by a bare column of the grouped table.
examples = [
{
    "input": "How many measurements are there?", 
    "query": "SELECT COUNT(*) AS total_measurements FROM measurements;",
},
{
    "input": "Find all measurements for the municipality of 'Amsterdam'.",
    "query": "SELECT * FROM measurements WHERE municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name = 'Amsterdam');",
},
{
    "input": "List all the relevant information about the people's income in the province of 'Zeeland'.",
    "query": "SELECT people_with_personal_income, share_of_people, average_standardized_income, mediaan_gestandardized_income, average_personal_income, mediaan_personal_income FROM peoples_income WHERE province_code = (SELECT province_code FROM provinces WHERE province_name = 'Zeeland');",
},
{
    "input": "Find the total number of outdoor natural grass fields for soccer.",
    "query": "SELECT SUM(outdoor_natural_grass_fields) FROM sports_facilities WHERE sport = 'voetbal';",
},
{
    "input": "List all the schools in Amsterdam.",
    "query": "SELECT * FROM schools WHERE municipality_name = 'AMSTERDAM';",
},
{
    "input": "How many districts are there in Amsterdam?",
    "query": "SELECT COUNT(*) FROM districts WHERE municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name = 'Amsterdam');",
},
{
    "input": "Find the total number of measurements.",
    "query": "SELECT COUNT(*) FROM measurements;",
},
{
    "input": "List all the districts where, on average, the percentage of people who meet the physical activity guidelines is above 60%. Can you order them from high to low for me?",
    "query": "SELECT d.district_name, mun.municipality_name, ROUND(AVG(mea.meets_physical_activity_guidelines),2) FROM measurements mea JOIN districts d ON mea.district_code = d.district_code JOIN municipalities mun ON d.municipality_code = mun.municipality_code WHERE TRIM(mea.meets_physical_activity_guidelines) != '.' AND mea.district_code != -1 GROUP BY d.district_name HAVING ROUND(AVG(mea.meets_physical_activity_guidelines),2) > 60 ORDER BY ROUND(AVG(mea.meets_physical_activity_guidelines),2) DESC;",
},
{
    "input": "Give me the top 10 municipalities and their average overweight percentage where the percentage of people who have overweight is above 65%",
    # Rank by the per-municipality average; ordering by the bare column would use
    # an arbitrary row of each group and make the "top 10" meaningless.
    "query": "SELECT mun.municipality_name, ROUND(AVG(mea.overweight),2) FROM measurements mea JOIN municipalities mun ON mea.municipality_code = mun.municipality_code WHERE mea.municipality_code != -1 GROUP BY mun.municipality_name HAVING ROUND(AVG(mea.overweight),2) > 65 ORDER BY ROUND(AVG(mea.overweight),2) DESC LIMIT 10;",
},
{
    "input": "Can you find me the top 10 municipalities by the average percentage of people that have overweight but have a average personal income between 27 and 28",
    "query": "SELECT mun.municipality_name, ROUND(AVG(mea.overweight),2), ROUND(AVG(pi.average_personal_income), 2) FROM measurements mea JOIN municipalities mun ON mea.municipality_code = mun.municipality_code JOIN peoples_income pi ON mun.municipality_code = pi.municipality_code WHERE pi.average_personal_income IS NOT NULL AND TRIM(pi.average_personal_income) != '.' GROUP BY mun.municipality_name HAVING ROUND(AVG(pi.average_personal_income), 2) BETWEEN 27 AND 28 ORDER BY ROUND(AVG(mea.overweight),2) DESC, ROUND(AVG(pi.average_personal_income),2) DESC LIMIT 10;",
},
{
    "input": "Name the top 20 municipalities with the highest average personal income also show me their average personal incomes.",
    "query": "SELECT mun.municipality_name, ROUND(AVG(pi.average_personal_income),2) FROM peoples_income pi JOIN municipalities mun ON pi.municipality_code = mun.municipality_code WHERE pi.average_personal_income IS NOT NULL AND TRIM(pi.average_personal_income) != '.' GROUP BY mun.municipality_name ORDER BY ROUND(AVG(pi.average_personal_income),2) DESC LIMIT 20;",
},
{
    "input": "Name the top 20 municipalities with the lowest average personal income also show me their incomes.",
    "query": "SELECT mun.municipality_name, ROUND(AVG(pi.average_personal_income),2) FROM peoples_income pi JOIN municipalities mun ON pi.municipality_code = mun.municipality_code WHERE pi.average_personal_income IS NOT NULL AND TRIM(pi.average_personal_income) != '.' GROUP BY mun.municipality_name ORDER BY ROUND(AVG(pi.average_personal_income),2) ASC LIMIT 20;",
},
{
    "input": "Which 30 municipalities have, on average, the lowest median standardised income and what is the minimum percentage of heavy drinkers in that municipality?",
    "query": "SELECT mun.municipality_name, ROUND(AVG(pi.mediaan_gestandardized_income),2), MIN(mea.heavy_drinker) FROM peoples_income pi JOIN municipalities mun ON pi.municipality_code = mun.municipality_code JOIN measurements mea ON pi.municipality_code = mea.municipality_code WHERE TRIM(pi.average_personal_income) != '.' AND pi.average_personal_income IS NOT NULL GROUP BY municipality_name ORDER BY ROUND(AVG(pi.mediaan_gestandardized_income),2) ASC LIMIT 30;",
},
{
    "input": "Which municipalities are in the province of Zuid-Holland?",
    "query": "SELECT municipality_name FROM municipalities WHERE province_code = (SELECT province_code FROM provinces WHERE province_name LIKE 'Zuid-Holland');",
},
{
    "input": "Which schools are located in Hendrik-Ido-Ambacht and what is their denomination?",
    "query": "SELECT branch_name, denomination FROM schools  WHERE municipality_name LIKE 'HENDRIK-IDO-AMBACHT';",
},
{
    "input": "Find the total number of outdoor synthetic grass fields for hockey.",
    "query": "SELECT SUM(outdoor_synthetic_grass_fields) FROM sports_facilities WHERE sport LIKE '%hockey';",
},
{
    "input": "Find the total number of indoor facilities for basketbal.",
    "query": "SELECT SUM(total_indoor_services) FROM sports_facilities WHERE sport LIKE '%basketbal';",
},
{
    "input": "Name all the neighborhoods in the municipality of Amsterdam.",
    "query": "SELECT neighborhood_name FROM neighborhoods WHERE municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name LIKE 'Amsterdam');",
},
{
    "input": "Can you give me the top 20 neighborhoods in Amsterdam where, on average, the amount of weekly sporters is the most?",
    "query": "SELECT nbrhd.neighborhood_name, ROUND(AVG(mea.weekly_sporters ),2) FROM neighborhoods nbrhd JOIN measurements mea ON nbrhd.neighborhood_code = mea.neighborhood_code WHERE nbrhd.municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name LIKE 'Amsterdam') AND TRIM(mea.weekly_sporters) != '.' GROUP BY nbrhd.neighborhood_name ORDER BY ROUND(AVG(mea.weekly_sporters ),2) DESC LIMIT 20;",
},
{
    "input": "List the top 20 neighborhoods in the Netherlands based on the amount of people who had very high stress in the past 4 weeks and have more than 20 percent of heavy drinkers. Also show me in which municipalities these neighborhoods are located",
    "query": "SELECT nbrhd.neighborhood_name,  mun.municipality_name, ROUND(AVG(mea.very_high_stress_in_past_4_weeks ),2), ROUND(AVG(mea.heavy_drinker),2) FROM neighborhoods nbrhd JOIN measurements mea ON nbrhd.neighborhood_code = mea.neighborhood_code JOIN municipalities mun ON nbrhd.municipality_code = mun.municipality_code WHERE TRIM(mea.very_high_stress_in_past_4_weeks) != '.' GROUP BY nbrhd.neighborhood_name HAVING ROUND(AVG(mea.heavy_drinker),2) > 20 ORDER BY ROUND(AVG(mea.very_high_stress_in_past_4_weeks ),2) DESC LIMIT 20;",
},
{
    "input": "Can you find the top 20 municipalities with the highest average disposable income for me, round the average to 3 decimals?",
    "query": "SELECT mun.municipality_name, ROUND(AVG(hi.average_disposable_income),3) FROM household_income hi JOIN municipalities mun ON hi.municipality_code = mun.municipality_code WHERE TRIM(hi.average_disposable_income) != '.' AND hi.average_disposable_income IS NOT NULL GROUP BY mun.municipality_name ORDER BY ROUND(AVG(hi.average_disposable_income),3) DESC LIMIT 20;",
},
{
    "input": "Can you find the top 20 municipalities with the lowest average disposable income for me, round the average to 3 decimals?",
    "query": "SELECT mun.municipality_name, ROUND(AVG(hi.average_disposable_income),3) FROM household_income hi JOIN municipalities mun ON hi.municipality_code = mun.municipality_code WHERE TRIM(hi.average_disposable_income) != '.' AND hi.average_disposable_income IS NOT NULL GROUP BY mun.municipality_name ORDER BY ROUND(AVG(hi.average_disposable_income),3) ASC LIMIT 20;",
},
{
    "input": "Can you find the top 20 municipalities with the lowest average disposable income for me? Also show me how many people in that municipality meet the physical activity guideline and show me their respective percentage of people who have overweight.",
    "query": "SELECT mun.municipality_name, hi.municipality_code, ROUND(AVG(mea.meets_physical_activity_guidelines),2), ROUND(AVG(mea.overweight),2), ROUND(AVG(hi.average_disposable_income),2) FROM household_income hi JOIN municipalities mun ON hi.municipality_code = mun.municipality_code JOIN measurements mea ON hi.municipality_code = mea.municipality_code WHERE TRIM(hi.average_disposable_income) != '.' AND hi.average_disposable_income IS NOT NULL GROUP BY mun.municipality_name ORDER BY ROUND(AVG(hi.average_disposable_income),2) ASC LIMIT 20;",
},
{
    "input": "Can you find the top 20 municipalities with the lowest average disposable income for me? Also show me how many people in that municipality are smokers and how many have a high risk of anxiety or depression.",
    "query": "SELECT mun.municipality_name, ROUND(AVG(hi.average_disposable_income),2), ROUND(AVG(mea.smoker),2), ROUND(AVG(mea.high_risk_of_anxiety_or_depression),2) FROM household_income hi JOIN municipalities mun ON hi.municipality_code = mun.municipality_code JOIN measurements mea ON hi.municipality_code = mea.municipality_code WHERE hi.average_disposable_income IS NOT NULL AND TRIM(hi.average_disposable_income) != '.' GROUP BY mun.municipality_name ORDER BY ROUND(AVG(hi.average_disposable_income),2) ASC LIMIT 20;",
},
{
    "input": "Which districts are in the municipality of Tiel?",
    "query": "SELECT * FROM districts WHERE municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name LIKE 'Tiel');",
},
{
    "input": "How many districts are in the municipality of Tiel?",
    "query": "SELECT COUNT(*) FROM districts WHERE municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name LIKE 'Tiel');",
},
{
    "input": "How many measurements are there in the municipality of Nijkerk where the percentage of people with moderate to high risk of anxiety or depression is above 40%?",
    "query": "SELECT COUNT(*) FROM measurements WHERE municipality_code = (SELECT municipality_code FROM municipalities WHERE municipality_name LIKE '%Nijkerk') AND moderate_high_risk_of_anxiety_or_depression > 40;",
},
{
    "input": "What are the names of the top 10 municipalities where, on average, the percentage of people who meet the alcohol guidelines is the highest?",
    "query": "SELECT mun.municipality_name, ROUND(AVG(mea.meets_alcohol_guidelines),2) FROM measurements mea JOIN municipalities mun ON mea.municipality_code = mun.municipality_code WHERE mea.municipality_code != -1 AND TRIM(mea.meets_alcohol_guidelines) != '.' GROUP BY mun.municipality_name ORDER BY ROUND(AVG(mea.meets_alcohol_guidelines),2) DESC LIMIT 10;",
},
{
    "input": "Can you show me the municipalities where, on average, the average disposable income is between 44 and 45 and also show me the respective percentage of weekly sporters in that municipality?",
    "query": "SELECT mun.municipality_name, ROUND(AVG(hi.average_disposable_income),2), ROUND(AVG(mea.weekly_sporters),2) FROM household_income hi JOIN municipalities mun ON hi.municipality_code = mun.municipality_code JOIN measurements mea ON hi.municipality_code = mea.municipality_code WHERE TRIM(hi.average_disposable_income) != '.' AND hi.average_disposable_income IS NOT NULL AND TRIM(mea.weekly_sporters) != '.' GROUP BY mun.municipality_name HAVING ROUND(AVG(hi.average_disposable_income),2) BETWEEN 44 AND 45 ORDER BY ROUND(AVG(hi.average_disposable_income),2) DESC, ROUND(AVG(mea.weekly_sporters),2) DESC;",
},
{
    "input": "Can you show me the municipalities where, on average, the average personal income is below 28 and also show me the respective percentage of weekly sporters in that municipality?",
    "query": "SELECT mun.municipality_name, ROUND(AVG(pi.average_personal_income),2), ROUND(AVG(mea.weekly_sporters),2) FROM peoples_income pi JOIN municipalities mun ON pi.municipality_code = mun.municipality_code JOIN measurements mea ON pi.municipality_code = mea.municipality_code WHERE TRIM(pi.average_personal_income) != '.' AND pi.average_personal_income IS NOT NULL AND TRIM(mea.weekly_sporters) != '.' GROUP BY mun.municipality_name HAVING ROUND(AVG(pi.average_personal_income),2) < 28 ORDER BY ROUND(AVG(pi.average_personal_income),2) DESC, ROUND(AVG(mea.weekly_sporters),2) DESC;",
},
{
    "input": "Can you show me the top 10 neighborhoods and their municipalities with, on average, the highest percentage of excessive drinkers but where the average standardized income of the corresponding municipality is below 15.",
    "query": "SELECT nbrhd.neighborhood_name, mun.municipality_name, ROUND(AVG(mea.excessive_drinker),2), ROUND(AVG(pi.average_standardized_income),2) FROM measurements mea JOIN neighborhoods nbrhd ON mea.neighborhood_code = nbrhd.neighborhood_code  JOIN municipalities mun ON nbrhd.municipality_code = mun.municipality_code JOIN peoples_income pi ON mun.municipality_code = pi.municipality_code WHERE TRIM(mea.excessive_drinker) != '.' GROUP BY nbrhd.neighborhood_name HAVING AVG(pi.average_standardized_income) < 15 ORDER BY AVG(mea.excessive_drinker) DESC LIMIT 10;",
},
{
    "input": "Can you show me the top 5 neighborhoods and their municipalities with, on average, the highest percentage of people with excellent to very good health but where the average disposable income of the corresponding municipality is below 40.",
    "query": "SELECT nbrhd.neighborhood_name, mun.municipality_name, ROUND(AVG(mea.excellent_very_good_health),2), ROUND(AVG(hi.average_disposable_income),2) FROM measurements mea JOIN neighborhoods nbrhd ON mea.neighborhood_code = nbrhd.neighborhood_code JOIN municipalities mun ON nbrhd.municipality_code = mun.municipality_code JOIN household_income hi ON mun.municipality_code = hi.municipality_code WHERE TRIM(mea.excellent_very_good_health) != '.' GROUP BY nbrhd.neighborhood_name HAVING AVG(hi.average_disposable_income) < 40 ORDER BY AVG(mea.excellent_very_good_health) DESC LIMIT 5;",
},

]

The prefix of the prompt:

In [8]:
# Opening section of the agent prompt: it describes the agent's role and the rules
# for building, checking and executing SQL queries. `{dialect}` and `{top_k}` are
# left as template variables to be filled in when the prompt is formatted.
system_prefix = """You are an agent designed to interact with a SQL database.
Given an input question create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
When there are less than {top_k} examples available in the database, limit the results to the number of examples that are available in the database.
You can order the results by a relevant column to return the most interesting examples in the database.
Only use the given tools. Only use the information returned by the tools to construct your final answer.
ALWAYS come up with the final answer AFTER you have used the sql_db_query tool. 
DO NOT come up with the answer without constructing a SQL query.
If you have retrieved SQL-output in your observation and you are able to answer the input question with the SQL-output, your next thought may be that you know the final answer.
You MUST double check your query with the sql_db_query_checker tool before executing it. 
Make sure the input to the sql_db_query tool starts with 'SELECT' and does not start with something like: ' ```sql '.
Delete preceding backticks (```) and the 'sql' keyword when constructing a query.
Also make sure the SQL-query ends with a semicolon.
If you get an error while executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.

If the question does not seem related to the database, just return "I don't know" as the final answer."""

The suffix of the prompt:

In [9]:
# Closing section of the agent prompt: it carries the user's question (`{input}`),
# pre-seeds the first "Thought" so the agent starts by listing the tables and then
# inspecting the relevant schemas, and ends with the `{agent_scratchpad}` slot.
basic_suffix = """Begin!

Question: {input}
Thought: I should look at the tables in the database to see what I can query. Then I should query the schema of the most relevant tables.
{agent_scratchpad}
"""

The examples that are going to be included in the prompt:

In [10]:
# Every example is rendered as a two-message exchange: the user's question,
# followed by the SQL query given as the assistant's reply.
example_prompt = ChatPromptTemplate.from_messages(
    [("human", "{input}"), ("ai", "{query}\n")]
)

# Few-shot prompt that applies `example_prompt` to each entry of `examples`.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
    input_variables=["input", "agent_scratchpad"],
)

The format instructions that are going to be included in the prompt:

In [11]:
# ReAct format instructions taken from LangChain's MRKL prompt, followed by the
# sentence that introduces the few-shot examples.
format_instructions = (
    react_prompt.FORMAT_INSTRUCTIONS
    + "\n\n"
    + "Here are some examples of user inputs and their corresponding SQL queries:"
)

Combine the prefix, the tools to use, the format instructions, the examples and the suffix into one prompt:

In [12]:
# The rendered few-shot examples are plain text, but the joined string is parsed
# as an f-string style prompt template afterwards. Escape literal braces so that
# an example containing "{" or "}" is not mistaken for a prompt variable.
few_shot_examples = few_shot_prompt.format().replace("{", "{{").replace("}", "}}")

# Section order: prefix, tool list, format instructions, examples, suffix.
# The format instructions end with the sentence that introduces the examples, so
# the examples must follow them directly (the tool list used to sit in between).
template = "\n\n".join(
    [
        system_prefix,
        "You have access to the following tools for interacting with the database: \n{tools}",
        format_instructions,
        few_shot_examples,
        basic_suffix,
    ]
)

Preview the prompt:

In [13]:
# Turn the assembled text into a prompt template and print it for inspection.
fs_prompt = PromptTemplate.from_template(template)
fs_prompt.pretty_print()

You are an agent designed to interact with a SQL database.
Given an input question create a syntactically correct [33;1m[1;3m{dialect}[0m query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most [33;1m[1;3m{top_k}[0m results.
When there are less than [33;1m[1;3m{top_k}[0m examples available in the database, limit the results to the number of examples that are available in the database.
You can order the results by a relevant column to return the most interesting examples in the database.
Only use the given tools. Only use the information returned by the tools to construct your final answer.
ALWAYS come up with the final answer AFTER you have used the sql_db_query tool. 
DO NOT come up with the answer without constructing a SQL query.
If you have retrieved SQL-output in your observation and you are able to answer the input question with the SQL-outpu

Create an agent that uses the ReAct prompting technique with the `create_react_agent` function. The agent's prompt also contains the few-shot examples.

In [14]:
# ReAct agent built from the LLM, the SQL tools and the few-shot prompt.
react_agent = create_react_agent(
    llm=llm,
    tools=tools,
    prompt=fs_prompt,
)

An `AgentExecutor` is then used to run the agent:

In [15]:
# Executor that runs the agent loop with the SQL tools. `verbose=True` prints the
# intermediate steps; `handle_parsing_errors=True` is set so that malformed LLM
# output is handled by the executor (see the AgentExecutor documentation).
agent_executor = AgentExecutor(
    agent=react_agent,
    tools=tools,
    handle_parsing_errors=True,
    verbose=True,
)

In [16]:
# Try the agent on a single question (one that also appears in the few-shot
# examples). Besides the question, the prompt variables `dialect`, `top_k` and
# `tool_names` are supplied here.
agent_executor.invoke({
    "input": "List all the districts where, on average, the percentage of people who meet the physical activity guidelines is above 60%. Can you order them from high to low for me?",
    "dialect": "sqlite",
    "top_k":10,
    "tool_names":tool_names,
                      })



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3maddresses, associated_fat, associated_liquid, clients, cluster_results, consumption_day, consumption_recipe, consumption_sup_nut, districts, household_income, measurements, municipalities, neighborhoods, participants, peoples_income, provinces, schools, sports_facilities, user_client, users[0m[32;1m[1;3mThe relevant tables for this query are likely `measurements`, `districts`, and possibly `municipalities`. I will first check the schema of these tables to understand their structure and relationships.

Action: sql_db_schema
Action Input: measurements, districts, municipalities[0m[33;1m[1;3m
CREATE TABLE districts (
	district_code TEXT, 
	district_name TEXT, 
	municipality_code TEXT
)

/*
3 rows from districts table:
district_code	district_name	municipality_code
168000	Wijk 00 Annen	1680
168001	Wijk 01 Eext	1680
168002	Wijk 02 Anloo	1680
*/


CREATE TABLE measu

{'input': 'List all the districts where, on average, the percentage of people who meet the physical activity guidelines is above 60%. Can you order them from high to low for me?',
 'dialect': 'sqlite',
 'top_k': 10,
 'tool_names': 'sql_db_query, sql_db_schema, sql_db_list_tables, sql_db_query_checker',
 'output': 'The districts where, on average, the percentage of people who meet the physical activity guidelines is above 60%, ordered from high to low, are:\n\n1. Wijk 05 Oosterend (68.63%)\n2. Wijk 01 Retranchement (68.42%)\n3. Wijk 04 Hoorn (67.35%)\n4. Wijk 02 Formerum (65.83%)\n5. Wijk 01 Midsland (65.8%)\n6. Wijk 03 Lies (65.4%)\n7. Wageningen Universiteit (64.73%)\n8. Wijk 00 West-Terschelling (64.15%)\n9. Uilenstede, Kronenburg (63.67%)\n10. Wijk 01 Overveen (60.9%)\n11. Wijk 02 Heemstede west van de spoorbaan (60.78%)\n12. Wijk 01 Blockhovepark (60.78%)\n13. Wijk 02 Aerdenhout (60.65%)\n14. Wijk 04 Heemskerkerduin en Noorddorp (60.45%)\n15. Duinwijk (60.1%)\n16. Wijk 03 Castricum

In [17]:
# For comparison: send a question straight to the bare LLM, without the agent,
# the prompt or the database tools.
llm.invoke("Find all measurements for the municipality of 'Amsterdam'")

AIMessage(content="To provide you with accurate and comprehensive measurements for the municipality of Amsterdam, I will cover various aspects such as geographical size, population, and other relevant statistics. Here are the key measurements:\n\n### Geographical Size\n- **Total Area**: Approximately 219.3 square kilometers (84.6 square miles)\n  - **Land Area**: Around 166.8 square kilometers (64.4 square miles)\n  - **Water Area**: About 52.5 square kilometers (20.3 square miles)\n\n### Population\n- **Total Population**: As of 2023, the population of Amsterdam is approximately 872,000 people.\n- **Population Density**: Roughly 5,230 people per square kilometer (13,540 people per square mile)\n\n### Elevation\n- **Average Elevation**: Amsterdam is situated at an average elevation of about 2 meters (6.6 feet) above sea level.\n\n### Administrative Divisions\n- **Districts**: Amsterdam is divided into 8 districts (stadsdelen), which include:\n  1. Centrum\n  2. West\n  3. Nieuw-West\n 

In [18]:
# Evaluation questions, kept exactly as they are sent to the agent.
question_texts = [
    "How many addresses are in the database?",
    "How many school are there?",
    "List all measurements for the municipality of ‘’s-Hertogenbosch’",
    "How many indor services are there for korfball?",
    "How many measurements are there in the municipality of Staphorst where the percentage of people with excellent to very good health is above 60%?",
    "How many measurements are there for the Vondelburt district where more than 1 out of 10 people is a heavy drinker%?",
    "Can you show me the top 10 municipalities by their average percentage of weekly sporters?",
    "List the top 10 districts by overwait and also list their respective values?",
    "Can you find the top 10 municipalities with the highest median standardised income for me? Also show me how many people in that municipality have overweight and how many have a high risk of anxiety or depression.",
    "Give me the top 20 neighbourhoods in the Netherlands based on the amount of people who had very high stress in the past 4 weeks and have more than 20 percent of heavy drinkers. Also show me in which municipalities these neighborhoods are located.",
    "Can you show me the municipalities with an average disposable income between 100 and 120 and also show me the respective percentage of weekly sporters in that municipality?",
    "Make a list with the top 10 neighborhoods and their municipalities with, on average, the highest percentage of weekly sporters but where the average standardized income of the corresponding municipality is below 15.",
]

# Every question is sent with the same dialect, result limit and tool names.
questions = [
    {"input": text, "dialect": "sqlite", "top_k": 10, "tool_names": tool_names}
    for text in question_texts
]

In [19]:
# Run every evaluation question through the agent. A failing question is reported
# and skipped so that the remaining questions still run.
for number, question in enumerate(questions, start=1):
    try:
        label, outcome = "Answer : ", agent_executor.invoke(question).get("output")
    except Exception as err:
        label, outcome = "Error : ", err
    # Report after the agent has finished, so its verbose trace is printed first.
    print(f"Question {number} : {question}")
    print(label, outcome)
    print()



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3maddresses, associated_fat, associated_liquid, clients, cluster_results, consumption_day, consumption_recipe, consumption_sup_nut, districts, household_income, measurements, municipalities, neighborhoods, participants, peoples_income, provinces, schools, sports_facilities, user_client, users[0m[32;1m[1;3mThe table `addresses` seems to be the most relevant for finding the total number of addresses in the database. I will now check the schema of the `addresses` table to understand its structure.

Action: sql_db_schema
Action Input: addresses[0m[33;1m[1;3m
CREATE TABLE addresses (
	address_id INTEGER NOT NULL, 
	postal_code TEXT, 
	house_number INTEGER NOT NULL, 
	neighborhood_code TEXT, 
	district_code TEXT, 
	municipality_code TEXT, 
	PRIMARY KEY (address_id), 
	FOREIGN KEY(neighborhood_code) REFERENCES neighborhoods (neighborhood_code), 
	FOREIGN KEY(district_