In [1]:
import sqlite3
import os
from dotenv import load_dotenv

from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.utilities.sql_database import SQLDatabase

from sqlalchemy import create_engine
from sqlalchemy.pool import StaticPool

In [2]:
load_dotenv()

True

In [4]:
def get_engine_for_local_db(db_path):
    """Create engine for local SQLite database."""
    connection = sqlite3.connect(db_path, check_same_thread=False)
    return create_engine(
        "sqlite://",
        creator=lambda: connection,
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )

In [5]:
db_path = os.environ.get("DB_PATH")
print(db_path)

data/northwind_example_db/sqlite_northwind.db


In [5]:
engine = get_engine_for_local_db(db_path)
print(engine)

Engine(sqlite://)


In [6]:
db = SQLDatabase(engine)
print(db)

<langchain_community.utilities.sql_database.SQLDatabase object at 0x000001AECAF00950>


In [7]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
print(llm)

client=<openai.resources.chat.completions.Completions object at 0x000001AECB5B3710> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001AECB5DD430> root_client=<openai.OpenAI object at 0x000001AECAB3CA40> root_async_client=<openai.AsyncOpenAI object at 0x000001AECB344230> model_name='gpt-4o-mini' model_kwargs={} openai_api_key=SecretStr('**********')


In [8]:
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
print(toolkit)

db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x000001AECAF00950> llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001AECB5B3710>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001AECB5DD430>, root_client=<openai.OpenAI object at 0x000001AECAB3CA40>, root_async_client=<openai.AsyncOpenAI object at 0x000001AECB344230>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))


In [9]:
tools = toolkit.get_tools()
print(tools)

[QuerySQLDataBaseTool(description="Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x000001AECAF00950>), InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x000001AECAF00950>), ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x000001AECAF00950>), QuerySQLCheckerTool(description='Use this tool to dou

In [10]:
from langchain_core.messages import SystemMessage

SQL_PREFIX = """You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct SQLite query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 5 results.
You can order the results by a relevant column to return the most interesting examples in the database.
Never query for all the columns from a specific table, only ask for the relevant columns given the question.
You have access to tools for interacting with the database.
Only use the below tools. Only use the information returned by the below tools to construct your final answer.
You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.

To start you should ALWAYS look at the tables in the database to see what you can query.
Do NOT skip this step.
Then you should query the schema of the most relevant tables."""

system_message = SystemMessage(content=SQL_PREFIX)

In [11]:
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import create_react_agent

agent_executor = create_react_agent(llm, tools, messages_modifier=system_message)

  agent_executor = create_react_agent(llm, tools, messages_modifier=system_message)


In [12]:
message = HumanMessage(
    content="What is the best-selling product?"
    )

In [13]:
for s in agent_executor.stream(
    {"messages": [message]}
):
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_KFiT7gzTC49llxSEe840TfXn', 'function': {'arguments': '{}', 'name': 'sql_db_list_tables'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 548, 'total_tokens': 560, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_482c22a7bc', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-23e9d915-e066-4ebd-9bc8-4bab7f9b85f1-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': 'call_KFiT7gzTC49llxSEe840TfXn', 'type': 'tool_call'}], usage_metadata={'input_tokens': 548, 'output_tokens': 12, 'total_tokens': 560, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}})]}}
----
{'tools': {'messages': [ToolMessage(content='Categories, Cust

In [23]:
print(s['agent']['messages'])
print(type(s['agent']['messages']))

[AIMessage(content='The best-selling products are:\n\n1. **Wimmers gute Semmelknödel** - Sold 212,968 units\n2. **Gorgonzola Telino** - Sold 212,882 units\n3. **Steeleye Stout** - Sold 211,790 units\n4. **Perth Pasties** - Sold 211,303 units\n5. **Zaanse koeken** - Sold 210,925 units', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 93, 'prompt_tokens': 2111, 'total_tokens': 2204, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 1920}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_482c22a7bc', 'finish_reason': 'stop', 'logprobs': None}, id='run-0c8cfedf-83da-4162-a194-9b6dd60a4d5a-0', usage_metadata={'input_tokens': 2111, 'output_tokens': 93, 'total_tokens': 2204, 'input_token_details': {'cache_read': 1920}, 'output_token_details': {'reasoning': 0}})]
<class 'list'>


In [28]:
print(s['agent']['messages'][0])
print(type(s['agent']['messages'][0]))

content='The best-selling products are:\n\n1. **Wimmers gute Semmelknödel** - Sold 212,968 units\n2. **Gorgonzola Telino** - Sold 212,882 units\n3. **Steeleye Stout** - Sold 211,790 units\n4. **Perth Pasties** - Sold 211,303 units\n5. **Zaanse koeken** - Sold 210,925 units' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 93, 'prompt_tokens': 2111, 'total_tokens': 2204, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 1920}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_482c22a7bc', 'finish_reason': 'stop', 'logprobs': None} id='run-0c8cfedf-83da-4162-a194-9b6dd60a4d5a-0' usage_metadata={'input_tokens': 2111, 'output_tokens': 93, 'total_tokens': 2204, 'input_token_details': {'cache_read': 1920}, 'output_token_details': {'reasoning': 0}}
<class 'langchain_core.messages.ai.AIMessage'>


In [27]:
print(s['agent']['messages'][0].content)

The best-selling products are:

1. **Wimmers gute Semmelknödel** - Sold 212,968 units
2. **Gorgonzola Telino** - Sold 212,882 units
3. **Steeleye Stout** - Sold 211,790 units
4. **Perth Pasties** - Sold 211,303 units
5. **Zaanse koeken** - Sold 210,925 units


In [8]:
# sanity check
with sqlite3.connect(database=db_path) as conn:
    cursor = conn.cursor()
    cursor.execute(
        """SELECT p.ProductName, SUM(od.Quantity) AS TotalUnitsSold
        FROM [Order Details] od
        JOIN Products p ON od.ProductID = p.ProductID
        GROUP BY p.ProductName
        ORDER BY TotalUnitsSold DESC
        LIMIT 5;"""
        )
    products = cursor.fetchall()

<class 'list'>


In [13]:
print(type(products))
print(len(products))

for elem in products:
    print(f"Item: {elem[0]}, count: {elem[1]}")


<class 'list'>
5
Item: Wimmers gute SemmelknĂ¶del, count: 212968
Item: Gorgonzola Telino, count: 212882
Item: Steeleye Stout, count: 211790
Item: Perth Pasties, count: 211303
Item: Zaanse koeken, count: 210925


In [None]:
# NL -> SQL
# execute SQL
# analyze / say smth smart