### Database connection

In [1]:
from langchain_community.utilities import SQLDatabase

In [2]:
# Relative path to the SQLite file backing this notebook.
db_path = '../data/inventory.db'
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
# Sanity check: show the SQL dialect and the tables the connection exposes.
print("dialect:", db.dialect)
print("tables:", db.get_usable_table_names())

dialect: sqlite
tables: ['Orders', 'Stock']


In [3]:
# Fetch a single row from Stock to see what the stored values look like.
records = db.run("SELECT * FROM Stock LIMIT 1")
print(records)

[('1009AA', 7210.0, 'Kg', 30, 48, 28.76326)]


In [4]:
# Print the schema description (DDL plus sample rows) for the Stock table.
info = db.get_table_info(["Stock"])
print(info)


CREATE TABLE "Stock" (
	id TEXT, 
	current_stock_quantity FLOAT, 
	units TEXT, 
	avg_lead_time_days BIGINT, 
	maximum_lead_time_days BIGINT, 
	unit_price FLOAT
)

/*
3 rows from Stock table:
id	current_stock_quantity	units	avg_lead_time_days	maximum_lead_time_days	unit_price
1009AA	7210.0	Kg	30	48	28.76326
1077CA	46516.0	Kg	45	70	22.9777
1083AA	48210.0	Kg	45	68	29.02
*/


In [5]:
# Fetch a single row from Orders to see what the stored values look like.
records = db.run("SELECT * FROM Orders LIMIT 1")
print(records)

[('2022-11-19 00:00:00.000000', '3084CA', 1.0)]


In [6]:
# Print the schema description (DDL plus sample rows) for the Orders table.
info = db.get_table_info(["Orders"])
print(info)



CREATE TABLE "Orders" (
	date DATETIME, 
	id TEXT, 
	quantity FLOAT
)

/*
3 rows from Orders table:
date	id	quantity
2022-11-19 00:00:00	3084CA	1.0
2022-06-30 00:00:00	2621AA	20.0
2023-04-19 00:00:00	1193BA	147.0
*/


### Structured Output



In [7]:
from typing_extensions import TypedDict
from typing_extensions import Annotated


class State(TypedDict):
    """Values passed between the text-to-SQL steps defined in this notebook."""
    # Natural-language question asked by the user.
    question: str 
    # SQL text produced by write_query.
    query: str
    # Value returned by execute_query for that SQL.
    result: str
    # Final reply produced by generate_answer.
    answer: str


class QueryOutput(TypedDict):
    """Generated SQL query."""
    # NOTE(review): this class is handed to llm.with_structured_output, so the
    # docstring above and the description string below are presumably sent to
    # the model as part of the output schema -- confirm before rewording them.
    query: Annotated[str, ..., "Syntactically valid SQL query."]

### Prompt

In [8]:
from langchain import hub

# Pull the shared SQL-generation prompt from the LangChain hub.
query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")

# The rest of the notebook assumes the template holds exactly one message;
# fail fast here if the pulled prompt has a different shape.
assert len(query_prompt_template.messages) == 1
query_prompt_template.messages[0].pretty_print()




Given an input question, create a syntactically correct [33;1m[1;3m{dialect}[0m query to run to help find the answer. Unless the user specifies in his question a specific number of examples they wish to obtain, always limit your query to at most [33;1m[1;3m{top_k}[0m results. You can order the results by a relevant column to return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.

Only use the following tables:
[33;1m[1;3m{table_info}[0m

Question: [33;1m[1;3m{input}[0m


### LLM

In [9]:
from langchain_ollama import ChatOllama

# Chat model used both to generate SQL (write_query) and to phrase the final
# answer (generate_answer).
llm = ChatOllama(model="qwen2.5-coder:latest")

### Text-2-SQL Chain

#### Example 1

In [10]:
def write_query(state: State):
    """Turn the question in ``state`` into a SQL query using the LLM.

    The hub prompt template is filled with the database dialect, a row cap
    of 10, the schema text from ``db.get_table_info()`` and
    ``state["question"]``; the model is then asked to answer in the
    ``QueryOutput`` structure.

    Returns:
        A dict with a single ``"query"`` key holding the generated SQL text.
    """
    prompt_inputs = {
        "dialect": db.dialect,
        "top_k": 10,
        "table_info": db.get_table_info(),
        "input": state["question"],
    }
    filled_prompt = query_prompt_template.invoke(prompt_inputs)

    # Constrain the model's reply to the QueryOutput schema.
    sql_generator = llm.with_structured_output(QueryOutput)
    generated = sql_generator.invoke(filled_prompt)
    return {"query": generated["query"]}

# Try the step on a simple counting question.
write_query({"question": "How many Orders are there?"})

{'query': 'SELECT COUNT(*) FROM Orders'}

### Execute the query

In [11]:
from langchain_community.tools.sql_database.tool import QuerySQLDatabaseTool


def execute_query(state: State):
    """Run the SQL held in ``state["query"]`` against ``db``.

    The query is handed to the tool exactly as given; no validation is
    performed here.

    Returns:
        A dict with a single ``"result"`` key containing whatever
        ``QuerySQLDatabaseTool`` returns for that query.
    """
    sql_tool = QuerySQLDatabaseTool(db=db)
    raw_result = sql_tool.invoke(state["query"])
    return {"result": raw_result}

In [28]:
# Reuse the query generated earlier (copied by hand from the write_query output).
response = execute_query({'query': 'SELECT COUNT(*) FROM Orders'})
print(response['result'])

[(33919,)]


### Generate answer

In [16]:
def generate_answer(state: State):
    """Ask the LLM to answer the user's question from the SQL result.

    Builds a plain-text prompt containing ``state["question"]``,
    ``state["query"]`` and ``state["result"]`` and sends it to ``llm``.

    Returns:
        A dict with a single ``"answer"`` key holding the model's reply text.
    """
    instructions = (
        "Given the following user question, corresponding SQL query, "
        "and SQL result, answer the user question.\n\n"
    )
    context = (
        f'Question: {state["question"]}\n'
        f'SQL Query: {state["query"]}\n'
        f'SQL Result: {state["result"]}'
    )
    reply = llm.invoke(instructions + context)
    return {"answer": reply.content}

In [29]:
# Build the state by hand from the values shown in the earlier cells so that
# generate_answer can be exercised on its own.
state = State(
    question="How many Orders are there?",
    # Fixed: the literal previously ended with a stray single quote, so the
    # SQL shown to the model was malformed.
    query="SELECT COUNT(*) FROM Orders",
    result="33919",
    answer="",
)
generate_answer(state)

{'answer': 'There are 33,919 orders.'}

#### Example 2

In [35]:
# Natural-language request used by the write_query and generate_answer calls below.
question = "Select the orders that have a quantity equal 475"

In [36]:
# write_query returns a dict of the form {"query": <sql>}, so `query` holds
# that dict here, not the bare SQL string.
query = write_query({"question": question})
print(query)

{'query': 'SELECT id, date, quantity FROM Orders WHERE quantity = 475 LIMIT 10;'}


In [37]:
# `query` is the dict returned by write_query ({"query": <sql>}). Pass only
# its SQL string so the state matches State, where `query` is declared as str,
# instead of nesting a dict under the "query" key.
response = execute_query({'query': query['query']})
print(response['result'])

[('2785BA', '2023-04-16 00:00:00.000000', 475.0), ('1993AA', '2022-05-23 00:00:00.000000', 475.0), ('2727CA', '2022-05-23 00:00:00.000000', 475.0), ('2814BA', '2022-04-03 00:00:00.000000', 475.0), ('2847BA', '2022-09-23 00:00:00.000000', 475.0), ('2291AA', '2022-08-20 00:00:00.000000', 475.0), ('2803BA', '2022-03-28 00:00:00.000000', 475.0)]


In [39]:
state = State(
    question=question,
    # `query` is the dict returned by write_query; unwrap it so the answer
    # prompt shows the SQL text itself rather than the dict's repr.
    query=query["query"],
    result=response['result'],
    answer="",
)
generate_answer(state)

{'answer': 'The SQL query you provided selects orders from a table named "Orders" where the quantity is equal to 475. The query limits the results to the first 10 rows that match this condition.\n\nHere\'s a breakdown of the query:\n- `SELECT id, date, quantity`: This part of the query specifies that we want to retrieve the columns \'id\', \'date\', and \'quantity\' from the table.\n- `FROM Orders`: This indicates that the data should be fetched from the "Orders" table.\n- `WHERE quantity = 475`: This is the condition that filters the rows. Only those rows where the \'quantity\' column has a value of 475 are selected.\n- `LIMIT 10`: This limits the result set to only the first 10 rows that meet the specified condition.\n\nThe SQL result you provided shows the data for these orders, including their IDs, dates, and quantities.'}