# SQL Agent - IBM Generative AI Python SDK
- Reference: https://ibm.github.io/ibm-generative-ai/v2.3.0/rst_source/examples.extensions.langchain.langchain_sql_agent.html

In [1]:
from dotenv import load_dotenv
from genai import Client, Credentials
from langchain.agents import AgentExecutor
from genai.schema import TextGenerationParameters
from langchain.memory import ConversationBufferMemory
from langchain_community.utilities import SQLDatabase
from langchain_core.runnables import RunnablePassthrough
from genai.extensions.langchain import LangChainChatInterface
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.tools.render import render_text_description_and_args
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Load variables from a local .env file into the process environment before
# any client is built; the returned flag is echoed so a missing file is visible.
env_loaded = load_dotenv()
print('Load .env variables:', env_loaded)

Load .env variables: True


# Connect to WatsonX

In [2]:
# Credentials are resolved from environment variables (see the load_dotenv cell).
genai_credentials = Credentials.from_env()

# Generation settings for the agent loop.
# NOTE(review): the '\nObservation' stop sequence presumably keeps the model from
# writing tool results itself — confirm against the SDK's stop-sequence semantics.
generation_parameters = TextGenerationParameters(
    max_new_tokens=250,
    min_new_tokens=20,
    temperature=0,
    stop_sequences=['\nObservation'],
)

# Chat model wrapper that the LangChain agent and SQL toolkit will call.
llm = LangChainChatInterface(
    client=Client(credentials=genai_credentials),
    model_id='meta-llama/llama-3-70b-instruct',
    parameters=generation_parameters,
)

# Load Database

Setup:

- Download [``Chinook_Sqlite.sql``](https://raw.githubusercontent.com/lerocha/chinook-database/master/ChinookDatabase/DataSources/Chinook_Sqlite.sql)
- Run ``sqlite3 Chinook.db``
- Run ``.read Chinook_Sqlite.sql``
- Test with ``SELECT * FROM Artist LIMIT 10;``

In [3]:
# SQLAlchemy-style URI pointing at the Chinook SQLite file created in the setup steps.
CHINOOK_URI = 'sqlite:///Chinook.db'
db = SQLDatabase.from_uri(CHINOOK_URI)

# Sanity checks: report the dialect and the visible tables, one per line.
print(db.dialect, db.get_usable_table_names(), sep='\n')

# Smoke-test query; as the last expression its result is shown as the cell output.
db.run('SELECT * FROM Artist LIMIT 10;')

sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']


"[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham')]"

# Prepare SQL toolkit

In [4]:
# Build the SQL toolkit around the database and the chat model.
sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)

# The tool list is what the agent can choose from at every step.
tools = sql_toolkit.get_tools()
print(f'Available tools: {tools}')

Available tools: [QuerySQLDataBaseTool(description="Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x11072b510>), InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x11072b510>), ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x11072b510>), QuerySQLCheckerTool(description='Use this tool to double 

# Prepare Agent

In [5]:
# System prompt for the JSON-blob agent.
# - `{tools}` and `{tool_names}` are template variables filled in via `.partial(...)`.
# - Doubled braces (`{{` / `}}`) are escapes that render as literal braces.
# - Every example code fence is opened AND closed, so the few-shot format matches
#   the final instruction "Action:```$JSON_BLOB```then Observation".
system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools:
{tools}
Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
Valid "action" values: "Final Answer" or {tool_names}
Provide only ONE action per $JSON_BLOB, as shown:
```
{{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}}
```
Follow this format:
Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
  "action": "Final Answer",
  "action_input": "Final response to human"
}}
```
Begin! Reminder to ALWAYS respond with a valid json blob of a single action.
Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation"""  # noqa

# Human turn template: the user's question, then the running scratchpad of
# previous Thought/Action/Observation steps, then a closing format reminder.
human_prompt = (
    "{input}\n"
    "{agent_scratchpad}\n"
    "(reminder to respond in a JSON blob no matter what)"
)

# Conversation memory; read by the agent as chat history and handed to the executor.
memory = ConversationBufferMemory()

# Message layout: system instructions, optional prior conversation, current human turn.
prompt_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history", optional=True),
    ("human", human_prompt),
]

# Values for the `{tools}` / `{tool_names}` placeholders of the system prompt.
tool_descriptions = render_text_description_and_args(list(tools))
tool_name_list = ", ".join(tool.name for tool in tools)

prompt = ChatPromptTemplate.from_messages(prompt_messages).partial(
    tools=tool_descriptions,
    tool_names=tool_name_list,
)

# Step 1 of the chain: enrich the incoming dict with the two extra prompt inputs.
prompt_inputs = RunnablePassthrough.assign(
    # previous (action, observation) steps rendered as plain text
    agent_scratchpad=lambda state: format_log_to_str(state["intermediate_steps"]),
    # messages accumulated in the conversation memory so far
    chat_history=lambda _state: memory.chat_memory.messages,
)

# Full agent pipeline: inputs -> prompt -> chat model -> JSON action parser.
agent = prompt_inputs | prompt | llm | JSONAgentOutputParser()

# The executor runs the agent/tool loop; the same memory object is attached to it.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,
    verbose=True,
    memory=memory,
)

# Execution

In [6]:
# First question: lets the agent explore the schema and count rows per table.
first_question = "Which is the table of the schema with more tuples?"
agent_executor.invoke({"input": first_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mQuestion: Which is the table of the schema with more tuples?
Thought: I need to get the list of tables first.
Action:
```
{
  "action": "sql_db_list_tables",
  "action_input": ""
}
```
Observation[0m[38;5;200m[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track[0m[32;1m[1;3mI have the list of tables. Now, I need to get the schema for each table to know the column names, and then I can query the number of tuples in each table.
Action:
```
{
  "action": "sql_db_schema",
  "action_input": "Album, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track"
}
```
Observation[0m[33;1m[1;3m
CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId

{'input': 'Which is the table of the schema with more tuples?',
 'history': '',
 'output': 'The table with the most tuples is PlaylistTrack with 8715 tuples.'}

In [7]:
# Follow-up question; the result's 'history' entry carries the previous exchange.
followup_question = "Who is the Artist with the most Albums?"
agent_executor.invoke({"input": followup_question})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to get the list of tables first to know which table to query.

Action:
```
{
  "action": "sql_db_list_tables",
  "action_input": ""
}
```
Observation[0m[38;5;200m[1;3mAlbum, Artist, Customer, Employee, Genre, Invoice, InvoiceLine, MediaType, Playlist, PlaylistTrack, Track[0m[32;1m[1;3mThought: Now that I have the list of tables, I need to get the schema of the relevant tables to know which fields to query.

Action:
```
{
  "action": "sql_db_schema",
  "action_input": "Artist, Album"
}
```
Observation[0m[33;1m[1;3m
CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE "Artist" (
	"ArtistId" INTEGER NO

{'input': 'Who is the Artist with the most Albums?',
 'history': 'Human: Which is the table of the schema with more tuples?\nAI: The table with the most tuples is PlaylistTrack with 8715 tuples.',
 'output': 'The Artist with the most Albums is Iron Maiden with 21 Albums.'}