In [1]:
import os
from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.middleware import HumanInTheLoopMiddleware 
from langgraph.checkpoint.memory import InMemorySaver 

In [2]:
# In-memory checkpointer instance; it is handed to create_agent(checkpointer=...)
# when the agent is built further down in this notebook.
checkpoint = InMemorySaver()

In [3]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# The API keys must come from the environment (never hard-coded). Previously
# each key was re-assigned via os.environ[k] = os.getenv(k), which is a no-op
# when the key exists and raises an opaque "TypeError: str expected, not
# NoneType" when it does not. Check explicitly and fail with a clear message.
_required_env_keys = ("LANGCHAIN_API_KEY", "GROQ_API_KEY")
_missing_env_keys = [key for key in _required_env_keys if key not in os.environ]
if _missing_env_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_keys)
        + ". Define them in your shell or in a .env file next to this notebook."
    )

# Switch tracing on under both the LANGSMITH_* and LANGCHAIN_* variable names.
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [4]:
# Open the local SQLite file through LangChain's SQLDatabase wrapper.
db = SQLDatabase.from_uri("sqlite:///Sample.db")

# Sanity check of the connection: dialect, visible tables, and a few rows.
print("Dialect:", db.dialect)
print("Available tables:", db.get_usable_table_names())
print("Sample output:", db.run("SELECT * FROM Artist LIMIT 5;"))

Dialect: sqlite
Available tables: ['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']
Sample output: [(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains')]


In [5]:
# Chat model used both by the SQL toolkit and by the agent itself.
model = init_chat_model(
    model="llama-3.3-70b-versatile",
    model_provider="groq",
    temperature=0.3,
)

In [6]:
# Build the SQL toolkit on top of the database connection and the chat model,
# then collect the tools it exposes for the agent.
sql_toolkit = SQLDatabaseToolkit(llm=model, db=db)
query_tools = sql_toolkit.get_tools()

# List every tool with its description, separated by a blank line.
for tool in query_tools:
    print(f"{tool.name}: {tool.description}", end="\n\n")

sql_db_query: Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.

sql_db_schema: Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3

sql_db_list_tables: Input is an empty string, output is a comma-separated list of tables in the database.

sql_db_query_checker: Use this tool to double check if your query is correct before executing it. Always use this tool before executing a query with sql_db_query!



In [7]:
# System prompt for the SQL agent. The {dialect} placeholder is filled from
# db.dialect and {top_k} is fixed at 5 (the default row limit the model is told
# to apply). The leading indentation on each line is part of the string that
# is sent to the model.
# NOTE(review): the prompt labels INSERT/UPDATE/DELETE/DROP collectively as
# "DML"; DROP is normally classed as DDL — confirm whether the wording matters.
system_prompt = """
                You are an agent designed to interact with a SQL database.
                Given an input question, create a syntactically correct {dialect} query to run,
                then look at the results of the query and return the answer. Unless the user
                specifies a specific number of examples they wish to obtain, always limit your
                query to at most {top_k} results.
                You can order the results by a relevant column to return the most interesting
                examples in the database. Never query for all the columns from a specific table,
                only ask for the relevant columns given the question.
                You MUST double check your query before executing it. If you get an error while
                executing a query, rewrite the query and try again.
                DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
                database. To start you should ALWAYS look at the tables in the database to see 
                what you can query. Do NOT skip this step.
                Then you should query the schema of the most relevant tables.
                """.format(dialect=db.dialect,top_k=5,)

In [8]:
# Middleware configured to interrupt on the "sql_db_query" tool, so a human
# can review the request before the query tool runs.
approval_middleware = HumanInTheLoopMiddleware(
    interrupt_on={"sql_db_query": True},
    description_prefix="Tool execution pending approval",
)

# Assemble the agent: chat model + SQL tools + system prompt, with the
# approval middleware and the checkpointer created earlier.
agent = create_agent(
    model=model,
    tools=query_tools,
    system_prompt=system_prompt,
    middleware=[approval_middleware],
    checkpointer=checkpoint,
)

In [9]:
# Natural-language question that is sent to the agent as the user message.
question = "Which genre on average has the longest tracks?"

In [14]:
# Run configuration passed to agent.stream().
# presumably "thread_id" selects the conversation the checkpointer stores
# state under — verify against the LangGraph persistence docs.
config = {
    "configurable": {
        "thread_id": "2",
    },
}

In [15]:
# Stream the agent run for `question` and report each emitted step:
#   * a step carrying "messages"      -> pretty-print its most recent message
#   * a step carrying "__interrupt__" -> list the descriptions of the pending
#                                        action requests awaiting approval
#   * anything else                   -> print the raw step
for step in agent.stream(
    {"messages": [{"role": "user", "content": question}]},
    config,
    stream_mode="values",
):
    if "messages" in step:
        step["messages"][-1].pretty_print()
        continue

    if "__interrupt__" not in step:
        print(step)
        continue

    print("INTERRUPTED:")
    interrupt = step["__interrupt__"][0]
    for request in interrupt.value["action_requests"]:
        print(request["description"])


Which genre on average has the longest tracks?
Tool Calls:
  sql_db_list_tables (3vrb9s6hm)
 Call ID: 3vrb9s6hm
  Args:
  sql_db_schema (9vzh3fgym)
 Call ID: 9vzh3fgym
  Args:
    table_names: genres, tracks
  sql_db_query_checker (1n0zmnvwc)
 Call ID: 1n0zmnvwc
  Args:
    query: SELECT genres.name, AVG(tracks.duration) FROM genres JOIN tracks ON genres.id = tracks.genre_id GROUP BY genres.name ORDER BY AVG(tracks.duration) DESC LIMIT 5
  sql_db_query (b8kxsqzt3)
 Call ID: b8kxsqzt3
  Args:
    query: SELECT genres.name, AVG(tracks.duration) FROM genres JOIN tracks ON genres.id = tracks.genre_id GROUP BY genres.name ORDER BY AVG(tracks.duration) DESC LIMIT 5
Tool Calls:
  sql_db_list_tables (3vrb9s6hm)
 Call ID: 3vrb9s6hm
  Args:
  sql_db_schema (9vzh3fgym)
 Call ID: 9vzh3fgym
  Args:
    table_names: genres, tracks
  sql_db_query_checker (1n0zmnvwc)
 Call ID: 1n0zmnvwc
  Args:
    query: SELECT genres.name, AVG(tracks.duration) FROM genres JOIN tracks ON genres.id = tracks.genre_id 