Establish the connection to the database (the connection settings are read from `../config/config.yml` and assembled into a connection URL)

In [4]:
import yaml
from sqlalchemy import create_engine
from langchain_community.utilities import SQLDatabase

from urllib.parse import quote

# Load the connection settings from the project's YAML config file.
with open('../config/config.yml', 'r') as file:
    config = yaml.safe_load(file)

# safe_load returns None for an empty file, so guard before looking up the section.
mysql_config = (config or {}).get('mysql')
if not mysql_config:
    raise KeyError("The 'mysql' section is missing or empty in ../config/config.yml")

username = mysql_config.get('username')
password = mysql_config.get('password')
host = mysql_config.get('host')
port = mysql_config.get('port')
database = mysql_config.get('database')

# Percent-encode the credentials so that characters such as '@', ':', '/' or
# '%' in the username or password cannot corrupt the connection URL.
# str() keeps values that YAML parsed as numbers working, as the f-string did before.
encoded_username = quote(str(username), safe="")
encoded_password = quote(str(password), safe="")

connection_string = (
    f"mysql+mysqlconnector://{encoded_username}:{encoded_password}"
    f"@{host}:{port}/{database}"
)
engine = create_engine(connection_string)

# LangChain wrapper around the engine; used by the query-writing and
# query-executing steps further down.
db = SQLDatabase(engine)

Create the connection to the LLM by entering the API key and connecting to the cloud provider (OpenAI)

In [1]:
from getpass import getpass
import os
from langchain.chat_models import init_chat_model

# Ask for the key only when it is not already set, so re-running the cell (or
# starting the kernel with the key exported) does not prompt again or
# overwrite a valid key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

# Chat model shared by the query-writing and answer-generating steps.
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

Define the fields of the State that is passed along the agent pipeline

In [2]:
from typing_extensions import TypedDict


class State(TypedDict):
    """Shared state passed between the steps of the question-answering pipeline."""

    question: str  # the user's question; read by write_query and generate_answer
    query: str  # SQL text produced by write_query and run by execute_query
    result: str  # output of execute_query
    answer: str  # final reply produced by generate_answer

# Each step reads the keys it needs from the state and returns a dict holding only the key it fills in.

Load the predefined prompt that tells the model how it should shape its response

In [None]:
from langchain import hub

# Fetch the shared SQL-generation prompt from the LangChain hub.
query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")

# The template is expected to hold exactly one message; print it for inspection.
prompt_messages = query_prompt_template.messages
assert len(prompt_messages) == 1
prompt_messages[0].pretty_print()

Fill in the prompt template printed by the previous cell and have the model generate an appropriate SQL query

In [5]:
from typing_extensions import Annotated


# Output schema handed to llm.with_structured_output() in write_query, which
# then reads the "query" key of the model's reply.
# NOTE(review): the docstring and the Annotated description below are presumably
# forwarded to the model as part of the schema — treat them as prompt text and
# confirm before rewording them.
class QueryOutput(TypedDict):
    """Generated SQL query."""

    # SQL text produced by the model.
    query: Annotated[str, ..., "Syntactically valid SQL query."]


def write_query(state: State):
    """Turn the question stored in the state into a SQL query.

    Reads ``state["question"]``, fills the hub prompt with the database
    dialect, a row limit and the table descriptions, and asks the LLM for a
    structured ``QueryOutput``.

    Returns:
        A dict with the single key ``"query"`` holding the generated SQL.
    """
    prompt_inputs = {
        "dialect": db.dialect,
        "top_k": 10,
        "table_info": db.get_table_info(),
        "input": state["question"],
    }
    prompt = query_prompt_template.invoke(prompt_inputs)

    # Constrain the model's reply to the QueryOutput schema.
    generated = llm.with_structured_output(QueryOutput).invoke(prompt)
    return {"query": generated["query"]}

In [6]:
# Quick check: write_query only reads the "question" key, so a partial state is enough.
write_query({"question": "How many Employees are there?"})

{'query': 'SELECT COUNT(*) AS employee_count FROM employee;'}

In [None]:
# TODO: find a way to select only the most relevant tables from the tables currently available to the user

Create the method (tool) that is responsible for running the generated query against the database

In [7]:
from langchain_community.tools.sql_database.tool import QuerySQLDatabaseTool

# notice that the instance of the state is passed along all the methods within a chain
def execute_query(state: State):
    """Run the SQL stored in ``state["query"]`` against the database.

    Returns:
        A dict with the single key ``"result"`` holding whatever the
        query tool returned.
    """
    sql_tool = QuerySQLDatabaseTool(db=db)
    query_result = sql_tool.invoke(state["query"])
    return {"result": query_result}

It can be called like this:

In [9]:
# Quick check: execute_query only reads the "query" key of the state.
execute_query({"query": "SELECT COUNT(*) AS employee_count FROM employee;"})

{'result': '[(4,)]'}

The next tool is responsible for generating the human-like response. In future iterations, it should be changed to generate a graph instead.

In [10]:
def generate_answer(state: State):
    """Produce a natural-language answer from the question, SQL and SQL result.

    Reads the ``"question"``, ``"query"`` and ``"result"`` keys of the state,
    combines them into one prompt and sends it to the LLM.

    Returns:
        A dict with the single key ``"answer"`` holding the model's reply text.
    """
    question, sql_query, sql_result = (
        state["question"],
        state["query"],
        state["result"],
    )
    instructions = (
        "Given the following user question, corresponding SQL query, "
        "and SQL result, answer the user question.\n\n"
    )
    context = (
        f'Question: {question}\n'
        f'SQL Query: {sql_query}\n'
        f'SQL Result: {sql_result}'
    )
    return {"answer": llm.invoke(instructions + context).content}

Chain the steps into a pipeline to create the end-to-end agent

In [None]:
from langgraph.graph import START, StateGraph

# Wire the three steps one after another; the node names are taken from the
# function names.
graph_builder = StateGraph(State)
graph_builder.add_sequence([write_query, execute_query, generate_answer])

# The run enters the graph at write_query.
graph_builder.add_edge(START, "write_query")

graph = graph_builder.compile()

Example of a possible human-in-the-loop interaction in the chain

In [None]:
from langgraph.checkpoint.memory import MemorySaver

# In-memory checkpointer handed to the compiled graph.
memory = MemorySaver()

# Recompile the same builder so that the run stops right before execute_query,
# which is where the human gets to review the generated SQL.
graph = graph_builder.compile(
    interrupt_before=["execute_query"],
    checkpointer=memory,
)

# Run configuration naming the thread to use.
# NOTE: this rebinds `config`, which earlier held the parsed YAML settings.
config = dict(configurable=dict(thread_id="1"))

Below is one possible way to integrate it into the loop by wrapping the code in a try/except block

In [None]:
# First pass: stream the graph's updates and print each one. The graph was
# compiled with interrupt_before=["execute_query"], so this pass is expected
# to stop before the SQL is run.
for step in graph.stream(
    {"question": "How many employees are there?"},
    config,
    stream_mode="updates",
):
    print(step)

# Ask the user whether the generated query may be executed.
try:
    user_approval = input("Do you want to go to execute query? (yes/no): ")
except Exception:
    # Deliberate best-effort: if no answer can be read (for example when no
    # stdin is available), treat it as a refusal rather than failing the cell.
    user_approval = "no"

# Accept surrounding whitespace, any letter case and the short form "y";
# anything else counts as a refusal.
if user_approval.strip().lower() in {"yes", "y"}:
    # If approved, continue the graph execution: passing None as the input
    # continues the run stored under the same thread config.
    for step in graph.stream(None, config, stream_mode="updates"):
        print(step)
else:
    print("Operation cancelled by user.")