Import Necessary Modules

In [63]:
from operator import itemgetter
from helpers import get_llm, get_retriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.messages import HumanMessage
from langchain_core.runnables import Runnable, RunnablePassthrough, RunnableParallel, RunnableLambda, chain
from langchain_core.tools import tool
from typing import Annotated
import requests
from langchain.callbacks.tracers import ConsoleCallbackHandler

Configure Database

In [64]:
from helpers.config import Config
from sqlalchemy import create_engine

sql_engine = create_engine(f"{Config.POSTGRES_CONNECTION_STRING}/sql_agent")

Ingest Data

In [None]:
import pandas as pd
from helpers import get_vector_store_instance
from langchain_core.documents import Document

df = pd.read_excel("./sample_data/brillar_bank_fixed_deposit.xlsx", sheet_name=None)


vector_store = get_vector_store_instance(embedding_model="text-embedding-3-large", dimension=256, index_name="test", vector_db="qdrant")
vector_store.add_documents(
    [
        Document(
            page_content="This data includes Brillar Bank's fixed deposit accounts and the customer information. This includes customer name, fixed deposit type, amount in RM, term of deposit in months, customer id, customer age, customer location, and customer phone number.",
            metadata={"search_type": "xlsx", "source": "brillar_bank_fixed_deposit.xlsx"},
        )
    ]
)
for sheet_name, df in df.items():
    table_name = sheet_name.replace(" ", "_").lower()
    df.to_sql(name=table_name, con=sql_engine, if_exists="replace", index=False)
    print(f"Inserted data from sheet '{sheet_name}' into table '{table_name}'")
    

Inserted data from sheet 'brillar_bank' into table 'brillar_bank'


Initiate LLM

In [65]:
llm = get_llm("gpt-4o")

In [67]:
from langchain_core.tools import tool
from langchain_community.utilities import SQLDatabase
from langchain.chains.sql_database.query import create_sql_query_chain
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langgraph.graph import END, START, StateGraph, MessagesState

def classification(state: MessagesState):
  classification_prompt = """Analyze the question and decide that if the question can be answered from a structured data or unstructured data. You should only response STRUCTURED or UNSTRUCTURED.

        Structured data schema:
        [Customer Name, Fixed Deposit Type, Amount (RM), Term (Months), Customer Age, Customer Location, Customer Phone Number]
        
        Question: {question}
        Output: """
  
  classification_template = ChatPromptTemplate.from_template(classification_prompt)
  classification_chain = classification_template | llm | StrOutputParser()
  response = classification_chain.invoke(state["messages"])

  if response == "STRUCTURED":
    return "sql"
  else:
    return "rag"


def rag_chain(state: MessagesState):
  messages = state["messages"]
  qa_instructions = (
        """Answer the user question given the following context:\n\n{context}."""
    )
  qa_prompt = ChatPromptTemplate.from_messages(
        [("system", qa_instructions), ("human", "{question}")]
    )
  
  retriever = get_retriever(
        index_name="test",
        embedding_model="text-embedding-3-large",
        dimension=256,
        vector_db="qdrant",
        top_k=5,
    )
  
  chain = {
                "question": itemgetter("question"),
                "context": itemgetter("question") | retriever,
            } | qa_prompt | llm | StrOutputParser()
  
  response = chain.invoke({"question": messages[-1].content})
  return {"messages": [response]}


def sql_chain(state: MessagesState):
  messages = state["messages"]
  sql_instruction = """
        You are a PostgreSQL expert. Given an input question, first create a syntactically correct PostgreSQL query, and then ONLY return the plain query. No markdown format or explanation is needed.
        Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per PostgreSQL. You can order the results to return the most informative data in the database.
        Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
        Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
        Pay attention to use CURRENT_DATE function to get the current date, if the question involves "today".

        Only use the following tables:
        {table_info}

        Question: {input}
        """
  
  sql_prompt = PromptTemplate.from_template(template=sql_instruction)

  db = SQLDatabase(engine=sql_engine)

  write_query = create_sql_query_chain(llm, db, prompt=sql_prompt)
  execute_query = QuerySQLDataBaseTool(db=db)
  sql_prompt = PromptTemplate.from_template(
      """Given the following user question, corresponding SQL query, and SQL result, answer the user question. You don't need to tell the user that you are using SQL.

  Question: {question}
  SQL Query: {query}
  SQL Result: {result}
  Answer: """
  )

  sql_chain = RunnablePassthrough.assign(query=write_query).assign(result=itemgetter("query") | execute_query) | sql_prompt | llm | StrOutputParser()

  response = sql_chain.invoke({"question": messages[-1].content})
  return {"messages": [response]}

In [70]:
workflow = StateGraph(MessagesState)
workflow.add_node("rag", rag_chain)
workflow.add_node("sql", sql_chain)

workflow.add_conditional_edges(START, classification)
workflow.add_edge("rag", END)
workflow.add_edge("sql", END)

app = workflow.compile()

In [73]:
app.invoke({"messages": [HumanMessage(content="How many types of fixed deposit do you offer?")]})

{'messages': [HumanMessage(content='How many types of fixed deposit do you offer?', id='828b738d-51b1-4a33-b7fc-cc533d644097'),
  HumanMessage(content='We offer 6 types of fixed deposit.', id='f811991b-5be3-412f-b5ef-7ce7c9b3c8e8')]}