In [1]:
from sqlalchemy import create_engine
import urllib.parse
from langchain.sql_database import SQLDatabase
import os
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import pandas as pd
from pydantic import BaseModel, Field
from langchain.chains.openai_tools import create_extraction_chain_pydantic
from typing import List 
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import create_sql_query_chain
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.tools.sql_database.tool import QuerySQLDatabaseTool

# Load key/value pairs from a .env file into the process environment so the
# os.getenv(...) lookups in the cells below can find the credentials.
load_dotenv()

True

### Prepare database

In [3]:
# Fail fast with a readable message when credentials are missing; otherwise
# quote_plus(None) raises an opaque TypeError that never names the variable.
missing_db_vars = [name for name in ("SQLDB_UID", "SQLDB_PASSWORD") if not os.getenv(name)]
if missing_db_vars:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(missing_db_vars) + ". Add them to your .env file."
    )

# Both credentials are URL-encoded: characters such as '@', ':' or '/' in either
# the user name or the password would otherwise corrupt the connection URL.
uid = urllib.parse.quote_plus(os.environ["SQLDB_UID"])
password = urllib.parse.quote_plus(os.environ["SQLDB_PASSWORD"])
connectionString = f"mssql+pyodbc://{uid}:{password}@testpfidb.database.windows.net/DBG_DATA?driver=ODBC+Driver+18+for+SQL+Server"

db_engine = create_engine(connectionString)
# Wrap the engine for LangChain, restricted to the "dbo" schema with view support enabled.
db = SQLDatabase(db_engine, view_support=True, schema="dbo")

# test the connection
print(db.dialect)
print(db.get_usable_table_names())
# Round-trip a trivial query; the bare last expression displays the server timestamp.
db.run("select convert(varchar(25), getdate(), 120)")

mssql
['Budget', 'CBLR_Raw', 'CHART OF ACCOUNTS', 'DEGU_Raw', 'GDP_Raw', 'GLA_Raw', 'KRI_DATA', 'NPL_Data_Raw', 'PFI_DATA', 'TBS', 'transformed_CBLR', 'transformed_DEGU', 'transformed_GLA', 'transformed_NPL']


"[('2025-08-24 17:12:30',)]"

In [None]:
# Hand-written sanity-check query: joins NPL_Data_Raw and DEGU_Raw on date and
# returns the first 5 rows (newest first) inside the 2020-2022 window.
# OFFSET ... FETCH NEXT is used for row limiting (the connected dialect is mssql).
# NOTE(review): a later result in this notebook shows a date rendered as
# 'dd/mm/yyyy' text; if `date` is stored as a string, BETWEEN / ORDER BY compare
# lexically rather than chronologically — confirm the column type.
query = '''SELECT N.date, N.NPL_diff, D.DEGU_diff
FROM NPL_Data_Raw N
INNER JOIN DEGU_Raw D ON N.date = D.date
WHERE N.date BETWEEN '2020-01-01' AND '2022-12-31'
ORDER BY N.date DESC
OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;'''

In [None]:
# Run the hand-written SQL through LangChain's query tool to confirm the tool
# can execute statements against `db`.
executeQueryTool = QuerySQLDatabaseTool(db=db)
executeQuery = {"result": executeQueryTool.invoke(query)}
# Bare last expression so the notebook displays the result dict.
executeQuery

### Set environment variables and create the LLMs

In [3]:
# The key is read from OPEN_AI_API_KEY and re-exported under OPENAI_API_KEY.
# Assigning None into os.environ raises "str expected, not NoneType", so check
# first and fail with a message that names the missing variable.
openai_api_key = os.getenv("OPEN_AI_API_KEY") or os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("OPEN_AI_API_KEY is not set; add it to your .env file.")
os.environ['OPENAI_API_KEY'] = openai_api_key

# LANGSMITH_API_KEY is already in the environment when present (re-assigning it
# to itself only added a crash path), so just switch tracing on when it exists.
if os.getenv("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_TRACING"] = "true"

sql_agent_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) #used to generate sql query
table_extractor_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) #used to choose which tables to use for the query generation

# Convert question to SQL Query

### Load table details

In [4]:
# file created with the assumption that only these tables are relevant to the dashboard
def get_table_details(csv_path: str = "database_table_descriptions.csv") -> str:
    """Build the table catalogue text that is embedded in the table-selection prompt.

    Args:
        csv_path: Path to a CSV file with a ``Table`` column (table name) and a
            ``Description`` column (free-text description). Defaults to the
            file shipped alongside this notebook.

    Returns:
        One entry per CSV row, in file order. Each entry is a "Table Name:" line,
        a "Table Description:" line and a blank line. Returns an empty string
        when the file has no data rows.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If the ``Table`` or ``Description`` column is missing.
    """
    # Read every cell as text and keep blank cells as "" instead of NaN; a NaN
    # description would otherwise break the string formatting below.
    table_description = pd.read_csv(csv_path, dtype=str, keep_default_na=False).fillna("")

    # Single join instead of repeated string concatenation inside a row loop.
    return "".join(
        f"Table Name:{name}\nTable Description:{description}\n\n"
        for name, description in zip(table_description["Table"], table_description["Description"])
    )

In [5]:
# Build the catalogue once; the table-selection prompt defined further down
# interpolates this string.
table_details = get_table_details()
# print (rather than a bare expression) so the embedded newlines render as line breaks.
print(table_details)

Table Name:CBLR_Raw
Table Description:Community Bank Leverage Ratio. The columns are date and CBLR_diff. CDLR_diff is the community bank leverage value. It is a number 

Table Name:DEGU_Raw
Table Description:Exchange Rate Depreciation. The columns are date and DEGU_diff. DEGU_diff is the exchange rate depreciation. It is a number 

Table Name:GDP_Raw
Table Description:Gross Domestic Product. The columns are date and GDP. GDP is the gross domestic product. It is a number

Table Name:GLA_Raw
Table Description:Gross Leasable Area. The columns are date and GLA_diff. GLA is gross leasable area. It is a number 

Table Name:NPL_Data_Raw
Table Description:Non-Paying Loans. The columns are date and NPL_diff. NPL is non-paying loans. It is a number 

Table Name:transformed_CBLR
Table Description:Community Bank Leverage Ratio. This table is the transformed version of CBLR_Raw. The columns are date and CBLR_diff. CDLR_diff is the community bank leverage value. It is a number 

Table Name:transform

### Prepare Table class

In [6]:
# Extraction schema: the model is passed to create_extraction_chain_pydantic,
# and the chain returns a list of Table instances (one per selected table).
# NOTE(review): the docstring and the Field description are presumably exported
# as part of the schema the LLM sees — treat them as prompt text and confirm
# before rewording.
class Table(BaseModel):
    """
    Represents a table in the SQL database.

    Attributes:
        name (str): The name of the table in the SQL database.
    """
    name: str = Field(description="Name of table in SQL database.")

### Dynamic table selection

In [7]:
# System prompt for table selection. This is an f-string, so `table_details` is
# interpolated immediately when the cell runs (re-run this cell after changing
# the catalogue). The trailing backslash on the first line joins it with the next.
table_details_prompt = f"""Return the names of ALL the SQL tables that MIGHT be relevant to the user question. \
The tables are:

{table_details}

Use only table names that are in the table_details.

Remember to include ALL POTENTIALLY RELEVANT tables, even if you're not sure that they're needed."""

# Extraction chain that returns Table entities for a question supplied under the "input" key.
# NOTE(review): the cell output echoes this line, which looks like a deprecation
# warning for create_extraction_chain_pydantic — confirm and plan a migration.
table_chain = create_extraction_chain_pydantic(Table, table_extractor_llm, system_message=table_details_prompt)

  table_chain = create_extraction_chain_pydantic(Table, table_extractor_llm, system_message=table_details_prompt)


In [8]:
# Smoke-test the extraction chain on a sample question; at this point
# table_chain takes the question under the "input" key.
tables = table_chain.invoke({"input": "give me the GDPs in 2025 and the dates for them"})
# Bare last expression so the notebook displays the selected Table objects.
tables

[Table(name='GDP_Raw'),
 Table(name='transformed_GLA'),
 Table(name='transformed_NPL')]

In [9]:
def get_tables(tables: List[Table]) -> List[str]:
    """Flatten extracted Table objects into a list of their names, preserving order."""
    names: List[str] = []
    for entry in tables:
        names.append(entry.name)
    return names

# Table-selection step keyed on "question": the leading dict maps the caller's
# "question" onto the "input" key the extraction chain takes, and get_tables then
# flattens the returned Table objects into plain table names.
select_table = {"input": itemgetter("question")} | create_extraction_chain_pydantic(Table, table_extractor_llm, system_message=table_details_prompt) | get_tables
# Quick check on a sample question.
select_table.invoke({"question": "give me the GDPs in 2025 and the dates for them"})

['GDP_Raw',
 'transformed_GLA',
 'transformed_CBLR',
 'transformed_DEGU',
 'transformed_NPL']

### Create the final chain for SQL generation

In [None]:
# query_chain = create_sql_query_chain(sql_agent_llm, db, prompt=table_details_prompt)

In [11]:
# Chain that writes a SQL statement for a question, using the schema of `db`.
query_chain = create_sql_query_chain(sql_agent_llm, db)
# Question -> names of the tables to use.
# NOTE: this rebinds `table_chain`; the earlier definition took {"input": ...},
# this one takes {"question": ...} and returns plain table names.
table_chain = {"input": itemgetter("question")} | create_extraction_chain_pydantic(Table, table_extractor_llm, system_message=table_details_prompt) | get_tables
# Question -> SQL: add the selected tables under "table_names_to_use", then
# pass the enriched input to the SQL-writing chain.
generate_query = RunnablePassthrough.assign(table_names_to_use=table_chain) | query_chain

In [12]:
# Generate SQL for a sample question.
# NOTE: this rebinds `query`, replacing the hand-written SQL string defined earlier.
query = generate_query.invoke(
    {"question": "give me the GDPs in 2025 and the dates for them"}
)
# Display the type of the chain output (the recorded output is `str`).
type(query)

str

# Run query to get data

In [17]:
# QuerySQLDatabaseTool is already imported in the notebook's import cell, so the
# duplicate mid-notebook import is dropped to keep all dependencies in one place.
# Execute the LLM-generated SQL held in `query` against the database.
executeQueryTool = QuerySQLDatabaseTool(db=db)
executeQuery = {"result": executeQueryTool.invoke(query)}
# Bare last expression so the notebook displays the result dict.
executeQuery

{'result': "[('31/03/2025', 5.300000190734863)]"}

# Convert the question, SQL query, and SQL result into an answer with the LLM

In [None]:
# Prompt template for the final answer. Despite its name, `system_role` is the
# whole template text: it is fed to PromptTemplate below and has three
# placeholders ({question}, {query}, {result}).
# The "\n" escapes sit next to literal line breaks, so the rendered prompt has
# blank lines between fields and keeps the 4-space indentation.
system_role = """Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n
    Question: {question}\n
    SQL Query: {query}\n
    SQL Result: {result}\n
    Answer:
    """

answer_prompt = PromptTemplate.from_template(
    system_role)

# Fill the template, send it to the LLM, and reduce the response to a plain string.
answer = answer_prompt | sql_agent_llm | StrOutputParser()

In [20]:
# End-to-end pipeline: question -> SQL -> query result -> natural-language answer.
# generate_query already performs table selection itself (it assigns
# "table_names_to_use" before calling the SQL-writing chain), so the separate
# select_table step that used to precede it was a second, redundant LLM call
# per question whose output nothing downstream read.
chain = (
    RunnablePassthrough.assign(query=generate_query)
    # Run the generated SQL and keep the raw result alongside question/query.
    .assign(result=itemgetter("query") | executeQueryTool)
    # The answer prompt consumes "question", "query" and "result".
    | answer
)

In [21]:
# End-to-end check on a sample question; the bare expression displays the answer string.
chain.invoke({"question": "What was the exchange rate in september 2023"})

'The exchange rate in September 2023 was 11.0629.'

# Prepare tool

In [None]:
class AzureSQLAgent:
    """Bundle the database handle and the two LLMs used by the question-to-SQL pipeline.

    The constructor opens a SQLAlchemy engine, wraps it in a LangChain
    ``SQLDatabase`` and creates one chat model for SQL generation and one for
    table selection. The helper methods are static because they do not touch
    instance state; they can be called on the class or on an instance.
    """

    # Defined first so the annotation on get_tables resolves to this nested
    # model rather than to a same-named module-level class.
    class Table(BaseModel):
        """
        Represents a table in the SQL database.

        Attributes:
            name (str): The name of the table in the SQL database.
        """
        name: str = Field(description="Name of table in SQL database.")

    def __init__(self, connectionString, dbSchema, sqlLLM, tableLLM, llmTemp):
        """Create the database wrapper and both chat models.

        Args:
            connectionString: SQLAlchemy connection URL for the database.
            dbSchema: Schema name passed to ``SQLDatabase``.
            sqlLLM: Model name for the LLM that writes SQL queries.
            tableLLM: Model name for the LLM that selects relevant tables.
            llmTemp: Sampling temperature applied to both models.
        """
        db_engine = create_engine(connectionString)
        self.db = SQLDatabase(db_engine, view_support=True, schema=dbSchema)
        self.sql_agent_llm = ChatOpenAI(model=sqlLLM, temperature=llmTemp) #used to generate sql query
        self.table_extractor_llm = ChatOpenAI(model=tableLLM, temperature=llmTemp) #used to choose which tables to use for the query generation

    @staticmethod
    def get_table_details(csv_path="database_table_descriptions.csv"):
        """Return the table catalogue text built from the descriptions CSV.

        Args:
            csv_path: CSV file with ``Table`` and ``Description`` columns.

        Returns:
            One "Table Name:" / "Table Description:" entry per row, each
            followed by a blank line; an empty string when there are no rows.
        """
        # Read every cell as text and keep blank cells as "" instead of NaN so
        # an empty description cannot break the string formatting.
        table_description = pd.read_csv(csv_path, dtype=str, keep_default_na=False).fillna("")

        return "".join(
            f"Table Name:{name}\nTable Description:{description}\n\n"
            for name, description in zip(table_description["Table"], table_description["Description"])
        )

    @staticmethod
    def get_tables(tables: List[Table]) -> List[str]:
        """Return the names of the given Table objects, preserving order."""
        return [table.name for table in tables]
                