In [1]:
import os 
import sqlite3 

In [2]:
from langchain_community.utilities.sql_database import SQLDatabase
from pyprojroot import here # IMP
db_path = str(here("data")) + "/student.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

In [3]:
print(db.get_table_info())


CREATE TABLE "STUDENT" (
	"NAME" VARCHAR(25), 
	"CLASS" VARCHAR(25), 
	"SECTION" VARCHAR(25), 
	"MARKS" INTEGER
)

/*
3 rows from STUDENT table:
NAME	CLASS	SECTION	MARKS
Krish	Data Science	A	90
Salman	CS	B	60
Sunny	Data Science	D	70
*/


In [4]:
db.get_usable_table_names

<bound method SQLDatabase.get_usable_table_names of <langchain_community.utilities.sql_database.SQLDatabase object at 0x00000200376EE350>>

In [5]:
from langchain.chains import create_sql_query_chain
from langchain_groq import ChatGroq
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [6]:
llm = ChatGroq(model="llama-3.2-11b-text-preview")

### NO use as static prompt

In [7]:
# First Test
sql_chain = create_sql_query_chain(llm,db)

sql_cmd = sql_chain.invoke({"question":"Tell me number of students from class data science"})
print(sql_cmd)

SELECT COUNT("NAME") 
FROM "STUDENT" 
WHERE "CLASS" = 'Data Science'


In [8]:
# TO run command on SQL DB
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
exe_query = QuerySQLDataBaseTool(db=db)
exe_query.invoke(sql_cmd)
# checker.invoke(sql_cmd)

'[(6,)]'

In [9]:
chain = sql_chain|exe_query
chain.invoke({"question":"Tell me number of students from class data science"})

'[(6,)]'

In [10]:
# Default Prompt
print(chain.get_prompts()[0].template)

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: 

In [11]:
# Customize Prompt for output

from operator import itemgetter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


answer_prompt = PromptTemplate.from_template("""
Given the following user question, corresponding sql query, and SQL result. Answer from user point of view and avoid mention of SQL query
If you don't find the answer ask user to repharse question.
Question:{question}
SQL_query:{query}
SQL_Result:{result}
Answer:
""")

In [21]:
rephrase_answer = answer_prompt | llm |StrOutputParser()

final_chain = (
    RunnablePassthrough.assign(query=sql_chain).assign(
        result = itemgetter("query") | exe_query
    )|rephrase_answer
)

final_chain.invoke({"question":""})

'The class for student Suman is Chemical.'

### IMP

In [7]:
## Adding few shot examples

examples = [
    {
        "input":"How many entries of records are present?",
        "query":"SELECT COUNT(*) from STUDENT;"
    },
    {
        "input":"Tell me all the students studying  in Data  science class?",
        "query":"SELECT * FROM STUDENT WHERE CLASS='Data Science';"
    },
    {
        "input":"Tell me number of the students studying in all class?",
        "query":"SELECT count(*) FROM STUDENT;"
    },
    {
        "input":"Tell me name of the students studying in mba class?",
        "query":"SELECT NAME FROM STUDENT where class='MBA';"
    }
]

In [None]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder,FewShotChatMessagePromptTemplate

example_prompt_temp = ChatPromptTemplate.from_messages([
    ("human","{input}\nSQL Query:"),
    ("ai","{query}")
])

few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt = example_prompt_temp,
    examples = examples,
    input_variables = ['input']

)
print(few_shot_prompt.format(input="How many students are there?"))

In [None]:
### Dynamic Fewshot Example selection

from langchain_community.vectorstores import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

vector_store = Chroma()
vector_store.delete_collection()
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2"),
    vector_store,
    k = 2,
    input_keys = ['input'],
)

example_selector.select_examples({"input":"Tell me name of student with lowest marks across all classes"})

In [None]:
sorted_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt = example_prompt_temp,
    example_selector=example_selector,
    input_variables = ['input','top_k']

)
print(sorted_shot_prompt.format(input="How many students are there?",top_k = 3,table_info=""))

In [None]:
# Customizing Prompt

In [None]:
final_prompt = ChatPromptTemplate.from_messages(
     [
         ("system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries."),
         few_shot_prompt,
         ("human", "{input}"),
     ]
 )
print(final_prompt.format(input="How many students are there?",table_info = "Some_info"))

In [None]:
# Final Pipeline

generate_query = create_sql_query_chain(llm,db,final_prompt)
# from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
# exe_query = QuerySQLDataBaseTool(db=db)

# f_chain = (
#     RunnablePassthrough.assign(query=generate_query).assign(
#         result = itemgetter("query") |exe_query
#     )| rephrase_answer
# )

generate_query.invoke({"question":"Tell me name of students from class MBA"})

In [22]:
## Memory

In [31]:
from langchain.memory import ChatMessageHistory

history = ChatMessageHistory()

generate_query = create_sql_query_chain(llm,db)
chain = (
    RunnablePassthrough.assign(query=generate_query).assign(
        result = itemgetter("query") | exe_query
    )| rephrase_answer

)

In [32]:
chain.invoke({"question":"Tell me number of students of class MBA","message":history.messages})

'There are 2 students in the MBA class.'

In [33]:
history.add_user_message("Tell me number of students of class MBA")
history.add_ai_message("There are 2 students in the MBA class.")


In [34]:
history.messages

[HumanMessage(content='Tell me number of students of class MBA', additional_kwargs={}, response_metadata={}),
 AIMessage(content='There are 2 students in the MBA class.', additional_kwargs={}, response_metadata={})]

In [35]:
resp = chain.invoke({"question":"Can you list their names?","messages":history.messages}
                    )
resp

'Their names are:\n\n1. Krish\n2. Salman\n3. Sunny\n4. Sudhanshu\n5. swapnil\n6. Raviraj\n7. Gaurav\n8. Tarun\n9. Jay\n10. Sanjay\n11. Vishal\n12. Ravi\n13. Riya\n14. Hardik\n15. Suman\n16. Raahish\n17. Grace'