In [1]:
import os 
import sqlite3 

In [2]:
from langchain_community.utilities.sql_database import SQLDatabase
from pyprojroot import here # IMP
db_path = str(here("data")) + "/student.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

In [3]:
print(db.get_table_info())


CREATE TABLE "STUDENT" (
	"NAME" VARCHAR(25), 
	"CLASS" VARCHAR(25), 
	"SECTION" VARCHAR(25), 
	"MARKS" INTEGER
)

/*
3 rows from STUDENT table:
NAME	CLASS	SECTION	MARKS
Krish	Data Science	A	90
Salman	CS	B	60
Sunny	Data Science	D	70
*/


In [4]:
db.get_usable_table_names

<bound method SQLDatabase.get_usable_table_names of <langchain_community.utilities.sql_database.SQLDatabase object at 0x000001A69C74E350>>

In [5]:
from langchain.chains import create_sql_query_chain
from langchain_groq import ChatGroq
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [6]:
llm = ChatGroq(model="gemma2-9b-it")

In [7]:
llm.invoke("Hellow")

AIMessage(content='Hello! 👋  How can I help you today? 😊 \n', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 11, 'total_tokens': 27, 'completion_time': 0.029090909, 'prompt_time': 0.00017858, 'queue_time': 0.013972987000000001, 'total_time': 0.029269489}, 'model_name': 'gemma2-9b-it', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run-47351176-ce5f-415e-ba7c-c96ea6dc5094-0', usage_metadata={'input_tokens': 11, 'output_tokens': 16, 'total_tokens': 27})

### NO use as static prompt

In [None]:
# First Test
sql_chain = create_sql_query_chain(llm,db)

sql_cmd = sql_chain.invoke({"question":"Tell me number of students from class data science"})
print(sql_cmd)

In [8]:
# TO run command on SQL DB
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
exe_query = QuerySQLDataBaseTool(db=db)
# exe_query.invoke(sql_cmd)
# checker.invoke(sql_cmd)

In [None]:
chain = sql_chain|exe_query
chain.invoke({"question":"Tell me number of students from class data science"})

In [None]:
# Default Prompt
print(chain.get_prompts()[0].template)

In [11]:
# Customize Prompt for output

from operator import itemgetter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


answer_prompt = PromptTemplate.from_template("""
Given the following user question, corresponding sql query, and SQL result. Answer from user point of view and avoid mention of SQL query
If you don't find the answer ask user to repharse question.
Question:{question}
SQL_query:{query}
SQL_Result:{result}
Answer:
""")
                                            


In [None]:
rephrase_answer = answer_prompt | llm |StrOutputParser()

final_chain = (
    RunnablePassthrough.assign(query=sql_chain).assign(
        result = itemgetter("query") | exe_query
    )|rephrase_answer
)

final_chain.invoke({"question":""})

### Few Shots 

In [8]:
## Adding few shot examples

examples = [
    {
        "input":"How many entries of records are present?",
        "query":"SELECT COUNT(*) from STUDENT;"
    },
    {
        "input":"Tell me all the students studying  in Data  science class?",
        "query":"SELECT * FROM STUDENT WHERE CLASS='Data Science';"
    },
    {
        "input":"Tell me number of the students studying in all class?",
        "query":"SELECT count(*) FROM STUDENT;"
    },
    {
        "input":"Tell me name of the students studying in mba class?",
        "query":"SELECT NAME FROM STUDENT where class='MBA';"
    }
]

In [14]:
from langchain_core.prompts import ChatPromptTemplate,FewShotChatMessagePromptTemplate

# Customize Prompt for output

from operator import itemgetter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


answer_prompt = PromptTemplate.from_template("""
Given the following user question, corresponding sql query, and SQL result. Answer from user point of view and avoid mention of SQL query
If you don't find the answer ask user to repharse question.
Question:{question}
SQL_query:{query}
SQL_Result:{result}
Answer:
""")

rephrase_answer = answer_prompt | llm |StrOutputParser()

example_prompt = ChatPromptTemplate.from_messages(
     [
         ("human", "{input}\nSQLQuery:"),
         ("ai", "{query}"),
     ]
 )
few_shot_prompt = FewShotChatMessagePromptTemplate(
     example_prompt=example_prompt,
     examples=examples,
     input_variables=["input","top_k",'table_info']
 )
print(few_shot_prompt.format(input="How many students are there?",top_k = '',table_info = ""))

Human: How many entries of records are present?
SQLQuery:
AI: SELECT COUNT(*) from STUDENT;
Human: Tell me all the students studying  in Data  science class?
SQLQuery:
AI: SELECT * FROM STUDENT WHERE CLASS='Data Science';
Human: Tell me number of the students studying in all class?
SQLQuery:
AI: SELECT count(*) FROM STUDENT;
Human: Tell me name of the students studying in mba class?
SQLQuery:
AI: SELECT NAME FROM STUDENT where class='MBA';


### Dynamic FewShot selector

In [17]:
### Dynamic Fewshot Example selection

from langchain_community.vectorstores import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

vector_store = Chroma()
vector_store.delete_collection()
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2"),
    vector_store,
    k = 2,
    input_keys = ['input'],
)

example_selector.select_examples({"input":"Tell me name of student with lowest marks across all classes"})


sorted_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt = example_prompt,
    example_selector=example_selector,  ## This line is modified only
    input_variables=["input","top_k",'table_info']
)
print(sorted_shot_prompt.format(input="How many students are there?",top_k = 3,table_info=""))

  vector_store = Chroma()
  from tqdm.autonotebook import tqdm, trange


Human: Tell me number of the students studying in all class?
SQLQuery:
AI: SELECT count(*) FROM STUDENT;
Human: Tell me name of the students studying in mba class?
SQLQuery:
AI: SELECT NAME FROM STUDENT where class='MBA';


## Customize prompt

In [18]:
from langchain_core.prompts import ChatPromptTemplate

from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
exe_query = QuerySQLDataBaseTool(db=db)
final_prompt = ChatPromptTemplate.from_messages(
     [
         ("system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries."),
        #  few_shot_prompt,
        sorted_shot_prompt,
         ("human", "OutPut should only Contain SQL query and Nothing else and nothing else associated with it. {input}"),
     ]
 )

generate_query = create_sql_query_chain(llm,db,final_prompt)

chain = (
 RunnablePassthrough.assign(query=generate_query).assign(
     result=itemgetter("query") | exe_query
 )
 | rephrase_answer
 )
chain.invoke({"question": "Tell me name of students of class MBA"})

'The students in the MBA class are Raahish and Grace. \n'

### Memory to chat

In [33]:
from langchain.memory import ChatMessageHistory
from langchain_core.prompts import MessagesPlaceholder

history = ChatMessageHistory()

memory_prompt = ChatPromptTemplate.from_messages(
     [
         ("system", "You are a MySQL expert. Given an input question, create a syntactically correct MySQL query to run. Unless otherwise specificed.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries."),
        #  few_shot_prompt,
        sorted_shot_prompt,
        MessagesPlaceholder(variable_name='messages'),
         ("human", "OutPut should only Contain SQL query and Nothing else and nothing else associated with it. {input}"),
     ]
 )


In [34]:
generate_query = create_sql_query_chain(llm,db,memory_prompt)

chat_chain = (
    RunnablePassthrough.assign(query=generate_query).assign(result=itemgetter("query")|exe_query) |rephrase_answer
)

In [35]:
chat_chain.invoke({'question':"tell me number of students from class MBA","messages":history.messages})

'There are 2 students in the MBA class. \n'

In [36]:
history.add_user_message("tell me number of students from class MBA")
history.add_ai_message("There are 2 students in the MBA class.")

In [38]:
chat_chain.invoke({'question':"tell me names of those students","messages":history.messages})

'The students in the MBA class are Raahish and Grace. \n'

In [2]:
import pandas as pd

In [4]:
data = pd.read_csv("C:/Users/shambhuraj patil/Desktop/New_career/Git_repos/Genai_project/data/diabetes.csv")

In [11]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB
None


In [14]:
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')