In [1]:
#pip install google-generativeai
#pip install cryptography

import os
import langchain
from langchain.llms.google_palm import GooglePalm
from langchain.utilities import SQLDatabase
from langchain.prompts import FewShotPromptTemplate
from langchain.chains.sql_database.prompt import PROMPT_SUFFIX, _mysql_prompt
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import SemanticSimilarityExampleSelector
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_experimental.sql import SQLDatabaseChain
import warnings
warnings.simplefilter("ignore")

In [2]:
with open('googlepalmapikey.txt','r') as f:
    apikey = f.read()

os.environ["GOOGLE_API_KEY"] = apikey

In [4]:
llm  = GooglePalm()

In [5]:
# connect to sql database

db_user = 'root'
db_pass = 'root'
db_host = 'localhost'
db_name = 'atliq_tshirts'

In [6]:
db_auth = f'mysql+pymysql://{db_user}:{db_pass}@{db_host}/{db_name}'
db = SQLDatabase.from_uri( db_auth,sample_rows_in_table_info=3)

In [8]:
print(db.table_info)


CREATE TABLE discounts (
	discount_id INTEGER NOT NULL AUTO_INCREMENT, 
	t_shirt_id INTEGER NOT NULL, 
	pct_discount DECIMAL(5, 2), 
	PRIMARY KEY (discount_id), 
	CONSTRAINT discounts_ibfk_1 FOREIGN KEY(t_shirt_id) REFERENCES t_shirts (t_shirt_id), 
	CONSTRAINT discounts_chk_1 CHECK ((`pct_discount` between 0 and 100))
)DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci

/*
3 rows from discounts table:
discount_id	t_shirt_id	pct_discount
1	1	10.00
2	2	15.00
3	3	20.00
*/


CREATE TABLE t_shirts (
	t_shirt_id INTEGER NOT NULL AUTO_INCREMENT, 
	brand ENUM('Van Huesen','Levi','Nike','Adidas') NOT NULL, 
	color ENUM('Red','Blue','Black','White') NOT NULL, 
	size ENUM('XS','S','M','L','XL') NOT NULL, 
	price INTEGER, 
	stock_quantity INTEGER NOT NULL, 
	PRIMARY KEY (t_shirt_id), 
	CONSTRAINT t_shirts_chk_1 CHECK ((`price` between 10 and 50))
)DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci

/*
3 rows from t_shirts table:
t_shirt_id	brand	color	size	price	stock

In [9]:
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
qns1 = db_chain("How many t-shirts do we have left for nike in extra small size and white color?")
qns1 = qns1.get("result")



[1m> Entering new SQLDatabaseChain chain...[0m
How many t-shirts do we have left for nike in extra small size and white color?
SQLQuery:[32;1m[1;3mSELECT stock_quantity FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'[0m
SQLResult: [33;1m[1;3m[(22,)][0m
Answer:[32;1m[1;3m22[0m
[1m> Finished chain.[0m


In [10]:
qns2 = db_chain.run("How much is the price of the inventory for all small size t-shirts?")
# there is a mistake here , it should have been price X number of units



[1m> Entering new SQLDatabaseChain chain...[0m
How much is the price of the inventory for all small size t-shirts?
SQLQuery:[32;1m[1;3mSELECT SUM(price) FROM t_shirts WHERE size = 'S'[0m
SQLResult: [33;1m[1;3m[(Decimal('341'),)][0m
Answer:[32;1m[1;3m341[0m
[1m> Finished chain.[0m


In [11]:
sql_code = """
select sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
(select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id
 """

qns3 = db_chain.run(sql_code)



[1m> Entering new SQLDatabaseChain chain...[0m

select sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
(select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id
 
SQLQuery:[32;1m[1;3mselect sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
(select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id[0m
SQLResult: [33;1m[1;3m[(Decimal('17543.500000'),)][0m
Answer:[32;1m[1;3m17543.5[0m
[1m> Finished chain.[0m


In [12]:
qns4 = db_chain.run("SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'")
qns5 = db_chain.run("How many white color Levi's t shirts we have available?")



[1m> Entering new SQLDatabaseChain chain...[0m
SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'
SQLQuery:[32;1m[1;3mSELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'[0m
SQLResult: [33;1m[1;3m[(Decimal('18580'),)][0m
Answer:[32;1m[1;3m18580[0m
[1m> Finished chain.[0m


[1m> Entering new SQLDatabaseChain chain...[0m
How many white color Levi's t shirts we have available?
SQLQuery:[32;1m[1;3mSELECT stock_quantity FROM t_shirts WHERE brand = 'Levi' AND color = 'White'[0m
SQLResult: [33;1m[1;3m[(19,), (20,), (96,), (76,)][0m
Answer:[32;1m[1;3m19[0m
[1m> Finished chain.[0m


In [13]:
few_shots = [
    {'Question' : "How many t-shirts do we have left for Nike in XS size and white color?",
     'SQLQuery' : "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'",
     'SQLResult': "Result of the SQL query",
     'Answer' : qns1},
    {'Question': "How much is the total price of the inventory for all S-size t-shirts?",
     'SQLQuery':"SELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'",
     'SQLResult': "Result of the SQL query",
     'Answer': qns2},
    {'Question': "If we have to sell all the Levi’s T-shirts today with discounts applied. How much revenue  our store will generate (post discounts)?" ,
     'SQLQuery' : """SELECT sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
(select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id
 """,
     'SQLResult': "Result of the SQL query",
     'Answer': qns3} ,
     {'Question' : "If we have to sell all the Levi’s T-shirts today. How much revenue our store will generate without discount?" ,
      'SQLQuery': "SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'",
      'SQLResult': "Result of the SQL query",
      'Answer' : qns4},
    {'Question': "How many white color Levi's shirt I have?",
     'SQLQuery' : "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'",
     'SQLResult': "Result of the SQL query",
     'Answer' : qns5
     }
]

In [14]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
vectorize = [" ".join(example.values()) for example in few_shots]

In [15]:
vectorstore = Chroma.from_texts(vectorize, embeddings, metadatas=few_shots)

In [16]:
example_selector = SemanticSimilarityExampleSelector(
    vectorstore=vectorstore,
    k=2,
)

example_selector.select_examples({"Question": "How many Adidas T shirts I have left in my store?"})

[{'Answer': '22',
  'Question': 'How many t-shirts do we have left for Nike in XS size and white color?',
  'SQLQuery': "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'",
  'SQLResult': 'Result of the SQL query'},
 {'Answer': '19',
  'Question': "How many white color Levi's shirt I have?",
  'SQLQuery': "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'",
  'SQLResult': 'Result of the SQL query'}]

In [17]:
example_prompt = PromptTemplate(
    input_variables=["Question", "SQLQuery", "SQLResult","Answer",],
    template="\nQuestion: {Question}\nSQLQuery: {SQLQuery}\nSQLResult: {SQLResult}\nAnswer: {Answer}",
)

In [18]:
few_shot_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix=_mysql_prompt,
    suffix=PROMPT_SUFFIX,
    input_variables=["input", "table_info", "top_k"], #These variables are used in the prefix and suffix
)

In [19]:
new_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, prompt=few_shot_prompt)

In [20]:
new_chain("How many white color Levi's shirt I have?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many white color Levi's shirt I have?
SQLQuery:[32;1m[1;3mSELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'[0m
SQLResult: [33;1m[1;3m[(Decimal('211'),)][0m
Answer:[32;1m[1;3m211[0m
[1m> Finished chain.[0m


{'query': "How many white color Levi's shirt I have?", 'result': '211'}

In [21]:
new_chain("How much is the price of the inventory for all small size t-shirts?")



[1m> Entering new SQLDatabaseChain chain...[0m
How much is the price of the inventory for all small size t-shirts?
SQLQuery:[32;1m[1;3mSELECT SUM(price * stock_quantity) FROM t_shirts WHERE size = 'S'[0m
SQLResult: [33;1m[1;3m[(Decimal('22814'),)][0m
Answer:[32;1m[1;3m22814[0m
[1m> Finished chain.[0m


{'query': 'How much is the price of the inventory for all small size t-shirts?',
 'result': '22814'}