Installing the necessary packages

In [2]:
!pip install langchain
!pip install google-generativeai
!pip install langchain_experimental
!pip install pymysql

Collecting langchain
  Downloading langchain-0.1.5-py3-none-any.whl.metadata (13 kB)
Collecting PyYAML>=5.3 (from langchain)
  Downloading PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (2.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.25-cp39-cp39-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl.metadata (7.4 kB)
Collecting async-timeout<5.0.0,>=4.0.0 (from langchain)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl.metadata (25 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-community<0.1,>=0.0.17 (from langchain)
  Downloading langchain_community-0.0.17-py3-none-any.whl.metadata (7.9 kB)
Collecting langchain-core<0.2,>=0.

Setting up the Google PaLM LLM using an API key

In [30]:
import os

from dotenv import load_dotenv
from langchain.llms import GooglePalm

# Load environment variables via python-dotenv before the key lookup below.
load_dotenv()

# The API key is read from the environment, never written into the notebook.
llm = GooglePalm(
    temperature=0.1,
    google_api_key=os.environ['GOOGLE_API_KEY'],
)

Connecting to the MySQL database that the chain will query

In [11]:
# for databases
import os
from urllib.parse import quote_plus

from langchain.utilities import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain

# Connection settings are read from the environment instead of being written
# into the notebook. DB_PASSWORD is required (a missing value raises KeyError);
# the other settings fall back to the values this notebook used previously.
db_user = os.environ.get("DB_USER", "root")
db_password = os.environ["DB_PASSWORD"]
db_host = os.environ.get("DB_HOST", "localhost")
db_name = os.environ.get("DB_NAME", "atliq_tshirts")

# URL-encode the credentials so characters such as '@', '/', ':' or '#' inside
# them cannot be mistaken for URL delimiters when the URI is parsed.
db_uri = (
    f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}/{db_name}"
)

# sample_rows_in_table_info=3 is passed through unchanged from the original call.
db = SQLDatabase.from_uri(db_uri, sample_rows_in_table_info=3)
# print(db.table_info)

In [12]:
# Baseline chain without few-shot examples. With verbose=True each run prints
# the generated SQLQuery, the SQLResult and the final Answer.
db_chain = SQLDatabaseChain.from_llm(llm=llm, db=db, verbose=True)

# Natural-language questions: the model has to come up with the SQL itself.
# In the recorded output, the second question produced SUM(price) rather than
# SUM(price*stock_quantity).
qns1 = db_chain.run(
    "How many t-shirts do we have left for nike in extra small size and white color?"
)
qns2 = db_chain.run(
    "How much is the price of the inventory for all small size t-shirts?"
)

# Here the SQL text itself is passed as the question; in the recorded output
# the chain echoes it as the SQLQuery and returns its result.
qns3 = db_chain.run(
    "SELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'"
)
qns4 = db_chain.run(
    "SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'"
)
qns5 = db_chain.run(
    "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'"
)

  warn_deprecated(




[1m> Entering new SQLDatabaseChain chain...[0m
How many t-shirts do we have left for nike in extra small size and white color?
SQLQuery:[32;1m[1;3mSELECT stock_quantity FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'[0m
SQLResult: [33;1m[1;3m[(58,)][0m
Answer:

Retrying langchain_community.llms.google_palm.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised InvalidArgument: 400 API key expired. Please renew the API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
].


[32;1m[1;3m58[0m
[1m> Finished chain.[0m


[1m> Entering new SQLDatabaseChain chain...[0m
How much is the price of the inventory for all small size t-shirts?
SQLQuery:[32;1m[1;3mSELECT SUM(price) FROM t_shirts WHERE size = 'S'[0m
SQLResult: [33;1m[1;3m[(Decimal('263'),)][0m
Answer:[32;1m[1;3m263[0m
[1m> Finished chain.[0m


[1m> Entering new SQLDatabaseChain chain...[0m
SELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'
SQLQuery:[32;1m[1;3mSELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'[0m
SQLResult: [33;1m[1;3m[(Decimal('19785'),)][0m
Answer:[32;1m[1;3m19785[0m
[1m> Finished chain.[0m


[1m> Entering new SQLDatabaseChain chain...[0m
SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'
SQLQuery:[32;1m[1;3mSELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'[0m
SQLResult: [33;1m[1;3m[(Decimal('28305'),)][0m
Answer:[32;1m[1;3m28305[0m
[1m> Finished chain.[0m


[1m> Entering new SQL

In [14]:
# for few shot learning
#
# Every 'Answer' must be the result of the 'SQLQuery' in the same example,
# otherwise the examples teach the model wrong question/answer pairs:
#   qns1 -> Nike / XS / White stock
#   qns3 -> SUM(price*stock_quantity) for size 'S'
#           (qns2 is deliberately NOT used: its run generated SUM(price),
#            which does not match this example's SQL)
#   qns4 -> Levi inventory value without discounts
#   qns5 -> Levi white stock
# The post-discount Levi query is not executed by any earlier cell, so it is
# run here to obtain an answer that belongs to it.
levi_discount_sql = """SELECT sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
(select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id
 """

# Same approach as qns3-qns5: the SQL text is passed to the chain as the input.
# NOTE(review): this relies on the model echoing the SQL unchanged; check the
# verbose output of this cell to confirm the executed SQLQuery matches.
qns_levi_discount = db_chain.run(levi_discount_sql)

few_shots = [
    {'Question' : "How many t-shirts do we have left for Nike in XS size and white color?",
     'SQLQuery' : "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'",
     'SQLResult': "Result of the SQL query",
     'Answer' : qns1
     },
    {'Question': "How much is the total price of the inventory for all S-size t-shirts?",
     'SQLQuery':"SELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'",
     'SQLResult': "Result of the SQL query",
     'Answer': qns3
     },
    {'Question': "If we have to sell all the Levi’s T-shirts today with discounts applied. How much revenue  our store will generate (post discounts)?" ,
     'SQLQuery' : levi_discount_sql,
     'SQLResult': "Result of the SQL query",
     'Answer': qns_levi_discount
     } ,
     {'Question' : "If we have to sell all the Levi’s T-shirts today. How much revenue our store will generate without discount?" ,
      'SQLQuery': "SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'",
      'SQLResult': "Result of the SQL query",
      'Answer' : qns4
      },
    {'Question': "How many white color Levi's shirt I have?",
     'SQLQuery' : "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'",
     'SQLResult': "Result of the SQL query",
     'Answer' : qns5
     }
]

In [16]:
!pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-2.3.1-py3-none-any.whl.metadata (11 kB)
Collecting transformers<5.0.0,>=4.32.0 (from sentence-transformers)
  Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.2.0-cp39-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.4.0-1-cp39-cp39-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting scipy (from sentence-transformers)
  Downloading scipy-1.12.0-cp39-cp39-macosx_12_0_arm64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nltk (from sentence-transformers)
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K

In [17]:
# Embedding model that converts text into vectors
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

# One text blob per few-shot example for the vector database: all field values
# of the example (question, SQL, result, answer) joined with single spaces.
to_vectorise = [
    " ".join(example[field] for field in example)
    for example in few_shots
]

In [19]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.4.22-py3-none-any.whl.metadata (7.3 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting chroma-hnswlib==0.7.3 (from chromadb)
  Downloading chroma_hnswlib-0.7.3-cp39-cp39-macosx_11_0_arm64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.109.2-py3-none-any.whl.metadata (25 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.27.0.post1-py3-none-any.whl.metadata (6.4 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.4.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting pulsar-client>=3.1.0 (from chromadb)
  Downloading pulsar_client-3.4.0-cp39-cp39-macosx_10_15_universal2.whl.metadata (1.1 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.17.0-cp39-cp39-macosx_11_0_universal2.whl.metadata (4.2 kB)
Collecting opentelemetry-api>=1.2.0 (from chroma

In [20]:
# Vector database over the few-shot examples: every text blob is embedded and
# stored with its originating example dict attached as metadata.
from langchain.vectorstores import Chroma

vector_db = Chroma.from_texts(
    texts=to_vectorise,
    embedding=embeddings,
    metadatas=few_shots,
)

In [22]:
# Selector that looks up the k=2 stored examples most similar to a new input
from langchain.prompts import SemanticSimilarityExampleSelector

example_selector = SemanticSimilarityExampleSelector(
    k=2,
    vectorstore=vector_db,
)

In [25]:
# for custom instructions
from langchain.chains.sql_database.prompt import PROMPT_SUFFIX, _mysql_prompt

# print(_mysql_prompt)
# in the final prompt, the selected few-shot examples are placed between this prefix (_mysql_prompt) and suffix (PROMPT_SUFFIX)

In [26]:
from langchain.prompts.prompt import PromptTemplate

# Layout used to render one few-shot example; the placeholders are the keys of
# the dicts in few_shots.
prompt_skeleton = PromptTemplate(
    template=(
        "\nQuestion: {Question}"
        "\nSQLQuery: {SQLQuery}"
        "\nSQLResult: {SQLResult}"
        "\nAnswer: {Answer}"
    ),
    input_variables=["Question", "SQLQuery", "SQLResult", "Answer"],
)

In [27]:
from langchain.prompts import FewShotPromptTemplate

# Full prompt: MySQL instructions first, then the examples chosen by the
# selector (each rendered with prompt_skeleton), then the standard suffix.
few_shot_template = FewShotPromptTemplate(
    prefix=_mysql_prompt,
    example_selector=example_selector,
    example_prompt=prompt_skeleton,
    suffix=PROMPT_SUFFIX,
    input_variables=["input", "table_info", "top_k"],
)

In [None]:
# Few-shot chain. Built with from_llm(), like db_chain above, rather than by
# calling the SQLDatabaseChain constructor with llm=/prompt= directly, which
# langchain_experimental treats as deprecated.
new_db_chain = SQLDatabaseChain.from_llm(llm, db, prompt=few_shot_template)