# Import Headers

In [1]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import os

# Load environment variables from a .env file. A later cell reads OPENAI_API_KEY
# via os.getenv — presumably it is defined in that file (confirm).
load_dotenv()

True

In [24]:
from langchain_community.vectorstores import Chroma
from langchain.prompts import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains.sql_database.prompt import PROMPT_SUFFIX, _mysql_prompt


# Setup LLM

In [2]:
# Fail fast with a readable message when the key is missing. The key is not
# passed explicitly below; ChatOpenAI is expected to pick it up from the
# OPENAI_API_KEY environment variable on its own.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your environment or .env file."
    )

# temperature=0 asks for the least random output the model offers.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Smoke test: a single small request to confirm the key and model work.
response = llm.invoke("Write a SQL query to get top 5 customers by sales. keep it short")
print(response.content)

SELECT customer_id, SUM(sales_amount) as total_sales
FROM sales
GROUP BY customer_id
ORDER BY total_sales DESC
LIMIT 5;


# Connect With Database

In [3]:
from urllib.parse import quote_plus

from langchain.utilities import SQLDatabase

# Connection settings are read from the environment (e.g. the .env file) so real
# credentials never need to be committed with the notebook. The fallbacks are
# the previous hard-coded local-development values.
db_user = os.getenv("DB_USER", "root")
db_password = os.getenv("DB_PASSWORD", "root")
db_host = os.getenv("DB_HOST", "localhost")
db_name = os.getenv("DB_NAME", "tshirts")

# Percent-encode user and password: characters such as "@", ":" or "/" would
# otherwise be parsed as URI delimiters.
db_uri = (
    f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}/{db_name}"
)

# Request 3 sample rows per table in the table info handed to the LLM.
db = SQLDatabase.from_uri(db_uri, sample_rows_in_table_info=3)

# print(db.table_info)

______________

# Ask questions with Zero shot learning

In [4]:
from langchain.agents import create_sql_agent

# Zero-shot ReAct SQL agent built on the LLM and database configured above.
# Add verbose=True to the call to inspect how the agent reasons.
sql_agent = create_sql_agent(
    llm,
    db=db,
    agent_type="zero-shot-react-description",
    handle_parsing_errors=True,
)

## Question 1

In [5]:
# Zero-shot attempt: remaining stock for one brand / size / colour combination.
q1 = sql_agent.invoke(
    "How many t-shirts do we have left for Nike in extra small size and white color?"
)
print(q1)

{'input': 'How many t-shirts do we have left for Nike in extra small size and white color?', 'output': '1'}


- ##### `1` is incorrect. The correct answer is `25`.  
--

## Question 2

In [6]:
# Zero-shot attempt: inventory value of every small-size t-shirt.
q2 = sql_agent.invoke(
    "How much is the price of the inventory for all small size t-shirts?"
)
print(q2)

{'input': 'How much is the price of the inventory for all small size t-shirts?', 'output': '$13,429'}


- ##### This run returned the correct answer, `$13,429` (an earlier run returned `305`, which is wrong).  
--

## Question 3

In [7]:
# Zero-shot attempt: post-discount revenue for one brand. The response dict is
# printed field by field instead of as a raw dict.
q3 = sql_agent.invoke(
    "If we have to sell all the Levi’s T-shirts today with discounts applied. How much revenue our store will generate (post discounts)?"
)
print(f"Input:\n{q3['input']}\nOutput:\n{q3['output']}")

Input:
If we have to sell all the Levi’s T-shirts today with discounts applied. How much revenue our store will generate (post discounts)?
Output:
The store will generate $28.20 in revenue by selling all Levi's T-shirts with discounts applied.


- ##### `$28.20` is incorrect (an earlier run returned `$1,212.60`, also incorrect). The correct answer is `$12,147`.  
--

## Question 4

In [8]:
# Zero-shot attempt: undiscounted stock value for one brand.
q4 = sql_agent.invoke(
    "What is the total value of Levi’s T-shirts we have in stock?"
)
print(q4)

{'input': 'What is the total value of Levi’s T-shirts we have in stock?', 'output': 'The total value of Levi’s T-shirts we have in stock is $12,956.'}


- ##### `$12,956` is incorrect (an earlier run returned `$28.20`). The correct answer is `$12,946`.  
--

## Question 5

In [9]:
# Zero-shot attempt: stock count for one brand / colour combination.
q5 = sql_agent.invoke(
    "How many white color Levi's t shirts we have available?"
)
print(q5)

{'input': "How many white color Levi's t shirts we have available?", 'output': "There are 2 white color Levi's t-shirts available."}


- ##### `2` is incorrect. The correct answer is `5`.

_____________________

# Provide correct SQL queries and get the results

In [32]:
# Feed hand-written reference SQL to the agent so that each result keeps the
# {'input': ..., 'output': ...} dict shape the few-shot setup reads later.
# Every call goes through the LLM-backed agent, so this cell can be slow.

# Aggregate the stock instead of `select *`, so the stored answer matches the
# SQL documented for this question in the few-shot examples.
q1 = sql_agent.invoke(
    "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'"
)
q2 = sql_agent.invoke("SELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'")
# Per-shirt stock value with the discount percentage applied; shirts without a
# discounts row fall back to 0% via COALESCE.
q3 = sql_agent.invoke("""
    select sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
    (select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
    group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id
     """)
q4 = sql_agent.invoke("SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'")
q5 = sql_agent.invoke("SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'")

for label, result in (("Q1", q1), ("Q2", q2), ("Q3", q3), ("Q4", q4), ("Q5", q5)):
    print(f"{label} Output: {result}")

KeyboardInterrupt: 

__________________

# Few Shot Learning

In [33]:
# Few-shot examples: each entry pairs a natural-language question with its
# reference SQL and the agent response (q1..q5) captured in an earlier cell.
# "SQLResult" holds a fixed placeholder string, not real query output.
few_shots = [
    {
        "Question": "How many t-shirts do we have left for Nike in XS size and white color?",
        "SQLQuery": "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Nike' AND color = 'White' AND size = 'XS'",
        "SQLResult": "Result of the SQL query",
        "Answer": q1,
    },
    {
        "Question": "How much is the total price of the inventory for all S-size t-shirts?",
        "SQLQuery": "SELECT SUM(price*stock_quantity) FROM t_shirts WHERE size = 'S'",
        "SQLResult": "Result of the SQL query",
        "Answer": q2,
    },
    {
        "Question": "If we have to sell all the Levi’s T-shirts today with discounts applied. How much revenue  our store will generate (post discounts)?",
        # The continuation lines keep their original leading whitespace because
        # it is part of the string value.
        "SQLQuery": """SELECT sum(a.total_amount * ((100-COALESCE(discounts.pct_discount,0))/100)) as total_revenue from
                   (select sum(price*stock_quantity) as total_amount, t_shirt_id from t_shirts where brand = 'Levi'
                   group by t_shirt_id) a left join discounts on a.t_shirt_id = discounts.t_shirt_id
                   """,
        "SQLResult": "Result of the SQL query",
        "Answer": q3,
    },
    {
        "Question": "If we have to sell all the Levi’s T-shirts today. How much revenue our store will generate without discount?",
        "SQLQuery": "SELECT SUM(price * stock_quantity) FROM t_shirts WHERE brand = 'Levi'",
        "SQLResult": "Result of the SQL query",
        "Answer": q4,
    },
    {
        "Question": "How many white color Levi's shirt I have?",
        "SQLQuery": "SELECT sum(stock_quantity) FROM t_shirts WHERE brand = 'Levi' AND color = 'White'",
        "SQLResult": "Result of the SQL query",
        "Answer": q5,
    },
]

In [34]:
# Embedding model handed to the vector store that indexes the few-shot questions.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Only the question text is embedded; the remaining fields travel as metadata.
texts = [example["Question"] for example in few_shots]

# The agent response dict is flattened into two plain string fields
# (Chroma metadata values are expected to be scalars).
metadatas = [
    {
        "sql": example["SQLQuery"],
        "sql_result": example["SQLResult"],
        "answer_input": example["Answer"]["input"],
        "answer_output": example["Answer"]["output"],
    }
    for example in few_shots
]

# Deterministic ids keep this cell idempotent: without them every re-run adds
# the same examples again under fresh random ids, and the selector can then
# return duplicates of one example.
ids = [f"few-shot-{index}" for index in range(len(texts))]

vectorstore = Chroma.from_texts(
    texts=texts,
    embedding=embeddings,
    metadatas=metadatas,
    ids=ids,
)

example_selector = SemanticSimilarityExampleSelector(
    vectorstore=vectorstore,
    k=2,  # number of most similar examples to retrieve per question
)
