# Prompt Similarity Search


Set up the document source

In [1]:
import pandas as pd
# Load the few-shot samples, discard rows that have no Category value,
# renumber the index, and expose the prompt text under the name "Query".
df = (
    pd.read_csv("../data/few_shot_samples.csv")
    .dropna(subset=["Category"])
    .reset_index(drop=True)
    .rename(columns={"Generated Prompts": "Query"})
)
# Distinct category labels, in order of first appearance.
categories = df["Category"].unique().tolist()
df

Unnamed: 0,Query,Response,Category
0,What were the sales numbers for Men's Clothing...,"Call Movement API with {""product-name"" = ""Men'...",Movement/Sales
1,Can you give me the sales figures for Beauty p...,"Call Movement API with {""product-name"" = ""Beau...",Movement/Sales
2,I need to see the movement of Electronics in Z...,"Call Movement API with {""product-name"" = ""Elec...",Movement/Sales
3,How much revenue was generated from Home Decor...,"Call Movement API with {""product-name"" = ""Home...",Movement/Sales
4,What was the sales performance for Shoes in Zo...,"Call Movement API with {""product-name"" = ""Shoe...",Movement/Sales
...,...,...,...
77,Export this report to a CSV file.,"Call Export API with {""type"" = ""csv"", data = C...",Export
78,Forward this report to Sarah.,"Call Email API with {""recipient"" = ""Sarah"", da...",Email
79,Share this report with Alex through email.,"Call Email API with {""recipient"" = ""Alex"", dat...",Email
80,Can you direct me to the screen where I can re...,"Call Navigation API with {""screen-name"" = ""Pri...",Navigation


In [2]:
categories

['Movement/Sales',
 'Budget/Forecast',
 'KVI',
 'Price Change',
 'Promotions Percent',
 'Sales Comparison',
 'Promotions',
 'KVI / Price Change',
 'Competitor Comparison',
 'Promotion Analysis',
 'Export',
 'Email',
 'Navigation']

In [3]:
# Preview a few random examples from one category as "Q: ..." / response pairs.
category_df = df[df.Category == "Price Change"]
# Cap the sample size so a category with fewer than three rows does not raise.
preview = category_df.sample(min(3, len(category_df)))
for _, row in preview.iterrows():
    print(f"Q: {row.Query}\n{row.Response}\n")

Q: What are the items with a cost change in the last three weeks for Toys and Games in Zone 450?
Call Cost API with {"product-name" = "Toys and Games", "location-name" = "Zone 450", week = "last 3", change = "Y"}

Q: Show me the products with a price change in the last three weeks for Office Supplies in Zone 690.
Call Price API with {"product-name" = "Office Supplies", "location-name" = "Zone 690", week = "last 3", change = "Y"}

Q: Show me all items with a cost change in the last two weeks for Home Appliances in Zone 880.
Call Cost API with {"product-name" = "Home Appliances", "location-name" = "Zone 880", week = "last 2", change = "Y"}



In [4]:
def construct_context(df, category, nsamples=3, random_state=None):
    """Build a few-shot context string for one category.

    Selects the rows of ``df`` whose ``Category`` equals ``category``, draws
    up to ``nsamples`` of them at random, and renders them beneath a heading
    made of the category name and a ``====`` rule. Each sampled row becomes a
    ``Q: <Query>`` line followed by its ``Response`` on the next line.

    Args:
        df: DataFrame with ``Category``, ``Query`` and ``Response`` columns.
        category: Category value to select.
        nsamples: Maximum number of examples to include; capped at the number
            of rows available for the category.
        random_state: Optional value forwarded to ``DataFrame.sample`` so the
            selection can be reproduced. The default ``None`` leaves the draw
            unseeded.

    Returns:
        The rendered context with leading and trailing whitespace stripped.
    """
    category_df = df[df.Category == category]
    # Never request more rows than the category holds.
    nsamples = min(nsamples, len(category_df))
    sampled = category_df.sample(nsamples, random_state=random_state)
    examples = "".join(
        f"Q: {row.Query}\n{row.Response}\n\n" for _, row in sampled.iterrows()
    )
    return f"{category}\n====\n\n{examples}".strip()

In [5]:
print(construct_context(df, "Price Change"))

Price Change
====

Q: Show me the products with a price change in the last three weeks for Office Supplies in Zone 690.
Call Price API with {"product-name" = "Office Supplies", "location-name" = "Zone 690", week = "last 3", change = "Y"}

Q: What are the products that have had a price change in the last week for Meat and Poultry in Zone 450?
Call Price API with {"product-name" = "Meat and Poultry", "location-name" = "Zone 450", week = "last week", change = "Y"}

Q: Provide me with a list of all items with a cost change in the last month for Personal Care in Zone 15.
Call Cost API with {"product-name" = "Personal Care", "location-name" = "Zone 15", week = "last month", change = "Y"}


## Construct Unstructured Document

Flattening the structured samples into a single unstructured text document seems like a step backward.

In [6]:
from random import shuffle

# Render one context block per category, in random order, into a single
# plain-text document separated by blank lines.
# Shuffle a copy so the `categories` list itself keeps its order.
shuffled_categories = list(categories)
shuffle(shuffled_categories)
doc = "\n\n".join(
    construct_context(df, category) for category in shuffled_categories
).strip()

unstructured_path = "../data/few_shot_samples.unstructured.txt"
with open(unstructured_path, "w") as f:
    f.write(doc)

# Read the file back through a context manager so the handle gets closed.
with open(unstructured_path) as f:
    print(f.read())

Movement/Sales
====

Q: Can you provide me with the sales data for Toys and Games in Zone 800 for the last six months?
Call Movement API with {"product-name" = "Toys and Games", "quarter" = "last six months", "metrics" = ["sales"], "location-name" = "Zone 800"}

Q: Please give me the sales performance of Pet Supplies in Zone 150 for Q2 and Q3 of last year.
Call Movement API with {"product-name" = "Pet Supplies", "quarter" = ["Q2", "Q3"], "metrics" = ["sales"], "location-name" = "Zone 150"}

Q: What were the quarterly sales figures for Sports Equipment in Zone 400?
Call Movement API with {"product-name" = "Sports Equipment", "quarter" = "quarterly", "metrics" = ["sales"], "location-name" = "Zone 400"}

Promotions Percent
====

Q: What percentage of our Produce sales in Zone 20 are from promotions for the next six months?
Call Movement API with {"product-name" = "Produce", "location-name" = "Zone 20", week = "next 6 months", metrics = ["sales"]} and Call Movement API with {"product-name"

## Create chunks of text

In [7]:
from random import shuffle

# One context string per category, in random order.
# Shuffle a copy so the `categories` list itself is left untouched.
chunk_categories = list(categories)
shuffle(chunk_categories)
text_chunks = [construct_context(df, category) for category in chunk_categories]

# Question Answering from Context

In [8]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.indexes.vectorstore import VectorstoreIndexCreator

## Set the OpenAI environment variable

In [9]:
import os

# Read the OpenAI key from a local credentials file and export it as the
# OPENAI_API_KEY environment variable.
# The context manager closes the file handle; split()/join removes every
# whitespace character (newlines, spaces, tabs) from the key.
with open("../credentials/openai.key.txt") as key_file:
    OPENAI_API_KEY = "".join(key_file.read().split())
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [10]:
# Embed every category chunk and index it in a Chroma vector store.
# Each chunk is tagged with its position in `text_chunks` as its "source".
embeddings = OpenAIEmbeddings()
chunk_metadata = [{"source": str(position)} for position, _ in enumerate(text_chunks)]
docsearch = Chroma.from_texts(text_chunks, embeddings, metadatas=chunk_metadata)

Using embedded DuckDB without persistence: data will be transient


## Get Context similar to Query

In [14]:
# Retrieve the three chunks closest to the query and show the best match.
query = (
    "How are we tracking against the budget goals for Deli products "
    "for the first half of the year?"
)
output = docsearch.similarity_search(query, k=3)
best_match = output[0]
print(best_match.page_content)

Budget/Forecast
====

Q: Can you provide me with the budget projections for Produce in Zone 450 for the next quarter?
Call Budget API with {"product-name" = "Produce", "quarter" = "next", "location-name" = "Zone 450"} and Call Forecast API with {"product-name" = "Produce", "quarter" = "next", "location-name" = "Zone 450"}

Q: Can you give me an update on the revenue projections for Dairy in Zone 880 for the next three months?
Call Budget API with {"product-name" = "Dairy", "quarter" = "next-3-months", "location-name" = "Zone 880"} and Call Forecast API with {"product-name" = "Dairy", "quarter" = "next-3-months", "location-name" = "Zone 880"}

Q: Can you tell me the budget goals for Frozen Foods for the entire year?
Call Budget API with {"product-name" = "Frozen Foods", "quarter" = "yearly", "location-name" = "all"} and Call Forecast API with {"product-name" = "Frozen Foods", "quarter" = "yearly", "location-name" = "all"}


## Chain the LLM and docsearch

In [16]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# Build a "stuff" question-answering chain on a zero-temperature OpenAI LLM,
# then answer the query using only the single most similar chunk as context.
llm = OpenAI(temperature=0)
chain = load_qa_chain(llm, chain_type="stuff")
query = (
    "How are we tracking against the budget goals for Deli products "
    "for the first half of the year?"
)
docs = docsearch.similarity_search(query, k=1)
chain.run(input_documents=docs, question=query)

' Call Budget API with {"product-name" = "Deli", "quarter" = "first-half-year", "location-name" = "all"} and Call Forecast API with {"product-name" = "Deli", "quarter" = "first-half-year", "location-name" = "all"}'

In [17]:
def ai(q, k=1):
    """Answer ``q`` using the stored chunks most similar to it as context.

    Args:
        q: The user question.
        k: Number of chunks to retrieve from ``docsearch`` and hand to
            ``chain``. Defaults to 1.

    Returns:
        Whatever ``chain.run`` returns for the retrieved documents and ``q``.
    """
    context = docsearch.similarity_search(q, k=k)
    return chain.run(input_documents=context, question=q)

## Movement

In [18]:
ai("What was the sales performance for Shoes in Zone 300 in Q4 of last year?")

' Call Movement API with {"product-name" = "Shoes", "quarter" = 4, "metrics" = ["sales"], "location-name" = "Zone 300"}'

In [40]:
ai("I need sales data for Grocery for Q3 at Zone 69.")

' Call Movement API with {"product-name" = "Grocery", "quarter" = 3, "metrics" = ["sales"], "location-name" = "Zone 69"}'

## Budget / Forecast

In [19]:
ai("What are the financial goals for Meat and Poultry for the upcoming year?")

' Call Budget API with {"product-name" = "Meat and Poultry", "quarter" = "yearly", "location-name" = "all"} and Call Forecast API with {"product-name" = "Meat and Poultry", "quarter" = "yearly", "location-name" = "all"}'

## KVI

In [20]:
ai("Give me the Primary KVIs for Baby Products in Zone 20.")

' Call KVI API with {"product-name" = "Baby Products", "location-name" = "Zone 20", type = "Primary"}'

## Price Change

In [21]:
ai("What are the products that have had a price change in the last week for Meat and Poultry in Zone 450?")

' Call Cost API with {"product-name" = "Meat and Poultry", "location-name" = "Zone 450", week = "last week", change = "Y"}'

## Promotions Percent

In [22]:
ai("What percentage of our Produce sales in Zone 20 are from promotions for the next six months?")

' Call Movement API with {"product-name" = "Produce", "location-name" = "Zone 20", week = "next six months", metrics = ["sales"]} and Call Movement API with {"product-name" = "Produce", "location-name" = "Zone 20", week = "next six months", metrics = ["sales"], "promo" = "Y"} and Call Calculator API with {"operation" = "division", "data" = [Movement API Response 2, Movement API Response 1], "format" = "percent"}'

## Sales Comparison

In [23]:
ai("Give me a comparison of sales for Personal Care products in Zone 750 for the current quarter and the same quarter last year.")

' Call Movement API with {"product-name" = "Personal Care", "location-name" = "Zone 750", year = "last 1", "location-aggregate-level" = "quarter"} and Call Movement API with {"product-name" = "Personal Care", "location-name" = "Zone 750", year = "current", "location-aggregate-level" = "quarter"}'

In [24]:
ai("Provide me with a year-to-year comparison of sales for Snacks and Confectionery in Zone 150.")

' Call Movement API with {"product-name" = "Snacks and Confectionery", "location-name" = "Zone 150", year = "last 1", "location-aggregate-level" = "year"} and Call Movement API with {"product-name" = "Snacks and Confectionery", "location-name" = "Zone 150", year = "current", "location-aggregate-level" = "year"} and Call Calculator API with {"operation" = "percentage change", "data" = [Movement API Response 2, Movement API Response 1], "format" = "percent"}'

## Promotions

In [25]:
ai("Give me a list of upcoming items with a BOGO 25% off offer in the Toys category. Which of these items are in Zone 950?")


' Call Promo API with {"product-name" = "Toys", "location-name" = "Zone 950", "week" = "upcoming", "type" = "BOGO 25% off"}'

## KVI / Price change

In [26]:
ai("What are the KVIs for Zone 500 which have a scheduled price change in the coming month?")


' Call KVI API with {"product-name" = "Center Store", "location-name" = "Zone 500"} and Call Price API with {"item-list" = KVI API Response, "location-name" = "Zone 500", week = "next 4", change = "Y"}'

## Competitor Comparison

In [27]:
ai("How does our current price for Snacks compare to Meijer in the Midwest?")


' Call Price Index API with {"product-name" = "Snacks", "location-name" = "Midwest", "competitor-name" = "Meijer"}'

## Promotion Analysis

In [31]:
ai("Which promotions have shown the best results for the Cleaning Supplies category in the last 2 months?")

' Call Promo Analysis API with {"product-name" = "Cleaning Supplies", "top" = 2, "last" = 2, "aggregate_by" = "month"}'

## Export Report

In [32]:
ai("Save this report in an XLSX format.")

' Call Export API with {"type" = "XLSX", data = Current Screen}'

## Email

In [33]:
ai("Dispatch this report to Kevin via email.")

' Call Email API with {"recipient" = "Kevin", data = Current Screen}'

## Navigation

In [34]:
ai("Guide me to the price review and approval screen, please.")

' You can call the Navigation API with {"screen-name" = "Price Review & Approval"} to be directed to the price review and approval page.'

In [35]:
ai("I would like to access the screen where I can review and approve prices. Could you guide me there?")

' Yes, you can access the screen where you can review and approve prices by calling the Navigation API with {"screen-name" = "Price Review & Approval"}.'

In [36]:
ai("Navigate me to the price review and approval page.")

' Call Navigation API with {"screen-name" = "Price Review & Approval"}'