### **In this example, we'll create a vector db based on a sample file of user questions and sql queries**
 - **Read the example file, which is a list of user questions and their SQL queries.**
 - **Create a JSON file from that example file.**
 - **Create a documents list by reading the json file.**
 - **Create a vector DB over the document list using an embedding model.**
 - **Check the results by vector similarity search.**

In [16]:
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts.example_selector.semantic_similarity import SemanticSimilarityExampleSelector
from qna_examples import load_examples
import os

In [2]:
# Path of the persisted FAISS example store. It is passed to FAISS.save_local
# when the store is first built and to FAISS.load_local on later runs.
# The path is relative, so it resolves against the current working directory.
EXAMPLE_VECTOR_PATH = "data/example_store.faiss"

In [7]:
def get_embedding_model():
    """Create the HuggingFace embedding model used for the example store.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model and ``device='cpu'``
        passed through ``model_kwargs``.
    """
    embedding_options = {
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "model_kwargs": {'device': 'cpu'},
    }
    return HuggingFaceEmbeddings(**embedding_options)

In [8]:
def build_or_load_example_selector(k=3, force_rebuild=False) -> SemanticSimilarityExampleSelector:
    """Return a top-``k`` similarity retriever over the example question/SQL pairs.

    If nothing exists at ``EXAMPLE_VECTOR_PATH`` (or ``force_rebuild`` is true),
    the examples from ``load_examples()`` are embedded into a new FAISS store
    and saved to that path. Otherwise the previously saved store is loaded.

    Args:
        k: Number of nearest examples the retriever returns per query.
        force_rebuild: Rebuild and re-save the store even if a saved copy
            exists, e.g. after the example file has changed. Defaults to False,
            which keeps the original build-once behaviour.

    Returns:
        The object produced by ``FAISS.as_retriever(search_kwargs={"k": k})``.
        NOTE(review): the return annotation names
        ``SemanticSimilarityExampleSelector``, but that class is never
        constructed here. The annotation is kept unchanged to avoid adding a
        new import; consider correcting it.

    Raises:
        ValueError: If ``load_examples()`` yields no examples, since an empty
            store cannot be searched.
    """
    model = get_embedding_model()
    if force_rebuild or not os.path.exists(EXAMPLE_VECTOR_PATH):
        # Each example's question is the embedded text; its SQL travels along as metadata.
        docs = [
            Document(page_content=ex["input"], metadata={"query": ex["query"]})
            for ex in load_examples()
        ]
        if not docs:
            # Fail with a clear message before anything is written to disk.
            raise ValueError("load_examples() returned no examples; cannot build the vector store")
        db = FAISS.from_documents(docs, model)
        db.save_local(EXAMPLE_VECTOR_PATH)
    else:
        # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
        # the saved store. Only load stores that this notebook created itself.
        db = FAISS.load_local(EXAMPLE_VECTOR_PATH, model, allow_dangerous_deserialization=True)
    return db.as_retriever(search_kwargs={"k": k})


In [9]:
# Build the example store (or load it if already saved) and get a retriever
# that returns the 3 most similar examples per question.
example_selector = build_or_load_example_selector(k=3)

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Question to look up in the example store. The commented-out lines are
# alternative sample questions; enable exactly one at a time.
#user_input = "show the total open NRT"
user_input = "show the total taxi revenue"
#user_input = "show the total open outsource work orders"

In [11]:
# Retrieve the stored examples most similar to the question, then reshape each
# returned document into an input/query pair for few-shot use.
similar_examples = example_selector.invoke(user_input)
few_shot = [
    {"input": doc.page_content, "query": doc.metadata["query"]}
    for doc in similar_examples
]

In [12]:
# Inspect the raw documents returned by the similarity search.
similar_examples

[Document(id='484c3609-1e74-4f99-bdf7-b9ac90e71535', metadata={'query': 'SELECT SUM(amount) FROM vw_taxi_collection'}, page_content='Show the total collection amount.'),
 Document(id='8f982393-b1bf-4cf3-bc2a-6d6f882c7b3f', metadata={'query': 'SELECT SUM(credit_card_amount) FROM vw_taxi_collection'}, page_content='Show the total cc amount.'),
 Document(id='1260c808-ba13-43cc-99c0-13a7870e688c', metadata={'query': 'SELECT Sum(Total_Outstanding) FROM vw_employee_outstanding'}, page_content='what is the total outstanding?')]

In [13]:
# Inspect the same results reshaped as input/query pairs.
few_shot

[{'input': 'Show the total collection amount.',
  'query': 'SELECT SUM(amount) FROM vw_taxi_collection'},
 {'input': 'Show the total cc amount.',
  'query': 'SELECT SUM(credit_card_amount) FROM vw_taxi_collection'},
 {'input': 'what is the total outstanding?',
  'query': 'SELECT Sum(Total_Outstanding) FROM vw_employee_outstanding'}]