# Test Similarity

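Prototype a cache for similar queries: known question/answer pairs are embedded into a FAISS index, and an incoming query is matched against the stored questions by embedding similarity.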

In [15]:
%pip install faiss-cpu

Collecting faiss-cpu
  Using cached faiss_cpu-1.7.4-cp311-cp311-win_amd64.whl (10.8 MB)
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Read the local .env file (located by find_dotenv) before looking up the settings below.
_ = load_dotenv(find_dotenv())

# All three settings are mandatory: a missing variable raises KeyError right here
# instead of surfacing later as an authentication or endpoint error.
api_key, base_url, api_version = (
    os.environ[name]
    for name in ("OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_VERSION")
)

# Show which endpoint is in use (the key itself is never printed).
print(base_url)

https://devsquad-eastus-2.openai.azure.com/


In [13]:
# AzureOpenAIEmbeddings is imported from langchain_community (the package this notebook
# already uses for FAISS and CSVLoader) instead of the `langchain.embeddings` path,
# which is only a deprecated re-export of the same class.
from langchain_community.embeddings import AzureOpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embeddings client configured from the environment settings read earlier in the notebook.
embeddings = AzureOpenAIEmbeddings(
    api_key=api_key,
    azure_endpoint=base_url,
    api_version=api_version,
    # Presumably the name of the embedding deployment on the Azure resource -- confirm
    # it matches the deployment configured for the endpoint above.
    azure_deployment="text-embedding-ada-002",
)

In [7]:
from pprint import pprint
from langchain_community.document_loaders.csv_loader import CSVLoader


In [10]:
# One Document per CSV row: the "Question" value is recorded as the document's
# `source`, while "Id" and "Answer" are carried along as metadata so the answer
# can be read back from a retrieved document.
loader = CSVLoader(
    file_path="./data/questions_and_answers.csv",
    source_column="Question",
    metadata_columns=["Id", "Answer"],
    csv_args=dict(delimiter=",", quotechar='"'),
)

# Parse the whole file into a list of Documents.
data = loader.load()

In [11]:
# Fail with an explicit message if the CSV yielded no rows, rather than a bare IndexError below.
assert data, "CSVLoader returned no documents; check ./data/questions_and_answers.csv"

# Inspect the first document to confirm how the CSV columns map to content and metadata.
print(data[0])

page_content='Question: What is the capital of France?' metadata={'source': 'What is the capital of France?', 'row': 0, 'Id': '0', 'Answer': 'Paris'}


In [16]:
# Build the FAISS vector store from the loaded documents, using the Azure OpenAI
# embeddings client to vectorise them.
db = FAISS.from_documents(documents=data, embedding=embeddings)

In [70]:
# Minimum relevance score a stored question must reach to be returned for a query.
SCORE_THRESHOLD = 0.85

# Retriever that filters by score: documents scoring below the threshold are dropped,
# so an unrelated query can come back with no results at all.
retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": SCORE_THRESHOLD},
)

In [71]:
# Loosely worded, misspelled paraphrase of a stored question, used to check that the
# lookup tolerates wording differences.
# `invoke` is the Runnable entry point for retrievers; `get_relevant_documents` is
# deprecated in current LangChain releases.
docs = retriever.invoke("what's name of the capital of french?")

In [72]:
# Pretty-print the retrieved documents; the stored answer is in each document's metadata["Answer"].
pprint(docs)

[Document(page_content='Question: What is the capital of France?', metadata={'source': 'What is the capital of France?', 'row': 0, 'Id': '0', 'Answer': 'Paris'})]
