## LangChain: Q&A over Documents

An example of Q&A over documents is a tool that lets you query a product catalog for items of interest.

In [1]:
# Import required packages
import os
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

# Azure OpenAI settings used when the chat model is instantiated.
# API version string passed to the Azure chat model.
OPENAI_API_VERSION = "2023-07-01-preview"
# Deployment name comes from the OPENAI_API_DEPLOY environment variable
# (None when the variable is not set).
OPENAI_DEPLOYMENT = os.environ.get("OPENAI_API_DEPLOY")

In [2]:
# CSV file holding the product catalog queried throughout the notebook.
file = "OutdoorClothingCatalog_1000.csv"
# Loader that reads the CSV with an explicit UTF-8 encoding.
loader = CSVLoader(encoding="utf-8", file_path=file)

In [3]:
from langchain.indexes import VectorstoreIndexCreator

In [4]:
# Install docarray package
# !pip install docarray

In [None]:
# Build a vector index over the loader's documents, backed by the
# in-memory DocArray store.
index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch).from_loaders(
    [loader]
)

In [None]:
# Question posed to the index; the literal is split across lines for readability.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [None]:
# Ask the index the question and render the answer as Markdown.
# NOTE(review): no `llm` argument is passed here, so the index presumably
# falls back to its default model -- confirm that works with the Azure setup.
response = index.query(query)
rendered = Markdown(response)
display(rendered)

In [None]:
# Recreate the loader and load the catalog so individual documents can be
# inspected. Use the same explicit UTF-8 encoding as the first loader so that
# decoding does not fall back to the platform's default encoding.
loader = CSVLoader(file_path=file, encoding='utf-8')
docs = loader.load()
docs[0]  # preview the first loaded document

In [None]:
from langchain.embeddings import OpenAIEmbeddings
# Embedding client created with the library defaults (no arguments passed).
# NOTE(review): the chat model in this notebook targets Azure OpenAI; confirm
# these defaults resolve to the intended Azure embedding deployment.
embeddings = OpenAIEmbeddings()

In [None]:
# Embed a sample sentence, then print the vector's length followed by its
# first five components (one item per line).
embed = embeddings.embed_query("Hi my name is Harrison")
print(len(embed), embed[:5], sep="\n")

In [None]:
# Build the in-memory vector store from the loaded documents using the
# embedding client.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [None]:
# Free-text query used for the similarity search.
query = "Please suggest a shirt with sunblocking"
# NOTE: this rebinds `docs` from the full loaded catalog to only the search
# results; later cells that read `docs` see the search results.
docs = db.similarity_search(query)

In [None]:
# Number of documents returned by the similarity search.
len(docs)

In [None]:
# Inspect the first document returned by the similarity search.
docs[0]

In [None]:
# Wrap the vector store as a retriever; it is handed to the RetrievalQA chain below.
retriever = db.as_retriever()

In [None]:
# Azure OpenAI chat model, configured from the API version and deployment
# name defined at the top of the notebook; temperature is set to 0.0.
llm = AzureChatOpenAI(
    temperature=0.0,
    deployment_name=OPENAI_DEPLOYMENT,
    openai_api_version=OPENAI_API_VERSION,
)

In [None]:
# Concatenate the text of every document in `docs` into one context string.
# Iterate the documents directly instead of indexing with range(len(...)).
# NOTE(review): the pieces are joined with no separator, so adjacent documents
# run together -- confirm this is intended.
qdocs = "".join(doc.page_content for doc in docs)

In [None]:
# Send the concatenated documents plus the question straight to the chat
# model, then render the answer as Markdown.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)

display(Markdown(response))

In [None]:
# Retrieval-backed QA chain of type "stuff", wired to the retriever and the
# Azure chat model; verbose mode is switched on.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    verbose=True,
)

In [None]:
# Question for the QA chain; the literal is split across lines for readability.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
# Run the QA chain on the question and render the answer as Markdown.
response = qa_stuff.run(query)
rendered = Markdown(response)
display(rendered)

In [None]:
# Query the index again, this time passing the Azure chat model explicitly.
response = index.query(query, llm=llm)
# Render the answer, as the other response cells in this notebook do;
# otherwise the result is computed but never shown.
display(Markdown(response))

In [None]:
# Rebuild the index, this time passing the embedding client explicitly
# alongside the in-memory DocArray store.
index = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
).from_loaders([loader])