# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

In [164]:
# Version pins for this notebook; uncomment to install.
# Inside Jupyter, prefer `%pip install ...` over `!pip install ...` so the
# packages land in the running kernel's environment.
# !pip install langchain==0.0.179
# !pip install openai==0.27.7

Collecting langchain==0.0.179
  Downloading langchain-0.0.179-py3-none-any.whl.metadata (12 kB)
Downloading langchain-0.0.179-py3-none-any.whl (907 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 907.7/907.7 kB 9.8 MB/s eta 0:00:00
DEPRECATION: bert-score 0.3.11 has a non-standard dependency specifier transformers>=3.0.0numpy. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of bert-score or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063
DEPRECATION: pytorch-lightning 1.5.10 has a non-standard dependency specifier torch>=1.7.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming depe [output truncated]

In [165]:
import os

from dotenv import load_dotenv, find_dotenv
# Load settings from a local .env file. The return value is bound to `_`
# so the cell does not echo it as output.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# OpenAI-backed cells later in the notebook — confirm.
_ = load_dotenv(find_dotenv()) # read local .env file

In [166]:
# Name of the chat model; passed to ChatOpenAI(model=...) later in the notebook.
llm_model = "gpt-3.5-turbo"

In [167]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.llms import OpenAI

In [168]:
# CSV file that backs the Q&A examples (path is relative to the working directory).
file = 'qa_clothing_dataset.csv'
# Loader over that CSV; consumed by VectorstoreIndexCreator.from_loaders([...]).
loader = CSVLoader(file_path=file)

In [169]:
from langchain.indexes import VectorstoreIndexCreator

In [170]:
# Build a vector index over the loader's documents, with
# DocArrayInMemorySearch as the vector store class.
# NOTE(review): the saved output of this cell is a ValidationError
# ("subclass of VectorStore expected"), so `index` is never bound here and
# the index.query(...) cell that follows cannot run on a fresh kernel.
# Possibly an installed-package version mismatch — TODO confirm and fix.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

ValidationError: 1 validation error for VectorstoreIndexCreator
vectorstore_cls
  subclass of VectorStore expected (type=type_error.subclass; expected_class=VectorStore)

In [None]:
# Question put to the index. Adjacent string literals are concatenated by
# the parser, so this is one single-line string.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [None]:
# Completion-style model (temperature 0) used to answer the query against the index.
llm_replacement_model = OpenAI(
    temperature=0,
    model='gpt-3.5-turbo-instruct',
)

# Ask the index the question, letting the model above compose the answer.
response = index.query(query, llm=llm_replacement_model)

In [None]:
# Render the answer as Markdown (the query asks for a markdown table).
display(Markdown(response))

## Step By Step

In [None]:
# Step-by-step walkthrough: recreate the CSV loader explicitly.
# CSVLoader was already imported in the notebook's import cell; `file` comes
# from the earlier cell that names the dataset.
from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path=file)

In [None]:
# Load the documents from the CSV via the loader.
docs = loader.load()

In [None]:
# Inspect the first loaded document.
docs[0]

In [None]:
# Embedding client, built with default arguments. Used for the sample
# embedding below and when building the vector store.
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [None]:
# Embed a sample sentence to see what an embedding looks like.
embed = embeddings.embed_query("Hi my name is Harrison")

In [None]:
# Number of components in the sample embedding.
print(len(embed))

In [None]:
# First five components of the sample embedding.
print(embed[:5])

In [None]:
# In-memory vector store built from the loaded documents and the embedding client.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [None]:
# Query for the similarity-search demo; rebinds `query` from the earlier cell.
query = "Please suggest a shirt with sunblocking"

In [None]:
# Retrieve the documents most similar to the query from the vector store.
# NOTE(review): this rebinds `docs`, which until now held the full loader
# output. Re-running the `db = ...from_documents(docs, ...)` cell after this
# one would index only these search results; a distinct name would avoid that.
docs = db.similarity_search(query)

In [None]:
# Number of documents returned by the similarity search.
len(docs)

In [None]:
# Inspect the first returned document (presumably the closest match — confirm).
docs[0]

In [None]:
# Expose the vector store as a retriever; passed to RetrievalQA below.
retriever = db.as_retriever()

In [None]:
# Chat model with temperature 0.0, using the model name held in `llm_model`.
llm = ChatOpenAI(temperature = 0.0, model=llm_model)

In [None]:
# Concatenate the text of every retrieved document, with no separator, into one context string.
qdocs = "".join(doc.page_content for doc in docs)


In [None]:
# Manual prompt: retrieved context followed by the question, sent straight to the chat model.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [None]:
# Render the model's answer as Markdown.
display(Markdown(response))

In [None]:
# Retrieval QA chain wiring the chat model to the retriever.
# Per the LangChain documentation, chain_type="stuff" places all retrieved
# documents into a single prompt; verbose=True enables the chain's logging.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [None]:
# Question for the retrieval QA chain. Adjacent string literals are
# concatenated by the parser, so this is one single-line string.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
# Run the retrieval QA chain on the query.
response = qa_stuff.run(query)

In [None]:
# Render the chain's answer as Markdown.
display(Markdown(response))

In [None]:
response = index.query(query, llm=llm)

In [None]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

## Experiment on your own