# LangChain: Q&A over Documents

An example of Q&A over documents is a tool that lets you query a product catalog for items of interest.

In [1]:
import os
import pandas as pd

from dotenv import load_dotenv, find_dotenv
# Locate the local .env file and read it; the return value is discarded.
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
_ = load_dotenv(find_dotenv()) # read local .env file

### Prepare Data

In [2]:
# Small in-notebook product catalog: three parallel columns, one entry per shirt.
table_data = {
    "Name": [
        "Men's Tropical Plaid Short-Sleeve Shirt",
        "Men's Plaid Tropic Shirt, Short-Sleeve",
        "Men's TropicVibe Shirt, Short-Sleeve",
        "Sun Shield Shirt"
    ],
    "Description": [
        "Made of 100% polyester, UPF 50+ rated, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported",
        "Made of 52% polyester and 48% nylon, UPF 50+ rated, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets, imported",
        "Made of 71% nylon and 29% polyester, UPF 50+ rated, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported",
        "Made of 78% nylon and 22% Lycra Xtra Life fiber, UPF 50+ rated, moisture-wicking, fits comfortably over swimsuit, abrasion-resistant, imported"
    ],
    "Sun Protection Rating": [
        "SPF 50+, blocks 98% of harmful UV rays",
        "SPF 50+, blocks 98% of harmful UV rays",
        "SPF 50+, blocks 98% of harmful UV rays",
        "SPF"
    ]
}

# Create a DataFrame from the dictionary
df = pd.DataFrame(table_data)
# Make sure the output folder exists: to_csv does not create missing
# directories, so a fresh checkout without data/ would fail here.
os.makedirs("data", exist_ok=True)
# Write the DataFrame to a CSV file (without the index column)
df.to_csv('data/table_data.csv', index=False)

Note: LLMs do not always produce the same results. When executing the code in your notebook, you may get slightly different answers than those in the video.

In [3]:
# Pick the chat model name by date to account for deprecation of the LLM model.
import datetime

# Today's date, and the cutoff date after which "gpt-3.5-turbo" is used.
current_date = datetime.date.today()
target_date = datetime.date(2024, 6, 12)

# Strictly after the cutoff -> generic model name; otherwise the 0301 snapshot.
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [4]:
from IPython.display import display, Markdown
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_openai.embeddings import OpenAIEmbeddings

## CSV Q&A — Step By Step

In [5]:
# CSVLoader is already imported in the notebook's import cell; this re-import is redundant but harmless.
from langchain.document_loaders import CSVLoader
# Point the loader at the CSV file written in the data-preparation cell.
loader = CSVLoader(file_path="data/table_data.csv")

In [6]:
# Read the CSV back into a DataFrame; as the cell's last expression it is displayed.
pd.read_csv("data/table_data.csv")

Unnamed: 0,Name,Description,Sun Protection Rating
0,Men's Tropical Plaid Short-Sleeve Shirt,"Made of 100% polyester, UPF 50+ rated, wrinkle...","SPF 50+, blocks 98% of harmful UV rays"
1,"Men's Plaid Tropic Shirt, Short-Sleeve","Made of 52% polyester and 48% nylon, UPF 50+ r...","SPF 50+, blocks 98% of harmful UV rays"
2,"Men's TropicVibe Shirt, Short-Sleeve","Made of 71% nylon and 29% polyester, UPF 50+ r...","SPF 50+, blocks 98% of harmful UV rays"
3,Sun Shield Shirt,Made of 78% nylon and 22% Lycra Xtra Life fibe...,SPF


In [7]:
# Load the CSV through the loader and keep the resulting documents in `docs`.
docs = loader.load()
# Bare expression: display the loaded documents.
docs

[Document(metadata={'source': 'data/table_data.csv', 'row': 0}, page_content="Name: Men's Tropical Plaid Short-Sleeve Shirt\nDescription: Made of 100% polyester, UPF 50+ rated, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported\nSun Protection Rating: SPF 50+, blocks 98% of harmful UV rays"),
 Document(metadata={'source': 'data/table_data.csv', 'row': 1}, page_content="Name: Men's Plaid Tropic Shirt, Short-Sleeve\nDescription: Made of 52% polyester and 48% nylon, UPF 50+ rated, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets, imported\nSun Protection Rating: SPF 50+, blocks 98% of harmful UV rays"),
 Document(metadata={'source': 'data/table_data.csv', 'row': 2}, page_content="Name: Men's TropicVibe Shirt, Short-Sleeve\nDescription: Made of 71% nylon and 29% polyester, UPF 50+ rated, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported\nSun Protection Rating: SPF 50+, blocks 98% of 

In [8]:
# Import OpenAIEmbeddings from langchain_openai, the same module the notebook's
# import cell uses, so this cell no longer rebinds the name to the
# `langchain.embeddings` variant.
from langchain_openai.embeddings import OpenAIEmbeddings
# Embedding client, constructed with default settings.
# NOTE(review): presumably picks up credentials from the environment loaded via .env — confirm.
embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


In [9]:
# Embed a sample sentence to inspect what an embedding looks like.
embed = embeddings.embed_query("Hi my name is Harrison")
# Length of the embedding vector.
print(len(embed))
# First five components of the vector.
print(embed[:5])

1536
[-0.0219904820469787, 0.006746508733548101, -0.018174780766530483, -0.03918623602138188, -0.01404528989830284]


In [10]:
# Build the in-memory vector store from the loaded documents and the embeddings object.
# NOTE: the recorded run of this cell failed with an ImportError asking for the
# `docarray` package (`pip install "langchain[docarray]"`); every later cell depends on `db`.
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)

ImportError: Could not import docarray python package. Please install it with `pip install "langchain[docarray]"`.

In [None]:
# Natural-language question used for the similarity search.
query = "Please suggest a shirt with sunblocking"

In [None]:
# Search the vector store for documents similar to the query.
# NOTE: this rebinds `docs`, which until now held the documents returned by loader.load();
# later cells that read `docs` see the search results instead.
docs = db.similarity_search(query)

In [None]:
# Number of documents returned by the similarity search.
len(docs)

In [None]:
# Display the first search result.
docs[0]

In [None]:
# Obtain a retriever from the vector store; it is passed to the RetrievalQA chain later.
retriever = db.as_retriever()

In [None]:
# Chat model configured with temperature 0.0 and the model name selected earlier in `llm_model`.
llm = ChatOpenAI(temperature = 0.0, model=llm_model)

In [None]:
# Concatenate the page content of every document in `docs` into one string (no separator).
qdocs = "".join(doc.page_content for doc in docs)


In [None]:
# Send the concatenated documents followed by the question to the chat model in a single prompt.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [None]:
# Render the model's response as Markdown.
display(Markdown(response))

In [None]:
# RetrievalQA chain of type "stuff", wired to the chat model and the vector-store retriever,
# with verbose output enabled.
qa_stuff = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff", verbose=True)

In [None]:
# Question for the RetrievalQA chain, split across two literals for readability.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
# Run the RetrievalQA chain on the query and keep its answer in `response`.
response = qa_stuff.run(query)

In [None]:
# Render the chain's answer as Markdown.
display(Markdown(response))

In [None]:
# Build the index before using it: on a fresh kernel, querying first raises
# NameError because `index` does not exist yet.
# The index is created from the CSV loader, using the in-memory vector store
# class and the embeddings object defined earlier.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

# Ask the same question through the index, answering with the configured chat model.
response = index.query(query, llm=llm)

![Call Technique](./img/call-technique.png)