# LangChain: Q&A over Documents

This notebook shows how to use LangChain to answer questions over your own documents. An example might be a tool that would allow you to query a product catalog for items of interest.


In [1]:
#pip install --upgrade langchain

In [2]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate and read the local .env file; the return value is bound to `_`
# because it is not used anywhere else.
_ = load_dotenv(find_dotenv()) # read local .env file

In [3]:
# Account for deprecation of the pinned LLM model: up to and including the
# cutoff date the "gpt-3.5-turbo-0301" snapshot is selected; strictly after
# it, plain "gpt-3.5-turbo" is used instead.
import datetime

# Cutoff after which the model name switches.
target_date = datetime.date(2024, 6, 12)

# Today's date according to the local clock.
current_date = datetime.date.today()

llm_model = (
    "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"
)

In [4]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown



In [None]:
# Catalog CSV to query; the path is relative, so the file is expected to sit
# in the notebook's working directory.
file = 'OutdoorClothingCatalog_1000.csv'
# Loader for that CSV; later cells pass it to the index creator and call
# loader.load() on it.
loader = CSVLoader(file_path=file)

In [5]:
from langchain.indexes import VectorstoreIndexCreator

In [7]:
#!pip install docarray

In [9]:
# Configure an index creator backed by the DocArray in-memory vector store,
# then build the index from the catalog loader.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

NameError: name 'loader' is not defined

In [10]:
# Question posed to the index; written as adjacent literals so the line break
# in the source never becomes part of the string.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [11]:
# Ask the index the question held in `query`; the answer is rendered as
# Markdown in the next cell.
response = index.query(query)

NameError: name 'index' is not defined

In [None]:
# Render the response as Markdown (the query asked for a Markdown table).
display(Markdown(response))

## Step-by-step

<div style="width: 100%;">
  <img src="attachment:c4ac45af-8d38-4b7b-8dbc-c6fcbf82a001.png"  width="20%" height="20%">
  <div style="width: 50%; float: left;"></div> <!-- Empty Column for spacing -->
    <img width="10%" height="20%">
  <img src="attachment:bf70ebcf-4835-4788-b5b9-8f52d2ae6836.png"  width="35%" height="30%">
</div>

<div style="width: 100%;">
  <img src="attachment:ed77fe46-7593-4bdd-ab74-e4fa1c6cdb42.png"  width="35%" height="30%">
  <img src="attachment:a52abe6b-6052-4066-b751-d24549cca8c0.png"  width="35%" height="30%">
    <img src="attachment:123357d7-b3e1-4932-84df-2b341477ec98.png"  width="25%" height="30%">
</div>


In [None]:
from langchain.document_loaders import CSVLoader

# Recreate the CSV loader for the step-by-step walkthrough; `file` is the
# catalog path assigned in an earlier cell.
loader = CSVLoader(file_path=file)

In [None]:
# Load the catalog into documents (presumably one per CSV row — confirm
# against the CSVLoader documentation).
docs = loader.load()

In [None]:
# Inspect the first loaded document.
docs[0]

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model used in later cells for the sample sentence, the vector
# store, and the rebuilt index.
embeddings = OpenAIEmbeddings()

In [12]:
# Embed a sample sentence so the result can be inspected in the next cells.
embed = embeddings.embed_query("Hi my name is Harrison")

NameError: name 'embeddings' is not defined

In [None]:
# Length of the embedding returned for the sample sentence.
print(len(embed))

In [None]:
# First five entries of the sample embedding.
print(embed[:5])

In [None]:
# Build the in-memory vector store from the loaded documents, using
# `embeddings` to vectorize them.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [None]:
# Query used for the similarity search against the vector store.
query = "Please suggest a shirt with sunblocking"

In [None]:
# Fetch the documents the store considers similar to the query.
# NOTE: this rebinds `docs`, replacing the full list loaded from the CSV.
docs = db.similarity_search(query)

In [None]:
# How many documents the similarity search returned.
len(docs)

In [None]:
# Inspect the first document returned by the similarity search.
docs[0]

In [None]:
# Expose the vector store as a retriever; it is passed to RetrievalQA below.
retriever = db.as_retriever()

In [None]:
# Chat model used for the remaining cells; `llm_model` was chosen by date in
# an earlier cell. temperature 0.0 presumably minimizes randomness in the
# output — confirm against the ChatOpenAI documentation.
llm = ChatOpenAI(temperature = 0.0, model=llm_model)

In [None]:
# Concatenate the text of every retrieved document into one string (no
# separator) so it can be placed ahead of the question in the prompt.
# Iterates the documents directly instead of indexing via range(len(...)).
qdocs = "".join(doc.page_content for doc in docs)


In [None]:
# Send the concatenated documents followed by the question straight to the
# chat model; adjacent literals keep the prompt text on one logical line.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [None]:
# Render the model's answer as Markdown (the prompt asked for a Markdown
# table).
display(Markdown(response))

In [None]:
# Build a RetrievalQA chain that answers queries with `llm` over documents
# fetched by `retriever`.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    # "stuff" presumably places all retrieved documents into a single prompt —
    # confirm against the LangChain chain-type documentation.
    chain_type="stuff", 
    retriever=retriever, 
    # Ask the chain for verbose output while it runs.
    verbose=True
)

In [None]:
# Question for the RetrievalQA chain; adjacent literals replace the backslash
# continuation inside the string without changing its text.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
# Answer the query with the RetrievalQA chain built above.
response = qa_stuff.run(query)

In [None]:
# Render the chain's answer as Markdown.
display(Markdown(response))

In [None]:
# Same question through the index, this time passing the chat model
# explicitly.
# NOTE(review): `index` is only rebuilt with explicit embeddings in the
# following cell, so when cells run in file order this call uses the index
# created near the top of the notebook — confirm that is intended.
response = index.query(query, llm=llm)

In [None]:
# Rebuild the index, this time naming the embedding model explicitly instead
# of relying on the creator's default.
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
)
index = index_creator.from_loaders([loader])

<img src="attachment:4edc5eae-5762-49a8-99a5-3c66e411b44a.png"  width="25%" height="20%">
<img src="attachment:7a693e25-3ccd-403b-b57e-911f8000cbbe.png"  width="35%" height="50%">