In [5]:
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.embeddings import OllamaEmbeddings


In [2]:
import os

os.environ['LANGCHAIN_TRACING_V2'] = os.getenv("LANGCHAIN_TRACING_V2")
os.environ['LANGCHAIN_ENDPOINT'] = os.getenv("LANGCHAIN_ENDPOINT")
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
os.environ['LANGCHAIN_PROJECT'] = os.getenv("LANGCHAIN_PROJECT")

In [3]:
llm = ChatGroq(model="llama3-8b-8192")
embed_model = OllamaEmbeddings(model="all-minilm")

In [6]:
loader = PyPDFDirectoryLoader("pdfs")


In [7]:
data = loader.load()

In [9]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100)

In [10]:
docs = text_splitter.split_documents(data)

In [12]:
# Creating DB
db_dir = "db"
vectordb = Chroma.from_documents(documents=docs,
                                 embedding=embed_model,
                                 persist_directory=db_dir)

In [13]:
# persist db to disk
vectordb.persist()

In [14]:
vectordb = None

In [15]:
# Load db from disk
vectordb = Chroma(persist_directory= db_dir,embedding_function=embed_model)

In [16]:
vectordb

<langchain_community.vectorstores.chroma.Chroma at 0x1444757f670>

In [17]:
# Make retriever
retriever = vectordb.as_retriever()

In [19]:
answers = retriever.get_relevant_documents("Which is highest and lowest proverty rate state in US for year 2022?")

In [20]:
answers

[Document(page_content='U.S. Census Bureau  3\nUtah had poverty rates in the \nlowest poverty map category, less than 10.0 percent in 2022.\n9 \nAlabama, Arkansas, Kentucky, Louisiana, Mississippi, New Mexico, Oklahoma, West Virginia, and Puerto Rico had 2022 poverty \nrates of 15.0 percent or higher \n(Figure 2).\nIn 2022, no states had a significant \nincrease in poverty, whereas nine \n9 The 2022 poverty rates for Washington \n(10.0 percent), Hawaii (10.2 percent), \nVermont (10.4 percent), and Rhode Island \n(10.8 percent) were not statistically different \nfrom 10.0 percent.states and the District of Columbia \nshowed decreases in poverty rates compared to 2021 (Figure 3). Forty-one states did not signifi-cantly change.\nWhile the overall national 2022 \npoverty rate was 12.6 percent, there was variability among census regions.\n10 Eight out of the \nnine states in the Northeast had \n10 Census regions are groupings of states \nand the District of Columbia that subdivide \nthe Uni

In [21]:
# Chain
from langchain.chains import RetrievalQA

In [22]:
RQAchain = RetrievalQA.from_chain_type(retriever= retriever,
                                       chain_type="stuff",
                                       llm = llm,
                                       return_source_documents = True)

In [23]:
# Cite resources
def process_llm_response(llm_resp):
    print(llm_resp['result'])
    print("Source:\n")
    for refs in llm_resp['source_documents']:
        print(refs.metadata['source'])



In [24]:
Query = "Which is highest and lowest proverty rate state in US for year 2022?"

llm_resp = RQAchain(Query)
process_llm_response(llm_resp=llm_resp)

According to the text, the highest poverty rate state in the US for 2022 is Mississippi, with a rate of 19.1 percent.

The lowest poverty rate state in the US for 2022 is New Hampshire, with a rate of 7.2 percent.
Source:

pdfs\acsbr-016.pdf
pdfs\acsbr-016.pdf
pdfs\acsbr-016.pdf
pdfs\acsbr-016.pdf
