In [None]:
%pip install pinecone-client
!pip install langchain
!pip install pypdf
!pip install openai
!pip install tiktoken
# !pip install langchain

In [None]:
%pip install "pinecone-client[grpc]"

# To install without gRPC run:
# pip3 install pinecone


In [1]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain_groq import ChatGroq
import os 
from dotenv import load_dotenv
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


In [2]:
load_dotenv()

# Env keys
os.environ['LANGCHAIN_TRACING_V2'] = os.getenv('LANGCHAIN_TRACING_V2')
os.environ['LANGCHAIN_ENDPOINT'] = os.getenv('LANGCHAIN_ENDPOINT')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
os.environ['PINECONE_API_KEY'] = os.getenv('PINECONE_API_KEY')
groq_api_key= os.environ['GROQ_API_KEY']

In [3]:
llm = ChatGroq(model="llama3-8b-8192")
embed_model = OllamaEmbeddings(model="all-minilm")

In [4]:
!mkdir pdfs

A subdirectory or file pdfs already exists.


In [5]:
loader = PyPDFDirectoryLoader("pdfs")
data = loader.load()

In [6]:
data[0]

Document(page_content='Poverty in States and Metropolitan  \nAreas: 2022\nAmerican Community Survey Briefs\nDecember 2023ACSBR-016By Craig Benson\nINTRODUCTION\nPlanners, policymakers, and community stakeholders \nuse poverty estimates as key indicators to evaluate trends and current economic conditions within com-munities and to make comparisons across demo-graphic groups. Federal and state governments often \nuse these estimates to allocate funds to local com-\nmunities. Government agencies, researchers, and local organizations regularly use these estimates to identify the number of individuals and families eligible for vari-ous programs and to measure economic well-being.\nThis brief uses the 2021 and 2022 American \nCommunity Survey (ACS) 1-year estimates and the \n2021 and 2022 Puerto Rico Community Surveys \n(PRCS) to analyze poverty rates for calendar year 2022, as well as the changes in poverty from calen-dar year 2021 for the nation, states, the District of Columbia, Puerto Ri

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100 )
docs = text_splitter.split_documents(data)

In [8]:
docs[3]

Document(page_content='• In 5 of the 25 most populous metropolitan areas, \nthe poverty rate decreased between 2021 and 2022. The Minneapolis MSA was the only metro area among the 25 most populous metropolitan areas that saw poverty increase, from 8.0 percent to 8.8 \npercent.\n• In 2022, 6.1 percent of people nationally had income \nbelow 50 percent of their poverty threshold, which was not statistically different from the 2021 rate of 6.2 percent. The share of individuals living below 50 percent of poverty decreased in 10 states and the \nDistrict of Columbia and increased in 4 states. \n3 The data collection period for the 2021 ACS spanned January to \nDecember 2021; the data collection period for the 2022 ACS spanned \nJanuary to December 2022.\n4 Following the standard specified by OMB in Statistical Policy \nDirective 14, data from the Current Population Survey Annual Social Economic Supplement are used to estimate the official \nnational poverty rate that can be found in the rep

In [9]:
len(embed_model.embed_query("How are you"))

384

In [10]:
PINCONE_API_KEY = os.environ['PINECONE_API_KEY']

In [11]:
from pinecone.grpc import PineconeGRPC
pc = PineconeGRPC(api_key= PINCONE_API_KEY)

  from tqdm.autonotebook import tqdm


In [15]:
from pinecone import ServerlessSpec
index_name = "tut"
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=len(embed_model.embed_query("How are you")),
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws', 
            region='us-east-1'
        ) 
    ) 

In [13]:
## Create embedding for each chunks

In [16]:
doc_search = Pinecone.from_texts([t.page_content for t in docs], embed_model,index_name= index_name)

In [17]:
doc_search

<langchain_community.vectorstores.pinecone.Pinecone at 0x1a7e8a45ff0>

In [18]:
Query = "Which US states have lowest and Highest poverty rates in 2022?"
answers =doc_search.similarity_search(Query)

In [19]:
answers

[Document(page_content='U.S. Census Bureau  3\nUtah had poverty rates in the \nlowest poverty map category, less than 10.0 percent in 2022.\n9 \nAlabama, Arkansas, Kentucky, Louisiana, Mississippi, New Mexico, Oklahoma, West Virginia, and Puerto Rico had 2022 poverty \nrates of 15.0 percent or higher \n(Figure 2).\nIn 2022, no states had a significant \nincrease in poverty, whereas nine \n9 The 2022 poverty rates for Washington \n(10.0 percent), Hawaii (10.2 percent), \nVermont (10.4 percent), and Rhode Island \n(10.8 percent) were not statistically different \nfrom 10.0 percent.states and the District of Columbia \nshowed decreases in poverty rates compared to 2021 (Figure 3). Forty-one states did not signifi-cantly change.\nWhile the overall national 2022 \npoverty rate was 12.6 percent, there was variability among census regions.\n10 Eight out of the \nnine states in the Northeast had \n10 Census regions are groupings of states \nand the District of Columbia that subdivide \nthe Uni

In [20]:
qa = RetrievalQA.from_chain_type(llm=llm,chain_type="stuff",retriever = doc_search.as_retriever()) 

In [21]:
qa.run(Query)

  warn_deprecated(


'According to the text, the states with the lowest poverty rates in 2022 were:\n\n* Utah, with a rate of less than 10.0 percent (in the lowest poverty map category)\n* Washington, with a rate of 10.0 percent\n* Hawaii, with a rate of 10.2 percent\n* Vermont, with a rate of 10.4 percent\n* Rhode Island, with a rate of 10.8 percent\n\nThe state with the lowest poverty rate in 2022 was New Hampshire, with a rate of 7.2 percent.\n\nOn the other hand, the states with the highest poverty rates in 2022 were:\n\n* Mississippi, with a rate of 19.1 percent\n* Louisiana, with a rate of 18.6 percent\n* Alabama, with a rate of 15.0 percent or higher\n* Arkansas, with a rate of 15.0 percent or higher\n* Kentucky, with a rate of 15.0 percent or higher\n* Oklahoma, with a rate of 15.0 percent or higher\n* West Virginia, with a rate of 15.0 percent or higher\n* Puerto Rico, with a rate of 15.0 percent or higher'

In [22]:
doc_search.as_retriever()

VectorStoreRetriever(tags=['Pinecone', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.pinecone.Pinecone object at 0x000001A7E8A45FF0>)