In [1]:
import os
from langchain_community.document_loaders import PyPDFLoader,PyPDFDirectoryLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA 
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv

In [2]:
loader=PyPDFDirectoryLoader("./us_census")
documents=loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
final_documents=text_splitter.split_documents(documents)
len(final_documents)

316

In [3]:
#embbeding using huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-zh",
    model_kwargs={
        "device": "cpu"
    },
    encode_kwargs={'normalize_embeddings':True}
    
)

  from tqdm.autonotebook import tqdm, trange


In [4]:
import numpy as np
np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)).shape


(1024,)

In [5]:
## VectorStore Creation
vectorstore=FAISS.from_documents(final_documents[:120],huggingface_embeddings)

In [6]:
final_documents[:120]

[Document(page_content='Health Insurance Coverage Status and Type \nby Geography: 2021 and 2022\nAmerican Community Survey Briefs\nACSBR-015Issued September 2023Douglas Conway and Breauna Branch\nINTRODUCTION\nDemographic shifts as well as economic and govern-\nment policy changes can affect people’s access to health coverage. For example, between 2021 and 2022, the labor market continued to improve, which may have affected private coverage in the United States \nduring that time.\n1 Public policy changes included \nthe renewal of the Public Health Emergency, which \nallowed Medicaid enrollees to remain covered under the Continuous Enrollment Provision.\n2 The American \nRescue Plan (ARP) enhanced Marketplace premium subsidies for those with incomes above 400 percent of the poverty level as well as for unemployed people.\n3', metadata={'source': 'us_census\\acsbr-015.pdf', 'page': 0}),
 Document(page_content='2 The American \nRescue Plan (ARP) enhanced Marketplace premium subsidies for

In [7]:
## Query using Similarity Search
query="WHAT IS HEALTH INSURANCE COVERAGE?"
relevant_docments=vectorstore.similarity_search(query)

print(relevant_docments[0].page_content)

private health insurance as a plan provided through an employer 
or a union, coverage purchased directly by an individual from an 
insurance company or through an exchange (such as healthcare.
gov), or coverage through TRICARE. Public insurance coverage 
includes federal programs (such as Medicare, Medicaid, and the 
Children’s Health Insurance Program or CHIP), individual state 
health plans, and CHAMPVA (Civilian Health and Medical Program 
at the Department of Veterans Affairs), as well as care provided 
by the Department of Veterans Affairs. In the ACS, people are 
considered insured if they were covered by any of these types 
of health insurance at time of interview. People are considered 
uninsured if they were not covered by any of these types of health 
insurance at time of interview or if they only had coverage through 
the Indian Health Service (IHS), as IHS coverage is not considered 
comprehensive.


In [8]:
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":3})
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001C916FC9B40> search_kwargs={'k': 3}


In [9]:
#load hugging face access token
load_dotenv()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")


In [10]:
from langchain_community.llms import HuggingFaceHub

hf=HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature":0.2,"max_length":500}

)
query="Private vs public health insurance coverage"
hf.invoke(query)

  warn_deprecated(


'Private vs public health insurance coverage\n\nPrivate health insurance is a type of health insurance that is not provided by the government. It is usually offered by private companies and is often more expensive than public health insurance. Private health insurance can be a good option for people who want more coverage than what is offered by public health insurance.\n\nPublic health insurance is a type of health insurance that is provided by the government. It is usually less expensive than private health insurance and is often more comprehensive. Public health insurance can be a good option'

In [13]:
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """
prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])

In [15]:
retrievalQA=RetrievalQA.from_chain_type(
    llm=hf,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)

In [16]:
query="""DIFFERENCES IN THE
UNINSURED RATE BY STATE
IN 2022"""

In [18]:
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])


Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

10 U.S. Census Bureau
SUMMARY
The uninsured rate fell in 27 states 
(mainly states that had expanded 
Medicaid eligibility), while only 
Maine had an increase of 0.8 
percentage points. Only one state 
saw a decrease in public coverage 
(Rhode Island), while seven states 
experienced decreases in private 
coverage. As groups, states that 
expanded Medicaid eligibility saw 
an increase in public coverage, 
while states that did not expand 
Medicaid eligibility saw an increase 
in private coverage from 2021 to 2022, although expansion states 
had both higher private and public 
coverage rates than nonexpansion 
states to start with in both 2021 and 
2022. Massachusetts had the low -
est uninsured rate and Texas had 
the highest in 2022.37 In 2022, Utah 
had the highest private coverage 
and lowest public coverage rate, 
while New Mexico had the high -
est public c