In [None]:
!pip install langchain langchain_community langchain_google_genai langchain_chroma pypdf



In [None]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

# Google Drive folder that holds the source PDFs. Drive has to be mounted at
# /content/drive before this cell runs for the path to resolve.
PDF_DIR = "/content/drive/MyDrive/LLM/us_cenus"

loader = PyPDFDirectoryLoader(PDF_DIR)
documents = loader.load()

# Fail fast: an empty result (Drive not mounted, wrong folder, no PDFs) would
# otherwise only surface further down as a confusing error while splitting or
# indexing.
if not documents:
    raise FileNotFoundError(
        f"No PDF pages were loaded from {PDF_DIR!r}; "
        "check that Google Drive is mounted and the folder contains PDF files."
    )

In [None]:
# Number of Document objects returned by loader.load().
len(documents)

63

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the loaded documents into chunks of at most 1000 characters, letting
# neighbouring chunks share 200 characters so text that straddles a boundary
# is still present in full in at least one chunk.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(documents)

In [None]:
# Total number of chunk Documents produced by the splitter.
len(docs)

316

In [None]:
# Inspect the first chunk: shows its metadata and page_content.
docs[0]

Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 18.2 (Windows)', 'creationdate': '2023-09-09T07:52:17-04:00', 'author': 'U.S. Census Bureau', 'keywords': 'acsbr-015', 'moddate': '2023-09-12T14:44:47+01:00', 'title': 'Health Insurance Coverage Status and Type by Geography: 2021 and 2022', 'trapped': '/false', 'source': '/content/drive/MyDrive/LLM/us_cenus/acsbr-015.pdf', 'total_pages': 18, 'page': 0, 'page_label': '1'}, page_content='Health Insurance Coverage Status and Type \nby Geography: 2021 and 2022\nAmerican Community Survey Briefs\nACSBR-015\nIssued September 2023\nDouglas Conway and Breauna Branch\nINTRODUCTION\nDemographic shifts as well as economic and govern-\nment policy changes can affect peopleâ€™s access to \nhealth coverage. For example, between 2021 and 2022, \nthe labor market continued to improve, which may \nhave affected private coverage in the United States \nduring that time.\n1 Public policy changes included \nthe renewal of th

In [None]:
import os
from google.colab import userdata

# Copy the key from Colab's secret store into the process environment so it
# never has to be written into the notebook itself.
os.environ.update(GOOGLE_API_KEY=userdata.get("GOOGLE_API_KEY"))

In [None]:
# Embedding model shared by the vector store (indexing) and the retriever (queries).
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Smoke test: embed a short query and show the first five components.
vector = embedding.embed_query("AGI")
vector[:5]

[0.04745418578386307,
 -0.032611019909381866,
 -0.06321417540311813,
 0.005565571133047342,
 0.06395020335912704]

In [None]:
import hashlib

from langchain_chroma import Chroma

# Give every chunk a deterministic id (position + content hash). Without
# explicit ids the store assigns fresh random ones, so re-running this cell in
# the same kernel can add every chunk a second time to the in-memory
# collection and make the retriever return duplicates. With stable ids a
# re-run overwrites the existing entries instead.
# The position prefix keeps ids unique even if two chunks have identical text.
chunk_ids = [
    f"{position}-{hashlib.sha256(chunk.page_content.encode('utf-8')).hexdigest()}"
    for position, chunk in enumerate(docs)
]

# Embed all chunks and index them in a Chroma collection.
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    ids=chunk_ids,
)

In [None]:
# Similarity-search retriever over the vector store, returning 10 chunks per query.
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

# Try the retriever on its own before wiring it into a chain.
retrieved_docs = retriever.invoke("What is health insurance coverage?")

In [None]:
# Number of chunks returned for the query; should match the k=10 passed to the retriever.
len(retrieved_docs)

10

In [None]:
# Text of the first chunk the retriever returned.
retrieved_docs[0].page_content

'2 U.S. Census Bureau\nWHAT IS HEALTH INSURANCE COVERAGE?\nThis brief presents state-level estimates of health insurance coverage \nusing data from the American Community Survey (ACS). The  \nU.S. Census Bureau conducts the ACS throughout the year; the \nsurvey asks respondents to report their coverage at the time of \ninterview. The resulting measure of health insurance coverage, \ntherefore, reflects an annual average of current comprehensive \nhealth insurance coverage status.* This uninsured rate measures a \ndifferent concept than the measure based on the Current Population \nSurvey Annual Social and Economic Supplement (CPS ASEC). \nFor reporting purposes, the ACS broadly classifies health insurance \ncoverage as private insurance or public insurance. The ACS defines \nprivate health insurance as a plan provided through an employer \nor a union, coverage purchased directly by an individual from an \ninsurance company or through an exchange (such as healthcare.'

In [None]:
from langchain_google_genai import GoogleGenerativeAI

# Gemini model used for answer generation; max_tokens=200 is passed to bound
# the length of each response.
llm = GoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7, max_tokens=200)

In [None]:
# Ask the model directly, passing only the question and no retrieved context.
llm.invoke("What is health insurance coverage?")

"Health insurance coverage is a contract between you and an insurance company. In exchange for paying a premium (usually monthly), the insurance company agrees to pay for a portion of your medical expenses when you get sick or injured.  It helps protect you from potentially high medical costs.\n\nHere's a breakdown of the key aspects:\n\n*   **Premium:** This is the regular payment you make to keep your health insurance active. It's like a subscription fee.\n\n*   **Deductible:** This is the amount you pay out-of-pocket for covered healthcare services before your insurance starts to pay.  For example, if your deductible is $2,000, you'll pay the first $2,000 of your medical bills, and then your insurance will start covering costs.\n\n*   **Copay:** This is a fixed amount you pay for a specific healthcare service, like a doctor's visit or a prescription.  For example, you might"

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


In [None]:
# System instructions for the answering step. The `{context}` placeholder at
# the end is left for the chain to fill in.
_instruction_sentences = [
    "You are an assistant for question-answering tasks.",
    "Use the following pieces of retrieved context to answer the question.",
    "If you don't know the answer, say that you don't know.",
    "Use three sentences maximum and keep the answer concise.",
]
system_prompt = " ".join(_instruction_sentences) + "\n\n" + "{context}"

# Two-message chat template: the system instructions (which carry the
# {context} slot) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [None]:
# Answering step: combines the supplied documents with the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: the retriever feeds its results into the answering step.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
# Run the pipeline on one question and print only the generated answer text.
response = rag_chain.invoke({"input": "What is health insurance coverage?"})
print(response["answer"])

Health insurance coverage is broadly classified as private or public. Private health insurance includes plans through employers/unions or those purchased individually. Public insurance includes federal programs like Medicare, Medicaid, and state health plans.
