In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA

In [2]:
## Read the ppdfs from the folder
loader=PyPDFDirectoryLoader("./us_census")

documents=loader.load()

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

final_documents=text_splitter.split_documents(documents)
final_documents[0]

Document(metadata={'producer': 'Adobe PDF Library 16.0.5', 'creator': 'Adobe InDesign 17.1 (Windows)', 'creationdate': '2022-07-21T14:09:01-04:00', 'author': 'U.S. Census Bureau', 'moddate': '2022-07-21T14:55:54-04:00', 'subject': 'Household Economic Studies', 'title': 'Occupation, Earnings, and Job Characeristics', 'trapped': '/False', 'source': 'us_census/p70-178.pdf', 'total_pages': 21, 'page': 0, 'page_label': '1'}, page_content='Occupation, Earnings, and Job \nCharacteristics\nJuly 2022\nP70-178\nClayton Gumber and Briana Sullivan\nCurrent Population Reports\nINTRODUCTION\nWork is a critical component of our lives and provides \na way to obtain material and nonmonetary benefits \nlike employer-provided health insurance. Scholars \nsuggest that our identities are also tied to the notion \nof “what we do” (Christiansen, 1999), and that who \nwe are is determined partly by our occupational iden -\ntity (Skorikov and Vondracek, 2011). However, work \nis time consuming—the American Tim

In [3]:
len(final_documents)

316

In [6]:
!pip install sentence-transformers

Collecting sentence-transformers
  Using cached sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.54.1-py3-none-any.whl.metadata (41 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting scikit-learn (from sentence-transformers)
  Using cached scikit_learn-1.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting scipy (from sentence-transformers)
  Using cached scipy-1.16.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)
Collecting Pillow (from sentence-transformers)
  Using cached pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.0 kB)
Collecting regex!=2019.12.17 (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Downloading regex-2025.7.34-cp312-cp312-manylinux2014_x86_64.

In [7]:
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",      #sentence-transformers/all-MiniLM-l6-v2
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}

)



  from .autonotebook import tqdm as notebook_tqdm


In [8]:
import  numpy as np
print(np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)))
print(np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)).shape)


[ 2.54973117e-03  2.63492763e-02 -1.68258343e-02 -2.05101129e-02
  1.29443910e-02  4.53084484e-02  7.22067058e-02 -5.17665669e-02
 -8.47898889e-03  6.11901283e-03  4.40329835e-02  2.03839391e-02
 -2.34055356e-03 -3.12144030e-02 -3.88438702e-02  5.21582970e-03
  9.12016816e-03 -4.04682159e-02 -1.85118895e-02  1.20173525e-02
  1.39445225e-02 -1.83788594e-02 -2.87341028e-02 -2.45244079e-03
  2.24054419e-02  2.45228168e-02 -2.41129491e-02 -4.18447852e-02
 -1.06728636e-02 -1.00258812e-01 -4.08025570e-02  3.50487381e-02
  6.62722215e-02  4.92204726e-02  5.66506758e-02  2.45277733e-02
 -1.05979070e-02  5.30313700e-02 -9.45709646e-03  7.79527845e-03
  2.37157773e-02 -1.30060920e-03 -3.54394019e-02 -3.59673332e-03
 -4.40004133e-02  5.32553494e-02 -5.20878360e-02 -3.14391851e-02
 -7.96979815e-02  4.31129299e-02 -4.58358601e-03 -3.90583370e-03
  3.60486023e-02  1.11790925e-01  1.81182995e-02  2.24003498e-03
  5.55028543e-02 -4.71052248e-03 -3.36864963e-02 -2.01085340e-02
  5.83064696e-03  6.16032

  return forward_call(*args, **kwargs)


In [9]:

## VectorStore Creation
vectorstore=FAISS.from_documents(final_documents[:120],huggingface_embeddings)


In [10]:

## Query using Similarity Search
query="WHAT IS HEALTH INSURANCE COVERAGE?"
relevant_docments=vectorstore.similarity_search(query)

print(relevant_docments[0].page_content)



2 U.S. Census Bureau
WHAT IS HEALTH INSURANCE COVERAGE?
This brief presents state-level estimates of health insurance coverage 
using data from the American Community Survey (ACS). The  
U.S. Census Bureau conducts the ACS throughout the year; the 
survey asks respondents to report their coverage at the time of 
interview. The resulting measure of health insurance coverage, 
therefore, reflects an annual average of current comprehensive 
health insurance coverage status.* This uninsured rate measures a 
different concept than the measure based on the Current Population 
Survey Annual Social and Economic Supplement (CPS ASEC). 
For reporting purposes, the ACS broadly classifies health insurance 
coverage as private insurance or public insurance. The ACS defines 
private health insurance as a plan provided through an employer 
or a union, coverage purchased directly by an individual from an 
insurance company or through an exchange (such as healthcare.


In [11]:
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":3})
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7e8004128350> search_kwargs={'k': 3}


In [24]:
!pip install huggingface_hub==0.17.3 langchain-community==0.0.16

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting huggingface_hub==0.17.3
  Downloading huggingface_hub-0.17.3-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community==0.0.16
  Downloading langchain_community-0.0.16-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-core<0.2,>=0.1.16 (from langchain-community==0.0.16)
  Using cached langchain_core-0.1.53-py3-none-any.whl.metadata (5.9 kB)
Collecting langsmith<0.1,>=0.0.83 (from langchain-community==0.0.16)
  Using cached langsmith-0.0.92-py3-none-any.whl.metadata (9.9 kB)
Collecting numpy<2,>=1 (from langchain-community==0.0.16)
  Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting tenacity<9.0.0,>=8.1.0 (from langchain-community==0.0.16)
  Using cached tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
INFO: pip is looking at multiple versions of langchain-core to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-core<0.2,>=0.1.16 (from langc

In [31]:
import os
from dotenv import load_dotenv
import requests
from langchain_core.language_models.llms import LLM
from typing import Any, Optional, List, Mapping

# Ensure API token is loaded
load_dotenv()
api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
if not api_token:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in environment variables")
else:
    print(f"Token loaded: {api_token[:4]}...{api_token[-4:]}")

# Create a custom wrapper for the Hugging Face API
class CustomHuggingFaceAPI(LLM):
    model_id: str
    api_token: str
    
    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        API_URL = f"https://api-inference.huggingface.co/models/{self.model_id}"
        headers = {"Authorization": f"Bearer {self.api_token}"}
        
        print(f"Calling API for model: {self.model_id}")
        
        # Special handling for QA models (like roberta-base-squad2)
        if "squad" in self.model_id.lower():
            # For QA models, we need both a question and context
            # Since we don't have real context here for testing, we'll use the question as both
            payload = {
                "inputs": {
                    "question": prompt,
                    "context": "Health insurance coverage is a type of insurance that covers medical expenses. It provides financial protection against healthcare costs related to hospitalization, routine checkups, and prescription medications. Many Americans receive health insurance through employers, government programs like Medicare and Medicaid, or purchase it individually."
                }
            }
        else:
            # For standard text generation models
            payload = {"inputs": prompt, "parameters": {"temperature": 0.1, "max_length": 100}}
        
        response = requests.post(API_URL, headers=headers, json=payload)
        if response.status_code != 200:
            error_message = f"Error: {response.status_code}, {response.text}"
            print(error_message)
            return error_message
        
        result = response.json()
        
        # Handle different response formats
        if "squad" in self.model_id.lower():
            # QA models return answer, score, etc.
            if isinstance(result, dict) and "answer" in result:
                return result["answer"]
            return str(result)
        elif isinstance(result, list) and len(result) > 0:
            if isinstance(result[0], dict) and "generated_text" in result[0]:
                return result[0]["generated_text"]
            return str(result[0])
        return str(result)
    
    @property
    def _llm_type(self) -> str:
        return "custom_huggingface"

# Use a model designed for question-answering
hf = CustomHuggingFaceAPI(
    model_id="deepset/roberta-base-squad2",  # QA-focused model
    api_token=api_token
)

# Test the model with a simple query
query = "What is health insurance coverage?"
response = hf.invoke(query)
print(response)

Token loaded: hf_k...bVEn
Calling API for model: deepset/roberta-base-squad2
a type of insurance that covers medical expenses


In [32]:
#Hugging Face models can be run locally through the HuggingFacePipeline class.
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"temperature": 0, "max_new_tokens": 300}
)

llm = hf 
llm.invoke(query)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.
401 Client Error. (Request ID: Root=1-688bb8bb-2f781d5f4eefbbb608ac500e;2501a547-5bc2-4d84-8e6e-a8844b441af2)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-v0.1 is restricted. You must have access to it and be authenticated to access it. Please log in.

In [33]:
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

In [34]:
prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])

In [35]:


retrievalQA=RetrievalQA.from_chain_type(
    llm=hf,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)



In [36]:
query="""DIFFERENCES IN THE
UNINSURED RATE BY STATE
IN 2022"""

In [38]:
# Create a specialized class for RAG with SQuAD models
class CustomHuggingFaceRAG:
    def __init__(self, retriever, api_token, model_id="deepset/roberta-base-squad2"):
        self.retriever = retriever
        self.api_token = api_token
        self.model_id = model_id
        
    def invoke(self, query_dict):
        query = query_dict["query"]
        # Get relevant documents from the retriever
        docs = self.retriever.get_relevant_documents(query)
        
        # Combine the document content to create context
        context = "\n\n".join([doc.page_content for doc in docs])
        
        # Make a direct API call for the SQuAD model
        API_URL = f"https://api-inference.huggingface.co/models/{self.model_id}"
        headers = {"Authorization": f"Bearer {self.api_token}"}
        
        payload = {
            "inputs": {
                "question": query,
                "context": context
            }
        }
        
        print(f"Sending query to {self.model_id}...")
        response = requests.post(API_URL, headers=headers, json=payload)
        
        if response.status_code != 200:
            error_message = f"Error: {response.status_code}, {response.text}"
            print(error_message)
            return {"result": error_message, "source_documents": docs}
        
        result = response.json()
        answer = result.get("answer", str(result))
        
        return {
            "result": answer,
            "source_documents": docs
        }

# Create the RAG system with direct API access
custom_rag = CustomHuggingFaceRAG(
    retriever=retriever,
    api_token=api_token
)

# Test with your query
query = "DIFFERENCES IN THE UNINSURED RATE BY STATE IN 2022"
result = custom_rag.invoke({"query": query})
print(result["result"])

  docs = self.retriever.get_relevant_documents(query)
  return forward_call(*args, **kwargs)


Sending query to deepset/roberta-base-squad2...
Twenty-seven states had lower 
uninsured rates


In [None]:
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])