## Naive RAG
- Upload Documents -> Create Vector Store
- User Query -> Retrieve relevant Chunks -> Inject in Prompt -> Generate Answer

In [6]:
from dotenv import load_dotenv
from langchain import hub
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

import sys
import os
sys.path.insert(0, os.path.abspath('..'))

from utilities.file_utils import load_pdf_using_PyPDF
from utilities.text_utils import format_docs, log_chunks

In [10]:
# Source PDF, addressed relative to the notebook's working directory.
file_path = "../docs/042f627c5d8f619cf62cc21f864b08dfd59059d0b9aab805d132e0014489d625.pdf"
# The project loader is awaited directly: this relies on the notebook kernel's
# support for top-level `await` and will not run as a plain Python script.
# `pages` is handed to the text splitter's `split_documents` in the next step
# (presumably one Document per PDF page; verify against utilities.file_utils).
pages = await load_pdf_using_PyPDF(file_path) # Average time with PyPDF: 2.5s

In [11]:
# Chunking: cut the loaded pages into overlapping pieces before embedding.
# A size of 1000 with an overlap of 200 means neighbouring chunks share
# context across their boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(pages)

In [12]:
# Embedding models: two candidate HuggingFace sentence-embedding models.
# NOTE(review): both models are instantiated here even though only one is
# selected below; drop the unused one if nothing else in the notebook needs it.
all_mini_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
bge_embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# The model actually used to build and load the vector store in later cells.
embeddings = bge_embeddings

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Build the FAISS vector store from the chunks using the selected embedding model
vector_store = FAISS.from_documents(
    documents=splits,
    embedding=embeddings,
)

In [None]:
# Persist the vector store on disk so later runs can reload it instead of
# re-embedding the document
vector_store.save_local(
    folder_path="../vector_store/042f627c5d8f619cf62cc21f864b08dfd59059d0b9aab805d132e0014489d625"
)

In [None]:
# Load the saved vector store from disk. This rebinds `vector_store`, replacing
# any store built earlier in the session, and must be given the same
# `embeddings` object that is used for querying.
# SECURITY NOTE(review): allow_dangerous_deserialization=True opts in to
# pickle-based loading (per LangChain's FAISS documentation), which can execute
# arbitrary code. Only load directories written by this notebook's own
# save step or another trusted source.
vector_store = FAISS.load_local("../vector_store/042f627c5d8f619cf62cc21f864b08dfd59059d0b9aab805d132e0014489d625", embeddings, allow_dangerous_deserialization=True)

In [8]:
# Expose the vector store through the retriever interface; no arguments are
# passed, so the library's default search settings apply.
retriever = vector_store.as_retriever()

In [9]:
# Load environment variables from a local .env file before creating the client
# (presumably this is where the Google API key comes from; confirm the expected
# variable name in the langchain_google_genai docs).
load_dotenv()

# Chat model used for answer generation.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,  # zero temperature to keep answers as repeatable as possible
    max_tokens=None,  # no explicit output-length cap set here
    timeout=None,  # no explicit request timeout set here
    max_retries=2,
)

In [10]:
# Retrieval sub-chain: fetch chunks for the query, pass them through the
# project's `log_chunks` helper (wrapped as a runnable), then through
# `format_docs` to produce the value injected as the prompt's context.
# NOTE(review): `log_chunks` must return the documents it receives for
# `format_docs` to get any input; verify in utilities.text_utils.
logged_retriever = retriever | RunnableLambda(log_chunks) | format_docs

In [None]:
# Fetch the shared RAG prompt template "rlm/rag-prompt" via the LangChain hub
# client (presumably needs network access; confirm before running offline).
prompt = hub.pull("rlm/rag-prompt")

# Chain: the raw question string fans out into a dict. "context" is filled by
# the retrieval sub-chain and "question" carries the input through
# RunnablePassthrough. The dict feeds the prompt, then the LLM, and the model
# reply is parsed by StrOutputParser.
rag_chain = (
    {"context": logged_retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question: the call is the cell's last expression, so its result is shown as
# the cell output.
rag_chain.invoke("what is the minimum and maximum In-patient Hospitalization Treatment Limits for Imperial plus plan?")
# Alternative queries kept for manual experimentation (uncomment one at a time):
# rag_chain.invoke("what was the last question i asked?") # No Memory of last question
# rag_chain.invoke("46-year-old male, knee surgery in Pune, 3-month-old insurance policy")
# rag_chain.invoke("knee surgery?")


Retrieved Chunks:
Inpatient treatment free of charge for a medical condition that is covered by Us. 
This benefit is payable after You are discharged from Hospital. 
This cover is subject to the Sum Insured, sub-limits, terms, conditions and definitions, exclusions contained or 
otherwise expressed in this Policy. 
 
16. Palliative care (Applicable to Imperial Plus Plan only) 
We will pay the Reasonable and Customary expenses incurred, up to the limit specified in the Policy Schedule, on 
diagnosis of a Terminal Illness, for any ongoing treatment, given on the advice of a Medical Practitioner, that aims 
to alleviate the physical/psychological suffering associated with progressive, incurable Illness and to maintain 
quality of life. It includes Inpatient, day-care and out-patient treatment. We will pay for physical care, psychological 
care, Hospital or hospice accommodation, nursing care and prescription drugs.
---
Treatment Limits 
USD 
100,000 
USD 
150,000 
USD 
200,000 
USD 
300,

'The minimum In-patient Hospitalization Treatment Limit for the Imperial Plus plan is USD 100,000. The maximum limit can be up to the In-patient Sum Insured. The policy schedule specifies the exact limit.'