## RAG with Memory
Along with the question and the retrieved context, keep track of the previous conversation and pass it to the LLM.

In [1]:
from dotenv import load_dotenv
from langchain import hub
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

from utilities.pdf_utils import load_pdf_using_PyPDF
from utilities.text_utils import format_docs, log_chunks
from prompts import CHAT_HISTORY_PROMPT

In [2]:
# Source PDF for the knowledge base. The path is relative, so it is resolved
# against the kernel's current working directory.
file_path = "docs/BAJHLIP23020V012223.pdf"
# The loader is awaited, so it must be a coroutine; top-level `await` is valid in a
# notebook cell but not in a plain Python script.
# NOTE(review): presumably returns one document per PDF page — confirm in utilities/pdf_utils.
pages = await load_pdf_using_PyPDF(file_path) # Average time with PyPDF: 2.5s

In [3]:
# Split the loaded pages into overlapping chunks for embedding.
# Sizes are measured by the splitter's length function
# (characters by default, per the LangChain docs — confirm for the installed version).
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200  # shared between neighbouring chunks so context is not cut mid-thought

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(pages)

In [4]:
# Embeddings
# Alternative model, left here so it can be swapped in for comparison:
# all_mini_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
bge_embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Single alias consumed by the vector-store cells, so changing the embedding
# model only requires editing this line.
embeddings = bge_embeddings

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Creating Vector Store
# Builds a FAISS index from the chunks in `splits`, using `embeddings` to vectorise them.
vector_store = FAISS.from_documents(splits, embedding=embeddings)

In [None]:
# Store the vector DB locally to save processing time
# The folder written here is the one the FAISS.load_local cell reads back.
vector_store.save_local("vector_store/faiss_index_bajaj")

In [5]:
# Load the saved vector store
# Rebinds `vector_store` to the index read from disk.
# NOTE(review): `embeddings` presumably has to be the same model the index was built with — confirm.
# SECURITY: allow_dangerous_deserialization=True opts in to loading pickled data from the
# index folder. Only load folders this notebook wrote itself; never point it at untrusted files.
vector_store = FAISS.load_local("vector_store/faiss_index_bajaj", embeddings, allow_dangerous_deserialization=True)

In [6]:
# Expose the vector store through the retriever interface. No search arguments are
# passed, so the library's default search settings apply.
retriever = vector_store.as_retriever()

In [7]:
# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Google API key the chat model needs — confirm.
load_dotenv()

# Chat model used to generate the final answer.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,  # lowest sampling temperature, for more repeatable answers
    max_tokens=None,  # no explicit output-length cap set here
    timeout=None,  # no explicit request timeout set here
    max_retries=2,
)

In [8]:
# Retrieve, Log and format the chunks
# Pipeline composed with `|`: retriever -> log_chunks -> format_docs.
# NOTE(review): presumably log_chunks prints the retrieved chunks and passes them on, and
# format_docs turns them into the text used as prompt context — confirm in utilities/text_utils.
logged_retriever = retriever | RunnableLambda(log_chunks) | format_docs

In [9]:
# In-memory chat history, one entry per session id.
# Lives only in this kernel: restarting the kernel discards every conversation.
session_histories = {}

def get_session_history(session_id: str):
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one user/session conversation.

    Returns:
        The history object stored in ``session_histories`` for that id. The same
        object is returned on every later call with the same id.
    """
    try:
        return session_histories[session_id]
    except KeyError:
        # First time this session is seen: start it with an empty history.
        fresh_history = InMemoryChatMessageHistory()
        session_histories[session_id] = fresh_history
        return fresh_history

In [22]:
# Build the prompt variables: retrieved context, the question, and the chat history
def input_mapper(input):
    """Turn the chain input into the variables the prompt is filled with.

    Args:
        input: Mapping with a ``"question"`` entry and a ``"history"`` entry.

    Returns:
        Dict with ``"context"`` (result of running ``logged_retriever`` on the
        question), ``"question"`` and ``"history"``, in that key order.

    Raises:
        KeyError: If ``"question"`` or ``"history"`` is missing from ``input``.
    """
    question = input["question"]
    # Retrieval runs before "history" is read, so the retriever is invoked
    # even when the history entry turns out to be missing.
    prompt_inputs = {"context": logged_retriever.invoke(question)}
    prompt_inputs["question"] = question
    prompt_inputs["history"] = input["history"]
    return prompt_inputs

In [23]:
# Chain: build prompt variables -> render the prompt -> call the model -> extract text
rag_chain = RunnableLambda(input_mapper) | CHAT_HISTORY_PROMPT | llm | StrOutputParser()

# Wrap the chain so every call looks up the session's history through
# get_session_history and passes it to the chain under the "history" key,
# with the user's message taken from the "question" key.
# NOTE(review): the chain ends in StrOutputParser, so output_messages_key may be
# redundant here — confirm against the RunnableWithMessageHistory docs before removing.
rag_with_history = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
    output_messages_key="output",
)

In [25]:
session_id = "user-1234" # Some unique identifier per user/session
# Per-call config that tells the history wrapper which session's history to use.
session_config = {"configurable": {"session_id": session_id}}

# Other sample questions to try (pass each as {"question": ...} with config=session_config):
#   "46-year-old male, knee surgery in Pune, 3-month-old insurance policy"
#   "knee surgery?"

# Ask the policy question first so the follow-up below has history to draw on,
# even on a fresh kernel (Restart Kernel -> Run All).
rag_with_history.invoke(
    {"question": "what is the minimum and maximum In-patient Hospitalization Treatment Limits for Imperial plus plan?"},
    config=session_config,
)

# Follow-up that can only be answered from the stored chat history.
# It is the last expression in the cell, so its answer is what gets displayed.
rag_with_history.invoke(
    {"question": "what was the last question i asked?"},
    config=session_config,
)


Retrieved Chunks:
Policy Period. 
iv. At the end of the Policy Period, the Policy shall terminate and can be renewed within the Grace Period of 30 
days to maintain continuity of benefits without break in Policy. Coverage is not available during the grace 
period. 
v. No loading shall apply on renewals based on individual claims experience 
 
11. Withdrawal of Policy 
i. In the likelihood of this product being withdrawn in future, the Company will intimate the insured person about 
the same 90 days prior to expiry of the Policy. 
ii. Insured Person will have the option to migrate to similar health insurance product available with the Company at 
the time of renewal with all the accrued continuity benefits such as cumulative bonus, waiver of waiting period 
as per IRDAI guidelines, provided the Policy has been maintained without a break. 
 
12. Moratorium Period 
After completion of eight continuous years under this Policy no look back to be applied. This period of eight years is
---
o

'The last question you asked was: "what is the minimum and maximum In-patient Hospitalization Treatment Limits for Imperial plus plan?"'