## Imports:

In [1]:
# env:
import os
from dotenv import load_dotenv
load_dotenv()
# Chat:
from operator import itemgetter
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
# History
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.messages import trim_messages
from langchain_core.runnables import RunnableWithMessageHistory, RunnablePassthrough
# Load
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
# Store
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
# Retrieve
from langchain.chains import create_retrieval_chain, create_history_aware_retriever

In [2]:
from IPython.display import Markdown
# from llm import get_response_stream, get_response

In [3]:
# for chunk in get_response("hello", dummy=True):
#     print(chunk, end="", flush=True)

## LLM:

In [4]:
# Budget constants for sizing the model context and the retrieved-document share.
# MAX_TOKENS is handed to the chat model as its context window (num_ctx).
MAX_TOKENS = 16000
# PER_DOC_TOKENS is handed to the text splitter as chunk_size.
# NOTE(review): the retriever notes treat the resulting chunks as ~750 characters,
# so the unit here looks like characters rather than tokens — confirm.
PER_DOC_TOKENS = 750
# Rough allowance for the system prompt; an assumption, not a measured value.
SYS_PROMPT_SIZE = 1000 # assumed
# Share of the context reserved for all retrieved documents together.
TOTAL_DOC_SIZE = 3000
# Number of PER_DOC_TOKENS-sized documents that fit into TOTAL_DOC_SIZE.
# NOTE(review): not referenced elsewhere in the visible notebook (the retriever hard-codes k).
DOC_COUNT = TOTAL_DOC_SIZE // PER_DOC_TOKENS

In [5]:
from langchain_ollama import ChatOllama

# Gemma3 context size -> 128K (131,072 tokens)
# Resource usage observed at different context sizes (presumably num_ctx values — confirm):
# 30k -> 91% RAM, 91% GPU
# 25k -> 82% RAM, 89% GPU
# 15k -> 66% RAM, 87% GPU


# Local chat model served through Ollama; its context window is capped at MAX_TOKENS.
llm = ChatOllama(
    model="gemma3:latest", temperature=1,
    # model="gemma3:1b", temperature=1,
    #  num_predict=MAX_OUTPUT_TOKENS,
    # NOTE(review): num_gpu=35 is presumably the number of layers offloaded to the GPU — confirm.
    num_gpu=35, num_ctx=MAX_TOKENS
)
# Smoke test: send one short prompt to confirm the model loads and answers.
llm.invoke("Hii")

AIMessage(content="Hi there! How's your day going so far? 😊 \n\nIs there anything you'd like to talk about, or were you just saying hello?", additional_kwargs={}, response_metadata={'model': 'gemma3:latest', 'created_at': '2025-05-20T15:48:29.098803575Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1880033100, 'load_duration': 1326205909, 'prompt_eval_count': 11, 'prompt_eval_duration': 190513387, 'eval_count': 34, 'eval_duration': 362716599, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-395d3a93-8e96-4bb0-9cb4-efb32f06f628-0', usage_metadata={'input_tokens': 11, 'output_tokens': 34, 'total_tokens': 45})

In [6]:
# from langchain_groq import ChatGroq
# llm = ChatGroq(
#     model="llama-3.3-70b-versatile", temperature="1",
#     max_tokens=MAX_TOKENS, api_key=os.environ.get("GROQ_API_KEY"),
# )
# llm.invoke('hi')

In [7]:
# Markdown(llm.invoke("write a story").content)

## Template:

<div class="alert alert-info">
    <strong>Limitations:</strong> Some LangChain components make assumptions about default key names; they are listed below.
</div>

- `create_history_aware_retriever` assumes the latest user message is under the key `input`.
- The trimmer (`trim_messages`) assumes the chat history is under the key `messages`.
- `create_stuff_documents_chain` expects the combined (stuffed) documents under the key `context`.
- To work around these defaults, use `RunnablePassthrough` or `RunnableMap` to map your own keys and variables onto the expected ones.
- But remember: you have to do this mapping manually for every variable whose name differs from the default.

- So it is always better to follow the default keys and avoid extra complexity in the chains.

### Chat:

In [8]:
# Prompt for the final answer step.
# Inputs: {context} (the stuffed documents), `chat_history` (prior turns) and
# {input} (the latest user message).
# The system text is a single string: Python concatenates the adjacent literals,
# so no join() is needed. The blank line is placed after the last instruction
# bullet so the whole bullet list stays together and is clearly separated from
# the "Contextual Documents" section.
template_chat = ChatPromptTemplate.from_messages(
    messages=[
        ("system", (
            "You are a highly knowledgeable and helpful AI assistant.\n"
            "You are provided with the user's chat history and external documents to assist in your response.\n\n"
            "Your task is to:\n"
            "- Accurately and clearly answer the user's latest question.\n"
            "- Incorporate any relevant information from the context documents enclosed below.\n"
            # "- Reference the source(s) whenever applicable.\n"
            "- Use appropriate markdown formatting for clarity and readability (e.g., bullet points, headings, code blocks, tables).\n"
            "- If not available in the context, mention that and then answer from your own knowledge.\n\n"
            "Contextual Documents:\n"
            "<CONTEXT>{context}</CONTEXT>"
        )),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input} \n\n **Strictly stick to the instructions!**")
    ]
)
# Display the assembled template for inspection.
template_chat

ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.

In [9]:
# Calculate tokens in this System message and pass rest of the max possible chat history:
# trim_keep = model_context - template_tokens - 250 (safe side)
# template_chat.messages[0].content

### Summarize:

In [10]:
# Prompt for the query-rewriting step: given the chat history and the latest
# user message, the model must return one standalone prompt for retrieval.
# The system text is built from adjacent string literals (one single string).
template_summarize = ChatPromptTemplate.from_messages(
    messages=[
        (
            "system",
            (
                "You are an expert at summarizing conversations into standalone prompts.\n"
                "You are given a complete chat history, ending with the user's latest message.\n\n"
                "Your task is to:\n"
                "- Understand the entire conversation context.\n"
                "- Identify references in the latest user message that relate to earlier messages.\n"
                "- Create a single clear, concise, and standalone question or prompt.\n"
                "- This final prompt should be fully understandable without needing the prior conversation.\n"
                "- It will be used to retrieve the most relevant documents.\n\n"
                "Only return the rewritten standalone prompt. Do not add explanations or formatting."
            ),
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        (
            "human",
            "{input}. \n\n **Make one standalone prompt as asked!**",
        ),
    ]
)
# Display the assembled template for inspection.
template_summarize

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[l

In [11]:
# Calculate tokens in this System message and pass rest of the max possible chat history:
# trim_keep = model_context - template_tokens - (1000tok/doc * n-docs) - 250 (safe side)
# template_summarize.messages

## Chat Message History:

In [12]:
# In-memory registry of chat histories keyed by session id; entries are created
# on demand by get_session_history.
chat_histories = {}

In [13]:
def get_session_history(session_id:str) -> BaseChatMessageHistory:
    """Return the chat history registered for `session_id`.

    On the first request for an id, a fresh ChatMessageHistory is created,
    stored in the module-level `chat_histories` registry, and its creation is
    announced on stdout. Later requests return the stored object.
    """
    # Fast path: this session already has a history.
    if session_id in chat_histories:
        return chat_histories[session_id]

    # First sighting of this session: register a new, empty history.
    new_history = ChatMessageHistory()
    chat_histories[session_id] = new_history
    print(f"Created chat hist for session id: `{session_id}`")
    return new_history

# Smoke test: the first lookup for an id creates its history (a message is printed).
get_session_history("abv")

Created chat hist for session id: `abv`


InMemoryChatMessageHistory(messages=[])

In [14]:
# Same id again: the stored history is returned and no creation message is printed.
get_session_history("abv")

InMemoryChatMessageHistory(messages=[])

### Trimmer:
- Due to an unresolved issue with the name of the history variable (`chat_history` vs. `messages`), the trimmer cannot be used in this RAG implementation.
- Reason: the trimmer expects the key `messages`.
- But when `messages` is used, the summarizer step does not call the LLM at all; it silently does nothing, and the cause could not be traced.
- If you still want to use it, put one `RunnablePassthrough` before the trimmer in the chain to map `chat_history` → `messages`, and map its output back to `chat_history` afterwards.

In [15]:
# # For summary 15k chat + 1k system and all
# trim_summary = trim_messages(
#     max_tokens=MAX_TOKENS - SYS_PROMPT_SIZE,
#     strategy="last", token_counter=llm, start_on="human",
#     allow_partial=True,  # include_system=True,
# )

# # For chat 10k chat + 5*1k docs + 1k system and all
# trim_chat = trim_messages(
#     max_tokens=MAX_TOKENS - (TOTAL_DOC_SIZE) - SYS_PROMPT_SIZE,
#     strategy="last", token_counter=llm, start_on="human",
#     allow_partial=True,  # include_system=True,
# )

## VectorStore:
### Embeddings:
- Note that the embedding model is not offloaded to the GPU at all (`num_gpu=0`).
- This is deliberate: otherwise Ollama keeps loading and unloading the embedding model and the LLM on every call.
- Even with enough memory available, Ollama seems to keep only one of them loaded; the reason is not known.

In [16]:
# Embedding model served through Ollama. num_gpu=0 is deliberate: per the notes
# for this section, the embedding model is not offloaded to the GPU.
embeddings = OllamaEmbeddings(model="mxbai-embed-large:latest", num_gpu=0)
# Display the configured embeddings object.
embeddings

OllamaEmbeddings(model='mxbai-embed-large:latest', base_url=None, client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=0, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)

### Loader:

In [17]:
# Load the sample PDF; the result is a list of Document objects (metadata + page_content).
# NOTE(review): extract_tables='markdown' and extract_images=True presumably render
# tables as markdown and also process embedded images — confirm against the loader docs.
file = PyMuPDFLoader(file_path="./assets/pdf_w_text.pdf", extract_tables='markdown', extract_images=True).load()
# Display the loaded documents.
file

[Document(metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Microsoft Word 8.0', 'creationdate': '2000-06-29T10:21:08+11:00', 'source': './assets/pdf_w_text.pdf', 'file_path': './assets/pdf_w_text.pdf', 'total_pages': 1, 'format': 'PDF 1.3', 'title': 'This is a test PDF file', 'author': 'cdaily', 'subject': '', 'keywords': '', 'moddate': '2013-10-28T15:24:13-04:00', 'trapped': '', 'modDate': "D:20131028152413-04'00'", 'creationDate': "D:20000629102108+11'00'", 'page': 0}, page_content="Adobe Acrobat PDF Files\nAdobe® Portable Document Format (PDF) is a universal file format that preserves all\nof the fonts, formatting, colours and graphics of any source document, regardless of\nthe application and platform used to create it.\nAdobe PDF is an ideal format for electronic document distribution as it overcomes the\nproblems commonly encountered with electronic file sharing.\n• Anyone, anywhere can open a PDF file. All you need is the free Adobe Acrobat\nReader. Recipie

### Splitter:

In [18]:
# Splitter that cuts the loaded pages into chunks with a 150-unit overlap.
# NOTE(review): chunk_size is fed from PER_DOC_TOKENS, but the retriever notes treat
# the resulting chunks as ~750 characters — the unit looks like characters, not
# tokens; confirm.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=PER_DOC_TOKENS, chunk_overlap=150,
)
splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x782663610650>

### Database:

In [19]:
# Split the loaded PDF pages into chunks; the result is a list of Document objects.
splitted = splitter.split_documents(file)
# Display the chunks.
splitted

[Document(metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Microsoft Word 8.0', 'creationdate': '2000-06-29T10:21:08+11:00', 'source': './assets/pdf_w_text.pdf', 'file_path': './assets/pdf_w_text.pdf', 'total_pages': 1, 'format': 'PDF 1.3', 'title': 'This is a test PDF file', 'author': 'cdaily', 'subject': '', 'keywords': '', 'moddate': '2013-10-28T15:24:13-04:00', 'trapped': '', 'modDate': "D:20131028152413-04'00'", 'creationDate': "D:20000629102108+11'00'", 'page': 0}, page_content="Adobe Acrobat PDF Files\nAdobe® Portable Document Format (PDF) is a universal file format that preserves all\nof the fonts, formatting, colours and graphics of any source document, regardless of\nthe application and platform used to create it.\nAdobe PDF is an ideal format for electronic document distribution as it overcomes the\nproblems commonly encountered with electronic file sharing.\n• Anyone, anywhere can open a PDF file. All you need is the free Adobe Acrobat\nReader. Recipie

In [20]:
# Build the FAISS vector store directly from the chunks.
# Author's note: constructing the store by hand needs four arguments, so it is
# (presumably) created via from_documents instead, with further documents added later.
database = FAISS.from_documents(documents=splitted, embedding=embeddings)
database

<langchain_community.vectorstores.faiss.FAISS at 0x782676db6720>

In [21]:
# Inspect the first chunk: its raw text, then a rough word count.
print(repr(splitted[0].page_content))
# split(" ") splits on single spaces only, so words separated by a newline are
# counted as one — treat the number as an approximation.
print(len(splitted[0].page_content.split(" ")))

"Adobe Acrobat PDF Files\nAdobe® Portable Document Format (PDF) is a universal file format that preserves all\nof the fonts, formatting, colours and graphics of any source document, regardless of\nthe application and platform used to create it.\nAdobe PDF is an ideal format for electronic document distribution as it overcomes the\nproblems commonly encountered with electronic file sharing.\n• Anyone, anywhere can open a PDF file. All you need is the free Adobe Acrobat\nReader. Recipients of other file formats sometimes can't open files because they\ndon't have the applications used to create the documents.\n• PDF files always print correctly on any printing device.\n• PDF files always display exactly as created, regardless of fonts, software, and"
109


### Retriever:

- A 750-character chunk holds roughly 95 words (at most about 150).
- To retrieve about 3k tokens at ~150 per chunk, we need 3000 / 150 = 20 chunks.
- So, set `k=20`.

In [22]:
# Plain similarity retriever over the FAISS store.
# k=20 follows the sizing estimate in the notes above (about 3k tokens at ~150 per chunk).
retriever = database.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 20}
)
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x782676db6720>, search_kwargs={'k': 20})

In [23]:
# Smoke test: fetch the nearest chunks for an arbitrary query.
retriever.invoke("fun")

[Document(id='85ac837c-f5c2-447e-90ee-704e3af4f03c', metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Microsoft Word 8.0', 'creationdate': '2000-06-29T10:21:08+11:00', 'source': './assets/pdf_w_text.pdf', 'file_path': './assets/pdf_w_text.pdf', 'total_pages': 1, 'format': 'PDF 1.3', 'title': 'This is a test PDF file', 'author': 'cdaily', 'subject': '', 'keywords': '', 'moddate': '2013-10-28T15:24:13-04:00', 'trapped': '', 'modDate': "D:20131028152413-04'00'", 'creationDate': "D:20000629102108+11'00'", 'page': 0}, page_content="Adobe Acrobat PDF Files\nAdobe® Portable Document Format (PDF) is a universal file format that preserves all\nof the fonts, formatting, colours and graphics of any source document, regardless of\nthe application and platform used to create it.\nAdobe PDF is an ideal format for electronic document distribution as it overcomes the\nproblems commonly encountered with electronic file sharing.\n• Anyone, anywhere can open a PDF file. All you need

## Summarizer:

- This is the old method.
- It is too hard-coded; switch to the retrieval approach with `create_stuff_documents_chain`, which ingests the documents and produces the answer in a single chain call.

In [24]:
# chain = (
#     RunnablePassthrough().assign(messages=itemgetter("messages") | trim_chat)
#     | template_summarize | llm | StrOutputParser())

# summarizer_llm = RunnableWithMessageHistory(
#     runnable=chain,
#     get_session_history=get_session_history,
#     input_messages_key="input",
#     history_messages_key="messages",
# )

In [25]:
# chat_histories[10] = ChatMessageHistory()
# chat_histories[10].messages = [
#     HumanMessage("Hello, I'm Bhushan, What is your name?"),
#     AIMessage("I am an AI assistant. I am not a human like you."),
#     HumanMessage("What is Artificial General Intelligence?"),
#     AIMessage("Artificial General Intelligence (AGI) refers to highly autonomous systems that outperform humans at most economically valuable work."),
# ]
# # )

In [26]:
# summarizer_llm.invoke(
#     input={"input": "So it's not achieved yet?", },
#     config={"configurable": {"session_id": 10}}
# )

In [27]:
# chat_histories[10].messages

## Runnable With History:
- Commented out because it is unnecessary and not used in the current code.
- Kept for reference, as it may be useful in the future.

In [28]:
# chain = (
#     RunnablePassthrough(name="Trim Chat History").assign(messages=itemgetter("messages") | trim_chat)
#     | template_chat | llm | StrOutputParser())

# chat_llm = RunnableWithMessageHistory(
#     runnable=chain,
#     get_session_history=get_session_history,
#     input_messages_key="input",
#     history_messages_key="messages",
# )

In [29]:
# chat_llm.invoke(
#     input={
#         "input": "Hello, I'm Bhushan, What is your name?",
#         "context": "This is some random document which contains some random information."
#     },
#     config={
#         "configurable": {
#             "session_id": 15
#         }
#     }
# )

In [30]:
# chat_llm.invoke(
#     input={
#         "input": "What did we discuss?",
#         "context": "There is no context available for this question."
#     },
#     config={
#         "configurable": {
#             "session_id": 15
#         }
#     }
# )

- TODO: if possible, add an option to paste a link and scrape the whole content from it.

## Chain:

In [31]:
# Make a passthrough which prints variables and passes them to next step
def print_and_pass(input):
    """Debug tap for a chain: print the value passing through and return it unchanged."""
    value = input
    print(value)
    return value

In [32]:
# Step 3 — history-aware retrieval:
# user input + chat history -> summarizer template -> standalone question -> documents.
summarize_chain = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=template_summarize,
)
# Variant with history trimming (needs `trim_summary`, which is currently commented out):
# summarize_chain = trim_summary | create_history_aware_retriever(llm, retriever, template_summarize)

# Step 4 — answering:
# retrieved documents -> combined into {context} -> chat template -> final output.
qa_chain = create_stuff_documents_chain(llm, template_chat)

# Step 2 — retrieval + answering in one chain:
# input + chat history -> [summarizer template -> get docs] -> [combine -> chat template] -> output.
rag_chain = create_retrieval_chain(
    retriever=summarize_chain,
    combine_docs_chain=qa_chain,
)

# Step 1 — the entry point: wraps the RAG chain so that the per-session history is
# loaded under `chat_history` before each call, using `input` as the user-message
# key and `answer` as the output-message key.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
conversational_rag_chain

RunnableWithMessageHistory(bound=RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  chat_history: RunnableBinding(bound=RunnableLambda(_enter_history), kwargs={}, config={'run_name': 'load_history'}, config_factories=[])
}), kwargs={}, config={'run_name': 'insert_history'}, config_factories=[])
| RunnableBinding(bound=RunnableLambda(_call_runnable_sync), kwargs={}, config={'run_name': 'check_sync_or_async'}, config_factories=[]), kwargs={}, config={'run_name': 'RunnableWithMessageHistory'}, config_factories=[]), kwargs={}, config={}, config_factories=[], get_session_history=<function get_session_history at 0x782663607c40>, input_messages_key='input', output_messages_key='answer', history_messages_key='chat_history', history_factory_config=[ConfigurableFieldSpec(id='session_id', annotation=<class 'str'>, name='Session ID', description='Unique identifier for a session.', default='', is_shared=True, dependencies=None)])

## Test:

### Database:

In [33]:
# Add a few short, hand-written facts to the existing store so that retrieval can be
# checked against known content.
# NOTE(review): this mutates `database` in place — re-running the cell presumably adds
# the same texts again under new ids; confirm before relying on result counts.
# NOTE(review): `embedding=` is passed as an extra keyword; confirm the store actually
# uses it rather than the embeddings it was built with.
database.add_documents(
    [
        Document("Cats and Dogs are both popular pets."),
        Document("Cats are independent and low-maintenance pets."),
        Document("Dogs are loyal and require more attention."),
        Document("Cats are often seen as aloof and mysterious."),
        Document("Dogs are known for their loyalty and companionship."),
        Document("Cats are great for small living spaces."),
        Document("Cats are NOT AT ALL LOYAL."),
    ],
    embedding=embeddings
)

['a1cd727f-9fc6-4585-a6b2-571d5a59a803',
 '46bd6c46-f3d4-48d5-86b6-f2479a957b3e',
 'b0d1c006-be00-4c3d-b609-ecb71302ef01',
 '5f3b1177-d2a3-4d4f-b887-5cd9b5a27a95',
 'de68b1c6-7416-4a38-94ae-24613b054a0c',
 '8643d5be-c856-49ae-be4f-c25d99e39a2c',
 '5f4fa239-cfaa-4100-9f1c-8005bf02ffbb']

In [34]:
# Query the store directly (not through the retriever) for the 8 nearest documents.
database.search(search_type='similarity', query="animals", k=8)

[Document(id='a1cd727f-9fc6-4585-a6b2-571d5a59a803', metadata={}, page_content='Cats and Dogs are both popular pets.'),
 Document(id='de68b1c6-7416-4a38-94ae-24613b054a0c', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),
 Document(id='b0d1c006-be00-4c3d-b609-ecb71302ef01', metadata={}, page_content='Dogs are loyal and require more attention.'),
 Document(id='46bd6c46-f3d4-48d5-86b6-f2479a957b3e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),
 Document(id='8643d5be-c856-49ae-be4f-c25d99e39a2c', metadata={}, page_content='Cats are great for small living spaces.'),
 Document(id='5f3b1177-d2a3-4d4f-b887-5cd9b5a27a95', metadata={}, page_content='Cats are often seen as aloof and mysterious.'),
 Document(id='5f4fa239-cfaa-4100-9f1c-8005bf02ffbb', metadata={}, page_content='Cats are NOT AT ALL LOYAL.'),
 Document(id='1733dbd3-6990-49b0-8ec5-33fe0a3e331d', metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Micr

### Summarize Chain:

In [35]:
# Run only the history-aware retrieval step with a hand-written chat history.
# The result is the list of retrieved documents, not an answer.
summarize_chain.invoke(
    input={
        "input": "What animal was i talking about? Which one is most common apart from that animal?",
        "chat_history": [
            HumanMessage("Hello, I'm Bhushan, What is your name?"),
            AIMessage("I am an AI assistant. I am not a human like you."),
            HumanMessage("What are ur thoughts on DOGs?"),
            AIMessage("Dogs are loyal and require more attention."), 
        ]
    },
)

[Document(id='a1cd727f-9fc6-4585-a6b2-571d5a59a803', metadata={}, page_content='Cats and Dogs are both popular pets.'),
 Document(id='de68b1c6-7416-4a38-94ae-24613b054a0c', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),
 Document(id='b0d1c006-be00-4c3d-b609-ecb71302ef01', metadata={}, page_content='Dogs are loyal and require more attention.'),
 Document(id='46bd6c46-f3d4-48d5-86b6-f2479a957b3e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),
 Document(id='5f3b1177-d2a3-4d4f-b887-5cd9b5a27a95', metadata={}, page_content='Cats are often seen as aloof and mysterious.'),
 Document(id='8643d5be-c856-49ae-be4f-c25d99e39a2c', metadata={}, page_content='Cats are great for small living spaces.'),
 Document(id='5f4fa239-cfaa-4100-9f1c-8005bf02ffbb', metadata={}, page_content='Cats are NOT AT ALL LOYAL.'),
 Document(id='85ac837c-f5c2-447e-90ee-704e3af4f03c', metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Micr

### QA - Chain:

In [36]:
# Run only the answering step with a hand-supplied context document and history.
# The result is the answer text.
qa_chain.invoke(
    input={
        "input": "Full form of RAG?",
        "context": [Document(page_content="This is some random document which contains some random information.")],
        "chat_history": [
            HumanMessage("hi"),
            AIMessage("hello"),
            HumanMessage("What is RAG?"),
            AIMessage("RAG is a technique to combine retrieval and generation."),
        ]
    },
    # config={"configurable": {"session_id": 15}}
)

'Retrieval-Augmented Generation'

### RAG Chain:

In [37]:
# Run retrieval + answering without the session-history wrapper.
# NOTE: the `context` passed here does not survive — in the returned dict, `context`
# holds the documents fetched by the retriever instead of this hand-written one.
rag_chain.invoke(
    input={
        "input": "Full form of RAG?",
        "context": [Document(page_content="This is some random document which contains some random information.")],
        "chat_history": [
            HumanMessage("hi"),
            AIMessage("hello"),
            HumanMessage("What is RAG?"),
            AIMessage("RAG is a technique to combine retrieval and generation."),
        ]
    },
    # config={"configurable": {"session_id": 15}}
)

{'input': 'Full form of RAG?',
 'context': [Document(id='85ac837c-f5c2-447e-90ee-704e3af4f03c', metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Microsoft Word 8.0', 'creationdate': '2000-06-29T10:21:08+11:00', 'source': './assets/pdf_w_text.pdf', 'file_path': './assets/pdf_w_text.pdf', 'total_pages': 1, 'format': 'PDF 1.3', 'title': 'This is a test PDF file', 'author': 'cdaily', 'subject': '', 'keywords': '', 'moddate': '2013-10-28T15:24:13-04:00', 'trapped': '', 'modDate': "D:20131028152413-04'00'", 'creationDate': "D:20000629102108+11'00'", 'page': 0}, page_content="Adobe Acrobat PDF Files\nAdobe® Portable Document Format (PDF) is a universal file format that preserves all\nof the fonts, formatting, colours and graphics of any source document, regardless of\nthe application and platform used to create it.\nAdobe PDF is an ideal format for electronic document distribution as it overcomes the\nproblems commonly encountered with electronic file sharing.\n• Anyone,

### Conv RAG:

In [38]:
# First turn of session 120: a new history is created for this id (a message is printed).
# NOTE(review): session_id is an int here, while get_session_history annotates it as str.
conversational_rag_chain.invoke(
    input={"input":"Hello, I am Bhushan. What abt u?"},
    config={"configurable":{"session_id":120}}
)

Created chat hist for session id: `120`


{'input': 'Hello, I am Bhushan. What abt u?',
 'chat_history': [],
 'context': [Document(id='85ac837c-f5c2-447e-90ee-704e3af4f03c', metadata={'producer': 'Acrobat Distiller 4.0 for Windows', 'creator': 'Microsoft Word 8.0', 'creationdate': '2000-06-29T10:21:08+11:00', 'source': './assets/pdf_w_text.pdf', 'file_path': './assets/pdf_w_text.pdf', 'total_pages': 1, 'format': 'PDF 1.3', 'title': 'This is a test PDF file', 'author': 'cdaily', 'subject': '', 'keywords': '', 'moddate': '2013-10-28T15:24:13-04:00', 'trapped': '', 'modDate': "D:20131028152413-04'00'", 'creationDate': "D:20000629102108+11'00'", 'page': 0}, page_content="Adobe Acrobat PDF Files\nAdobe® Portable Document Format (PDF) is a universal file format that preserves all\nof the fonts, formatting, colours and graphics of any source document, regardless of\nthe application and platform used to create it.\nAdobe PDF is an ideal format for electronic document distribution as it overcomes the\nproblems commonly encountered with

In [39]:
# Second turn, same session: the first exchange is now supplied as chat_history.
conversational_rag_chain.invoke(
    input={"input":"What are popular pets?"},
    config={"configurable":{"session_id":120}}
)

{'input': 'What are popular pets?',
 'chat_history': [HumanMessage(content='Hello, I am Bhushan. What abt u?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello Bhushan! I am a helpful AI assistant. I was created by the Gemma team at Google DeepMind. I’m an open-weights model, which means I’m widely available for public use.', additional_kwargs={}, response_metadata={})],
 'context': [Document(id='a1cd727f-9fc6-4585-a6b2-571d5a59a803', metadata={}, page_content='Cats and Dogs are both popular pets.'),
  Document(id='de68b1c6-7416-4a38-94ae-24613b054a0c', metadata={}, page_content='Dogs are known for their loyalty and companionship.'),
  Document(id='b0d1c006-be00-4c3d-b609-ecb71302ef01', metadata={}, page_content='Dogs are loyal and require more attention.'),
  Document(id='46bd6c46-f3d4-48d5-86b6-f2479a957b3e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),
  Document(id='8643d5be-c856-49ae-be4f-c25d99e39a2c', metadata={}, page_conte

In [40]:
# Third turn, same session: both earlier exchanges are supplied as chat_history.
conversational_rag_chain.invoke(
    input={"input":"Describe CATS?"},
    config={"configurable":{"session_id":120}}
)

{'input': 'Describe CATS?',
 'chat_history': [HumanMessage(content='Hello, I am Bhushan. What abt u?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello Bhushan! I am a helpful AI assistant. I was created by the Gemma team at Google DeepMind. I’m an open-weights model, which means I’m widely available for public use.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What are popular pets?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='According to the provided document, popular pets are Cats and Dogs.', additional_kwargs={}, response_metadata={})],
 'context': [Document(id='46bd6c46-f3d4-48d5-86b6-f2479a957b3e', metadata={}, page_content='Cats are independent and low-maintenance pets.'),
  Document(id='5f3b1177-d2a3-4d4f-b887-5cd9b5a27a95', metadata={}, page_content='Cats are often seen as aloof and mysterious.'),
  Document(id='8643d5be-c856-49ae-be4f-c25d99e39a2c', metadata={}, page_content='Cats are great for small livi

In [41]:
# Fourth turn, same session: the question relies on the earlier turns to resolve "they".
conversational_rag_chain.invoke(
    input={"input":"1. Are they LOYAL? 2. What do I mean my THEY?"},
    config={"configurable":{"session_id":120}}
)

{'input': '1. Are they LOYAL? 2. What do I mean my THEY?',
 'chat_history': [HumanMessage(content='Hello, I am Bhushan. What abt u?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello Bhushan! I am a helpful AI assistant. I was created by the Gemma team at Google DeepMind. I’m an open-weights model, which means I’m widely available for public use.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What are popular pets?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='According to the provided document, popular pets are Cats and Dogs.', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Describe CATS?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Cats are independent and low-maintenance pets. Cats are often seen as aloof and mysterious. Cats are NOT AT ALL LOYAL.', additional_kwargs={}, response_metadata={})],
 'context': [Document(id='de68b1c6-7416-4a38-94ae-24613b054a0c', metadata={},

## Important:

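- Just figured this out:
- If the chain contains an LLM call but the LLM is still not being invoked, a possible cause is how the chat history reaches the `ChatPromptTemplate`.
- If the history is empty, the template step is skipped.
- And, presumably, so are all the calls that follow it!
- This is consistent with the documented behaviour of `create_history_aware_retriever`: when `chat_history` is missing or empty, the `input` is passed straight to the retriever without calling the LLM.
- So, if the LLM is not getting called, try passing some history manually.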

> My issue
- I was using `messages` as the key for the chat history,
- because the trimmer expects its input under the `messages` key.
- But the prompt template was somehow unable to use the `messages` key, even though it was set explicitly.
- Once it was replaced with `chat_history`, it worked.
- Also, always use `answer` as the output key (`output_messages_key="answer"`) for the history-aware retrieval chain used in the conversational RAG.