Load Env

In [115]:
from dotenv import load_dotenv

# Load key=value pairs from a local .env file into the process environment.
# The Chroma settings are read back with os.getenv in the retriever cell;
# presumably the OpenAI and Tavily API keys come from the same file - confirm.
load_dotenv()

True

Get Embeddings

In [116]:
from langchain_openai import OpenAIEmbeddings

# Embedding model settings, kept in one place so they are easy to tune.
EMBEDDING_MODEL = "text-embedding-3-large"
EMBEDDING_DIMENSIONS = 256

# Embedding function handed to the vector store in the retriever cell.
embedding = OpenAIEmbeddings(
    model=EMBEDDING_MODEL,
    dimensions=EMBEDDING_DIMENSIONS,
)

Get Retriever

In [117]:
from langchain_chroma import Chroma
import os 
# Chroma Cloud connection settings come from the environment.
# os.getenv yields None for any variable that is not set.
chroma_cloud_settings = {
    "chroma_cloud_api_key": os.getenv("CHROMA_API_KEY"),
    "tenant": os.getenv("CHROMA_TENANT"),
    "database": os.getenv("CHROMA_DATABASE"),
}

vector_store = Chroma(
    collection_name="crag_test_3",
    embedding_function=embedding,
    **chroma_cloud_settings,
)

# Number of documents fetched per similarity search.
top_k = 5
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=top_k),
)

Define LLM

In [118]:
from langchain_openai import ChatOpenAI

# General-purpose chat model shared by the contextualize, rephrase and QA chains.
llm = ChatOpenAI(
    model="gpt-5-nano",
    stream_usage=True,
    temperature=1,
    reasoning_effort="minimal",
)

Create Contextualize Chain

In [119]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    Runnable,
    RunnablePassthrough,
    RunnableParallel,
    chain,
)
from operator import itemgetter

# System instruction: rewrite the latest question so it stands alone without
# the chat history, and return only the rewritten question.
contextualize_instructions = (
    "Convert the latest user question into a standalone question given the chat history. "
    "Don't answer the question, return the question and nothing else (no descriptive text)."
)

# Message order: system instruction, prior turns, then the new question.
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_instructions),
        ("placeholder", "{chat_history}"),
        ("human", "{question}"),
    ]
)

# Prompt -> model -> plain string.
contextualize_question = contextualize_prompt | llm | StrOutputParser()

@chain
def contextualize_if_needed(input_: dict) -> Runnable:
    """Pick how the incoming question is turned into the question used downstream.

    Args:
        input_: Chain input; its "chat_history" entry is inspected here and its
            "question" entry is consumed by the returned runnable.

    Returns:
        A runnable that simply extracts "question" when there is no (or an
        empty) chat history, otherwise the `contextualize_question` chain.
    """
    # Nothing to resolve against without history: pass the question through as-is.
    if not input_.get("chat_history"):
        return RunnablePassthrough() | itemgetter("question")
    return contextualize_question

Create Retrieval Evaluator Chain

In [None]:
from pydantic import BaseModel, Field

from typing import Literal

from pydantic import field_validator


class GraderSchema(BaseModel):
    """ Schema for grading document relevance. 
    Args:
        relevance (str): One of 'high', 'partial', or 'irrelevant'.
    """

    # Restricting the field to the three labels the corrective chain branches on
    # means an unexpected label fails validation instead of silently falling
    # through to the "irrelevant" (web-search-only) branch.
    relevance: Literal["high", "partial", "irrelevant"] = Field(
        description="Documents are relevant to the question, 'high', 'partial' or 'irrelevant'"
    )

    @field_validator("relevance", mode="before")
    @classmethod
    def normalize_relevance(cls, value):
        """Lower-case and trim string labels before the Literal check.

        Keeps harmless variations such as "High" or " partial " acceptable,
        matching the case-insensitive comparison done by the caller.
        """
        if isinstance(value, str):
            return value.strip().lower()
        return value

# Grader model: a dedicated ChatOpenAI instance whose output is parsed into GraderSchema.
# NOTE: the knowledge-refinement cell assigns `gpt5` and `structured_llm` again, so these
# two names refer to the grader objects only until that cell runs.
gpt5 = ChatOpenAI(model="gpt-5-mini", temperature=1, reasoning_effort="minimal")
structured_llm = gpt5.with_structured_output(GraderSchema)
# System prompt for the grader. The doubled braces near the end are escapes, so the
# prompt template emits literal braces instead of treating them as input variables.
# NOTE(review): the JSON example ends its only member with a trailing comma, which is
# not valid JSON - consider removing it.
grader_prompt = """You are a query–document relevance grader for a Retrieval-Augmented Generation (RAG) system.

Your job:
Given a user_query and a document, determine how relevant the document is for answering the query.

Your output must be ONLY one of:
- "high"        → The document directly answers the query OR contains key facts specifically about the SAME entity or topic mentioned in the query.
- "partial"     → The document is related to the general topic but not the specific entity, or provides incomplete context.
- "irrelevant"  → The document does not meaningfully help answer the query OR is about a different entity.

CRITICAL RULES (to avoid false “high” relevance):
1. **Entity mismatch forces "irrelevant".**
   - If the user_query mentions a specific entity (person, company, product, location, etc.)
     AND the document focuses on a different entity → label = "irrelevant".
   - Example:
     Query: “How many products does Company C have?”
     Document: “Company B makes 12 products…”
     → "irrelevant"

2. Keyword overlap alone does NOT count as relevance.
   - Shared words like “company”, “product”, “sales”, etc. do NOT imply relevance.

3. If the document repeats the query without adding actual information → “irrelevant”.

4. If the document is tangentially related but not directly helpful → “partial”.

Respond ONLY in the following JSON:
{{
  "relevance": "high" | "partial" | "irrelevant",
}}

"""

# The human turn supplies the two template inputs: {context} and {question}.
grade_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", grader_prompt),
        ("human", "Retrieved document: \n\n {context} \n\n user_query: {question}"),
    ]
)

# Prompt -> structured-output model. corrective_chain invokes this and reads the
# `relevance` attribute of the result.
grader_chain = grade_prompt_template | structured_llm

Create Query Rephrase Chain for Web Search

In [121]:
# System prompt for turning a natural-language question into a web-search query.
rephrase_prompt = (
    "You a question re-writer that converts an input question to a better version that is optimized for web search. "
    "Look at the input and try to reason about the underlying semantic intent / meaning. "
    "Just one question is expected as output. \n"
    "Example:\n"
    'Original Question: "Who is the screenwriter of Death of Batman?"\n'
    'Rephrased Question: "Death of Batman screenwriter"\n'
)

rephrase_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", rephrase_prompt),
        (
            "human",
            "Here is the initial question: \n\n {question} \n "
            "Formulate an improved query for web search.",
        ),
    ]
)

# Prompt -> model -> plain string holding the rewritten query.
rephrase_chain = rephrase_prompt_template | llm | StrOutputParser()

Create Knowledge Refinement Chain

In [137]:
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter


from typing import Optional


class RefineSchema(BaseModel):
    # One document strip kept by the knowledge-refinement model.
    # (Documented with comments rather than a docstring so the schema
    # description sent to the model is not changed.)

    # Nullable but still required: the description tells the model to leave the
    # id null when not applicable, which a plain `str` field would reject.
    id: Optional[str] = Field(
        description="The unique identifier for the document. Leave null if not applicable."
    )
    # Text of the strip; becomes Document.page_content in set_refined_context.
    page_content: str = Field(
        description="The content of the relevant document strip that helps answer the user query."
    )
    # Origin of the strip; stored under metadata["source"] in set_refined_context.
    source: str = Field(
        description="The source or origin of the document strip, such as a URL or document title."
    )

class RefinedDocs(BaseModel):
    # Structured-output wrapper for the knowledge-refinement model.
    # set_refined_context iterates over `refined_docs` to rebuild the context.
    refined_docs: list[RefineSchema] = Field(
        description="A list of relevant document strips that help answer the user query."
    )
    
# Refinement model: a ChatOpenAI instance whose output is parsed into RefinedDocs.
# NOTE: `gpt5` and `structured_llm` are also assigned in the grader cell; from here on
# both names refer to the refinement objects.
gpt5 = ChatOpenAI(model="gpt-5-mini", temperature=1, reasoning_effort="minimal")
structured_llm = gpt5.with_structured_output(RefinedDocs)

# System prompt listing the rules for keeping or dropping each strip.
knowledge_refine_prompt = """You are a knowledge refinement module for a Retrieval-Augmented Generation (RAG) system.

You will be given:
- A user_query
- A list of strips (small text segments from a larger document)

Your task:
Evaluate each strip and decide whether it is relevant or irrelevant to the user_query.

Strict relevance rules:
1. A strip is relevant only if it directly helps answer the user_query.
2. If the query contains a specific entity (company, product, person, location, etc.) 
   and the strip refers to a different entity → irrelevant.
3. Keyword overlap alone does NOT count as relevance.
4. Repeating the query without new information → irrelevant.
5. Only keep strips with clear, meaningful, factual contribution to the answer.
"""
# The human turn supplies the two template inputs: {question} and {strips}.
knowledge_refine_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", knowledge_refine_prompt),
        ("human", "user_query: {question}\n Document strips: {strips}"),
    ]
)

# Prompt -> structured-output model; set_refined_context reads `refined_docs` from the result.
knowledge_refine_chain = knowledge_refine_prompt_template | structured_llm

def strip_documents(original_context, chunk_size=500, chunk_overlap=0):
    """Split documents into small strips for per-strip relevance filtering.

    Args:
        original_context: Documents to split; handed to the splitter unchanged.
        chunk_size: Maximum strip size passed to the splitter. Defaults to 500,
            the value that used to be hard-coded.
        chunk_overlap: Overlap between consecutive strips passed to the
            splitter. Defaults to 0, the value that used to be hard-coded.

    Returns:
        The strips produced by `split_documents`.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(original_context)


def set_refined_context(question, strips, original_context):
    """Replace the contents of `original_context` with the strips judged relevant.

    Args:
        question: User question forwarded to the refinement chain.
        strips: Document strips forwarded to the refinement chain.
        original_context: List that is emptied and refilled IN PLACE, so every
            holder of a reference to it sees the refined documents.

    Returns:
        The same `original_context` list object, now holding one Document per
        refined strip with only a "source" metadata entry.
    """
    refinement = knowledge_refine_chain.invoke({"question": question, "strips": strips})
    kept_strips = [
        Document(page_content=strip.page_content, metadata={"source": strip.source})
        for strip in refinement.refined_docs
    ]
    # Mutate rather than rebind: callers rely on the original list being updated.
    original_context.clear()
    original_context.extend(kept_strips)
    return original_context

Create QA Chain

In [152]:
instruction = "Answer the questions using the given context only"

# System message: the fixed instruction, a blank line, then the {context}
# template variable followed by a period.
qa_instructions = f"{instruction}\n\n{{context}}."
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_instructions),
        ("human", "{question}"),
    ]
)

def format_docs(docs):
    """Join the `page_content` of each document into one newline-separated string.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        The contents joined with "\\n"; an empty string for an empty iterable.
    """
    contents = [doc.page_content for doc in docs]
    return "\n".join(contents)

# Step 1: pull the question through and flatten the context documents to text.
qa_inputs = RunnableParallel(
    question=itemgetter("question") | RunnablePassthrough(),
    context=lambda x: format_docs(x["context"]),
)

# Step 2: render the QA prompt while keeping the question alongside it.
formatted_prompt = qa_inputs | RunnableParallel(
    prompt=qa_prompt,
    question=itemgetter("question"),
)

# Step 3: run the model on the rendered prompt; the output dict carries the
# answer under "llm_result" together with the question.
qa_chain = formatted_prompt | RunnableParallel(
    llm_result=itemgetter("prompt") | llm | StrOutputParser(),
    question=itemgetter("question"),
)

Retriever Chain

In [153]:
# Extract "question" from the chain input and feed it to the vector-store retriever.
# final_chain stores the result under "context".
retrieve_docs_chain = itemgetter("question") | retriever

Web Search Tool

In [154]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Tavily web search tool capped at 5 results per query; used by get_web_search_context.
# NOTE(review): newer langchain-community releases appear to deprecate this class in
# favour of the separate langchain-tavily package - confirm against the installed version.
web_search_tool = TavilySearchResults(max_results=5)

def override_context_with_web_search(original_context, searched_context):
    """Swap the contents of `original_context` for the web-search documents.

    Args:
        original_context: List that is emptied and refilled in place.
        searched_context: Documents that become the new contents.

    Returns:
        The same `original_context` list object after the replacement.
    """
    # Empty first, then refill - the list object itself is kept so that other
    # references to it observe the new contents.
    del original_context[:]
    original_context += searched_context
    return original_context

def get_web_search_context(question):
    """Run a web search and wrap the hits in a single Document.

    Args:
        question: Query string sent to the web search tool.

    Returns:
        A one-element list with a Document whose page_content is the
        newline-joined "content" of every hit and whose metadata holds
        "source": "web_search" plus the comma-joined hit URLs.
        An empty list when the tool returns no hits or a non-list payload,
        so callers can still extend/override their context safely.
    """
    print(f"Performing web search for question: {question}")
    search_results = web_search_tool.invoke({"query": question})

    # The tool can hand back an error message (a string) instead of a list of
    # hits; iterating that string and indexing with ["content"] would raise a
    # confusing TypeError, so treat it as "no results".
    if not isinstance(search_results, list):
        print(f"Web search returned no usable results: {search_results!r}")
        return []
    if not search_results:
        return []

    combined_context = "\n".join(result["content"] for result in search_results)
    urls = ",".join(result["url"] for result in search_results)
    document = Document(
        page_content=combined_context,
        metadata={"source": "web_search", "urls": urls},
    )
    return [document]


def combine_context(original_context, searched_context):
    """Append the web-search documents to `original_context` in place.

    Args:
        original_context: List that receives the extra documents.
        searched_context: Documents appended after the existing ones.

    Returns:
        The same `original_context` list object, now extended.
    """
    original_context += searched_context
    return original_context

Create a Corrective Chain

In [155]:
@chain
def corrective_chain(input_: dict) -> Runnable:
    """Grade the retrieved context and return the QA pipeline matching that grade.

    The grader is invoked with the question and the flattened context, and the
    lower-cased `relevance` label selects one of three pipelines:

    - "high": refine the retrieved context, then answer.
    - "partial": rephrase the question, web-search it, refine the retrieved
      context, append the search results, then answer.
    - anything else: rephrase the question, web-search it, replace the
      context with the search results, then answer.

    Args:
        input_: Chain input holding "question" and "context".

    Returns:
        The selected pipeline as a runnable. final_chain reads
        `result["answer"]["llm_result"]`, i.e. the output of this pipeline.
    """
    grade = grader_chain.invoke(
        {
            "question": input_["question"],
            "context": format_docs(input_["context"]),
        }
    )
    relevance = grade.relevance.lower()
    print(f"Relevance: {relevance}")

    if relevance == "high":
        # Retrieved documents are good enough: answer from the refined strips only.
        gather_inputs = {
            "strips": lambda x: strip_documents(x["context"]),
            "context": itemgetter("context"),
            "question": itemgetter("question"),
        }
        refine_step = RunnablePassthrough.assign(
            context=lambda x: set_refined_context(x["question"], x["strips"], x["context"])
        )
        return gather_inputs | refine_step | qa_chain

    if relevance == "partial":
        # Retrieved documents help only partly: refined strips plus web search.
        gather_inputs = {
            "strips": lambda x: strip_documents(x["context"]),
            "context": itemgetter("context"),
            "question": itemgetter("question"),
            "query": itemgetter("question") | rephrase_chain,
        }
        search_refine_merge = (
            RunnablePassthrough.assign(
                search_results=lambda x: get_web_search_context(x["query"])
            )
            .assign(
                context=lambda x: set_refined_context(x["question"], x["strips"], x["context"])
            )
            .assign(
                context=lambda x: combine_context(x["context"], x["search_results"])
            )
        )
        return gather_inputs | search_refine_merge | qa_chain

    # Retrieved documents are not relevant: answer from web search results only.
    gather_inputs = {
        "context": itemgetter("context"),
        "question": itemgetter("question"),
        "query": itemgetter("question") | rephrase_chain,
    }
    search_and_replace = RunnablePassthrough.assign(
        search_results=lambda x: get_web_search_context(x["query"])
    ).assign(
        context=lambda x: override_context_with_web_search(x["context"], x["search_results"])
    )
    return gather_inputs | search_and_replace | qa_chain


Create Final Chain (Contextualize -> Retrieval -> Corrective)

In [156]:
# Each step adds (or overwrites) one key on the running input dict.
with_standalone_question = RunnablePassthrough.assign(question=contextualize_if_needed)
with_retrieved_context = RunnablePassthrough.assign(context=retrieve_docs_chain)
with_answer = RunnablePassthrough.assign(answer=corrective_chain)

# Contextualize -> retrieve -> grade/correct and answer.
final_chain = with_standalone_question | with_retrieved_context | with_answer

Invoke Chain

In [161]:
# Named `question` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
question = "Howm many types of fixed deposit does Brillar Bank offer?"
result = final_chain.invoke({"question": question, "chat_history": []})

# The QA pipeline's answer text, and the context list used to produce it.
answer = result["answer"]["llm_result"]
source_documents = result["context"]

print("# Answer:\n", answer)
print("\n------------\n")
for doc in source_documents:
    print(doc)
    print("\n")

Relevance: high
# Answer:
 Brillar Bank offers five types of fixed deposit:
- Fixed Deposit (general)
- e-Fixed Deposit
- Flexi Fixed Deposit
- Senior Savers Flexi Fixed Deposit
- Junior Fixed Deposit

(There is also Foreign Currency Fixed Deposit, but it’s listed separately as a type of fixed deposit.)

------------

page_content='• Brillar Bank Personal Banking > • Types of Fixed Deposit Fixed Deposit Earn attractive interest with fixed tenures of up to 60 months e-Fixed Deposit Place your e-Fixed Deposit online via Brillar Connect Flexi Fixed Deposit Enjoy the flexibility of making early partial withdrawals Senior Savers Flexi Fixed Deposit An attractive interest FD account for individuals aged 50 years and above Junior Fixed Deposit An attractive interest FD account for children below' metadata={'source': 'D:\\Code\\langchainjs-test\\assets\\Brillar Bank_Types of Fixed Deposit.pdf'}


page_content='the age of 18 Foreign Currency Fixed Deposit A convenient way to invest in foreign c

In [158]:
# Named `question` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
question = "What are the interest rates for senior flexi fixed deposit in Brillar Bank?"
result = final_chain.invoke({"question": question, "chat_history": []})

# The QA pipeline's answer text, and the context list used to produce it.
answer = result["answer"]["llm_result"]
source_documents = result["context"]

print("# Answer:\n", answer)
print("\n------------\n")
for doc in source_documents:
    print(doc)
    print("\n")

Relevance: partial
Performing web search for question: Brillar Bank senior flexi fixed deposit interest rates
# Answer:
 The provided information does not mention Brillar Bank or its senior flexi fixed deposit rates. If you have another source or a specific rate table for Brillar Bank, share it and I can help interpret.

------------

page_content='Senior Savers Flexi Fixed Deposit An attractive interest FD account for individuals aged 50 years and above' metadata={'source': 'Brillar Bank_Types of Fixed Deposit.pdf'}


page_content='Interest rates Tenure Interest Rate (p.a.) 12 months 2.50%' metadata={'source': 'Brillar Bank_Flexi Fixed Deposit.pdf'}


page_content='Banks and NBFCs offer FD interest rates of 2.50-8.80% p.a. to senior citizens on tenures ranging from 7 days to 10 years. Most banks and NBFCs offer an additional interest rate of 50 bps over the regular fixed deposit interest rates. However, these preferential interest rates are only offered to resident senior citizen FD d

In [None]:
# Named `question` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
question = "How many types of fixed deposit product Brillar Bank and Hong Leong Bank have in common?"
result = final_chain.invoke({"question": question, "chat_history": []})

# The QA pipeline's answer text, and the context list used to produce it.
answer = result["answer"]["llm_result"]
source_documents = result["context"]

print("# Answer:\n", answer)
print("\n------------\n")
for doc in source_documents:
    print(doc)
    print("\n")

Relevance: partial
Performing web search for question: Brillar Bank and Hong Leong Bank fixed deposit products common types of fixed deposits comparison
# Answer:
 They have several fixed deposit products in common. Specifically, the types that appear for both Brillar Bank and Hong Leong Bank are:
- Fixed Deposit (standard fixed deposit)
- Senior Savers Flexi Fixed Deposit (Brillar lists this; Hong Leong lists Senior Savers Flexi FD)
- Junior Fixed Deposit (Brillar lists; Hong Leong lists Junior Fixed Deposit)

So, they share 3 types of fixed deposit products.

------------

page_content='Types of Fixed Deposit: e-Fixed Deposit (place online via Brillar Connect); Flexi Fixed Deposit (early partial withdrawals); Senior Savers Flexi Fixed Deposit (for individuals aged 50 and above); Junior Fixed Deposit (for children below 18); Foreign Currency Fixed Deposit.' metadata={'source': 'Brillar Bank_Types of Fixed Deposit.pdf'}


page_content='Flexi Fixed Deposit: flexible FD with auto-sweep b