In [1]:
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.chat_models.base import init_chat_model
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from pathlib import Path
import json
from typing import Optional, Dict, Any, List, Tuple
import os
from dotenv import load_dotenv
load_dotenv()



  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY")

# Load Documents

In [3]:
def load_jsonl_as_documents(path: Path) -> List[Document]:
    docs: List[Document] = []
    with path.open("r", encoding="utf-8") as f:
        for line in f:
            rec = json.loads(line)
            page_content = rec.get("page_content", "")
            meta = rec.get("metadata", {})
            docs.append(Document(page_content=page_content, metadata=meta))
    return docs

docs = load_jsonl_as_documents(Path('../output/bima_shree_chunks.jsonl'))
print(f"Loaded {len(docs)} chunks")
print("Sample metadata:", docs[0].metadata)
print("Sample Page Content:", docs[0].page_content)



Loaded 98 chunks
Sample metadata: {'policy_name': 'Money Back Plans', 'product_name': 'LIC‚Äôs Bima Shree', 'plan_no': '748', 'uin_no': '512N316V03', 'page': 0, 'section': 'Introduction', 'chunk_part': 1}
Sample Page Content: LIC‚Äôs Bima Shree plan offers a combination of protection and savings. This plan is specially designed for High Net-worth Individuals. This plan provides financial support for the family in case of unfortunate death of the policyholders during the policy term. Periodic payments shall also be made on survival of the policyholder at specified durations during the policy term and a lump sum payment to the surviving policyholder at the time of maturity. This Plan can be purchased Offline through Licensed agents, Corporate agents, Brokers and Insurance Marketing Firms. Key Features: ‚Ä¢ The plan provides for protection and savings. ‚Ä¢ Limited premium payment. ‚Ä¢ Flexibility to - Choose the premium pay


In [4]:
embeddings=OpenAIEmbeddings(model="text-embedding-3-small")


# Initialize ChromaDb

In [5]:
## Create a Chromdb vector store
persist_directory="./chroma_db"

## Initialize Chromadb with Open AI embeddings
vectorstore=Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
    persist_directory=persist_directory,
    collection_name="rag_collection"

)

print(f"Vector store created with {vectorstore._collection.count()} vectors")
print(f"Persisted to: {persist_directory}")

Vector store created with 98 vectors
Persisted to: ./chroma_db


In [6]:
# Cell 5 ‚Äî Simple similarity search (no LLM yet)

def search(query: str,k: int = 4,where: Optional[Dict[str, Any]] = None) -> List[Document]:
    """
    Basic semantic search.
    Use `where` to filter by metadata, e.g. {"UIN": "512N316V03"} or {"product_name": "LIC‚Äôs Bima Shree"}
    """
    results = vectorstore.similarity_search(query, k=k, filter=where)
    for i, r in enumerate(results, 1):
        print(f"\nüîπ Result {i}")
        print("Plan:", r.metadata.get("PlanName") or r.metadata.get("product_name"))
        print("UIN:", r.metadata.get("UIN") or r.metadata.get("uin_no"))
        print("Section:", r.metadata.get("SectionTitle") or r.metadata.get("section") or r.metadata.get("ChunkType"))
        print("Text:\n", (r.page_content or "")[:600], "‚Ä¶")
    return results

def search_with_score(query: str,k: int = 4,where: Optional[Dict[str, Any]] = None) -> List[Tuple[Document, float]]:
    """
    Same as above, but shows cosine distance scores (lower is closer in Chroma).
    """
    results = vectorstore.similarity_search_with_score(query, k=k, filter=where)
    for i, (r, score) in enumerate(results, 1):
        print(f"\nüîπ Result {i} | score={score:.4f} (lower is better)")
        print("Plan:", r.metadata.get("PlanName") or r.metadata.get("product_name"))
        print("UIN:", r.metadata.get("UIN") or r.metadata.get("uin_no"))
        print("Section:", r.metadata.get("SectionTitle") or r.metadata.get("section") or r.metadata.get("ChunkType"))
        print("Text:\n", (r.page_content or "")[:600], "‚Ä¶")
    return results


In [7]:
## Testing
_ = search("What is the minimum entry age and maximum maturity age?")


üîπ Result 1
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: Introduction
Text:
 49 years (nearer birthday) for policy term 20 years 45 years (nearer birthday) for policy term 24 years 41 years (nearer birthday) for policy term 28 years e) Maximum Age at Maturity : 69 years (nearer birthday) f) Minimum Basic Sum Assured : Rs. 10,00,000 g) Maximum Basic Sum Assured : No limit (The Basic Sum Assured shall be in multiples of Rs. 50,000/- Date of commencement of risk under the plan: Risk will commence immediately from the date of acceptance of the risk. Date of vesting under the plan: If the policy is issued on the life of a minor, the policy shall automatically vest in the Li ‚Ä¶

üîπ Result 2
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: Introduction
Text:
 : a) Policy Term : 14, 16, 18, 20, 24 and 28 years b) Premium Paying Term : (Policy term ‚Äì 4) years c) Minimum Age at entry : 8 years (completed) d) Maximum Age at entry : 55 years (nearer birthday) for policy term 14 years

In [8]:
_ = search("Explain the Survival Benefit schedule for different policy terms.",where={"uin_no": "512N316V03"})   # or {"product_name": "LIC‚Äôs Bima Shree"}


üîπ Result 1
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: Introduction
Text:
 des for protection and savings. ‚Ä¢ Limited premium payment. ‚Ä¢ Flexibility to - Choose the premium payment frequency as per convenience. - Choose the period for which protection is required ‚Äì 14, 16, 18, 20, 24 and 28 years. - Opt for payment of benefit in instalments. ‚Ä¢ Survival benefits at specified duration during the policy term. ‚Ä¢ Option to enhance coverage by opting for Rider Benefits on payment of additional premium for the rider benefits. ‚Ä¢ Benefit of attractive High Sum Assured Rebate. ‚Ä¢ Takes care of liquidity needs through loan facility. 1. ELIGIBILITY CONDITIONS AND OTHER RESTRICTION ‚Ä¶

üîπ Result 2
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: Introduction
Text:
 ii. ‚ÄúTotal Premiums Paid‚Äù means total of all the premiums paid under the base product, excluding any extra premium, and taxes, if collected explicitly. In case LIC‚Äôs Premium Waiver Benefit Rider is opted f

In [9]:

# If you want scores:
_ = search_with_score("What are the surrender value rules?", k=5)


üîπ Result 1 | score=0.3004 (lower is better)
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: 12. SURRENDER
Text:
 The policy can be surrendered after completion of first policy year provided one full year‚Äôs premium(s) has been paid. However, the policy shall acquire Guaranteed Surrender Value on payment of atleast two full years‚Äô premiums and Special Surrender Value after completion of first policy year provided one full year‚Äôs premium(s) has been paid. On surrender of an in-force or paid-up policy, the Corporation shall pay the Surrender Value equal to higher of Guaranteed Surrender Value and Special Surrender Value. Guaranteed Surrender Value payable during the policy term shall be equal to the total pr ‚Ä¶

üîπ Result 2 | score=0.3102 (lower is better)
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: 12. SURRENDER
Text:
 22 27.06% 20.85% 23 30.00% 21.99% 24 35.00% 23.38% 25 25.05% 26 27.06% 27 30.00% 28 35.00% The Special Surrender Value shall be reviewed annually in li

In [10]:
_=search_with_score('For policy terms 20, 24, and 28 years, what percentage of Basic Sum Assured is paid as Sum Assured on Maturity?')


üîπ Result 1 | score=0.1696 (lower is better)
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: Introduction
Text:
 r various policy terms is as below: Policy Term Percentage of Basic Sum Assured (BSA) 14 years 30% of Basic Sum Assured on each of 10th and 12th policy anniversary 16 years 35% of Basic Sum Assured on each of 12th and 14th policy anniversary 18 years 40% of Basic Sum Assured on each of 14th and 16th policy anniversary 20 years 45% of Basic Sum Assured on each of 16th and 18th policy anniversary 24 years 45% of Basic Sum Assured on each of 20th and 22nd policy anniversary 28 years 45% of Basic Sum Assured on each of 24th and 26th policy anniversary C. Maturity Benefit: On the life assured survi ‚Ä¶

üîπ Result 2 | score=0.1886 (lower is better)
Plan: LIC‚Äôs Bima Shree
UIN: 512N316V03
Section: Introduction
Text:
 ving to the end of the policy term, provided all due premiums have been paid, ‚ÄúSum Assured on Maturity‚Äù along with accrued Guaranteed Additions and Loyalty

# Retriver


In [19]:
## Convert vector store to retriever
retriever=vectorstore.as_retriever(
    search_type="mmr", 
    search_kwarg={"k":3} ## Retrieve top 5 relevant chunks
)
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x144b34cb0>, search_type='mmr', search_kwargs={})

# Model Initilization

In [20]:
llm=init_chat_model("openai:gpt-3.5-turbo")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x14642eab0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x14642f800>, root_client=<openai.OpenAI object at 0x14642ee40>, root_async_client=<openai.AsyncOpenAI object at 0x145c30e90>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [13]:
# llm.invoke("What is AI")

In [21]:
RAG_PROMPT = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are an expert assistant for Indian life insurance policies (especially LIC). "
        "You MUST answer strictly and only from the provided context chunks. "
        "Never use outside knowledge or guess. If the answer is not clearly stated "
        "in the context, reply exactly with: 'I don't know based on the provided policy context.'\n\n"
        "Answering rules:\n"
        "1) First, carefully read the question and identify what is being asked "
        "(e.g., percentage, age limit, term, definition, conditions, etc.).\n"
        "2) Use ONLY the given context text to find the answer. Do not infer or calculate "
        "anything that is not explicitly stated.\n"
        "3) If the question asks for specific numeric details (percentages, ages, terms, sums, etc.):\n"
        "   - Copy the numbers and their units/phrases exactly as written in the context.\n"
        "   - Do NOT invent or average values.\n"
        "   - Keep the answer short and direct (one or two lines max).\n"
        "4) If the question asks for explanations, features, benefits, or terms & conditions "
        "(e.g., 'what are', 'explain', 'describe', 'terms and conditions'):\n"
        "   - Provide a clear, structured summary using bullet points where helpful.\n"
        "   - Combine information from all relevant context chunks.\n"
        "   - Do NOT add your own assumptions.\n"
        "5) If multiple policies or plans appear in the context, clearly mention the correct "
        "Plan Name and UIN when you answer.\n"
        "6) Never mention the words 'context chunk' or 'document' in the answer. "
        "Just answer as if you are the policy expert."
    ),
    (
        "user",
        "Question:\n{question}\n\n"
        "Context:\n{context}\n\n"
        "Based on ONLY the above context, provide the best possible answer following all rules."
    )
])


def format_docs(docs):
    out = []
    for d in docs:
        meta = d.metadata or {}
        tag = f"[{meta.get('PlanName') or meta.get('product_name','')}, UIN={meta.get('UIN') or meta.get('uin_no','')}, Section={meta.get('SectionTitle') or meta.get('section','')}]"
        out.append(f"{tag}\n{d.page_content}")
    return "\n\n---\n\n".join(out)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | RAG_PROMPT
    | llm
    | StrOutputParser()
)
rag_chain

{
  context: VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x144b34cb0>, search_type='mmr', search_kwargs={})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are an expert assistant for Indian life insurance policies (especially LIC). You MUST answer strictly and only from the provided context chunks. Never use outside knowledge or guess. If the answer is not clearly stated in the context, reply exactly with: 'I don't know based on the provided policy context.'\n\nAnswering rules:\n1) First, carefully read the question and identify what is being asked (e.g., percentage, age limit, term, definition, conditions, etc.).\n2) Use ONLY the given context 

In [22]:

question_2 = "Explain the Survival Benefit schedule for different policy terms."
answer_2 = rag_chain.invoke(question_2)
print("\nQ:", question_2)
print("A:", answer_2)


Q: Explain the Survival Benefit schedule for different policy terms.
A: Survival benefits are payable at specified durations during the policy term of LIC's Bima Shree for policy terms of 14, 16, 18, 20, 24, and 28 years. However, specific details of the Survival Benefit schedule for these different policy terms are not provided in the context.


In [23]:

question_2 = "What is payable at the end of the policy term if all premiums are paid and what are there percentages?"
answer_2 = rag_chain.invoke(question_2)
print("\nQ:", question_2)
print("A:", answer_2)


Q: What is payable at the end of the policy term if all premiums are paid and what are there percentages?
A: At the end of the policy term, if all premiums are paid, the following percentages of the Basic Sum Assured will be payable:
- 40% for a policy term of 14 years
- 30% for a policy term of 16 years
- 20% for a policy term of 18 years
- 10% for policy terms of 20, 24, and 28 years.


In [24]:
question_3 = "For policy terms 20, 24, and 28 years, what percentage of Basic Sum Assured is paid as Sum Assured on Maturity for LIC's Bhima Shree Policy?"
answer_3 = rag_chain.invoke(question_3)
print("\nQ:", question_3)
print("A:", answer_3)



Q: For policy terms 20, 24, and 28 years, what percentage of Basic Sum Assured is paid as Sum Assured on Maturity for LIC's Bhima Shree Policy?
A: For LIC's Bhima Shree Policy:
- 20 years policy term: 45% of Basic Sum Assured on each of 16th and 18th policy anniversary.
- 24 years policy term: 45% of Basic Sum Assured on each of 20th and 22nd policy anniversary.
- 28 years policy term: 45% of Basic Sum Assured on each of 24th and 26th policy anniversary.


In [25]:
question_4 = "What are the Surrender Policies and what are there terms and conditions?"
answer_4 = rag_chain.invoke(question_4)
print("\nQ:", question_4)
print("A:", answer_4)



Q: What are the Surrender Policies and what are there terms and conditions?
A: **Surrender Policies for LIC's Bima Shree (UIN: 512N316V03)**

- Surrender Policy Terms and Conditions:
  - Policy can be surrendered after completing the first policy year, given one full year‚Äôs premium has been paid.
  - Guaranteed Surrender Value is acquired after paying at least two full years‚Äô premiums.
  - Special Surrender Value can be obtained after the first policy year, with payment of one full year's premium.
  - Surrendering an in-force or paid-up policy results in receiving the higher value between Guaranteed Surrender Value and Special Surrender Value.
  - Guaranteed Surrender Value payable during the policy term is equivalent to the total premiums paid (excluding extra premiums, taxes, and rider premiums if opted).
