In [1]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001")
embedding_vector = embedding_model.embed_query(
    "Artificial Intelligence is transforming the world.")
print(embedding_vector)

[0.03653311729431152, -0.0036435893271118402, -0.04862387105822563, -0.009088718332350254, 0.062586210668087, 0.03536169230937958, 0.021346230059862137, -0.016231535002589226, 0.0029035196639597416, 0.04641566053032875, 0.011649583466351032, 0.03793774172663689, -0.013172326609492302, 0.021497223526239395, -0.00037192986928857863, -0.05271148681640625, 0.014696195721626282, 0.045456018298864365, 0.011687371879816055, 0.019356325268745422, 0.014189374633133411, -0.010052349418401718, 0.009420868940651417, -0.022700617089867592, 0.012649854645133018, -0.03522105515003204, 0.01691955327987671, -0.04934024438261986, -0.020393552258610725, 0.01376526802778244, -0.022879594936966896, 0.02819780819118023, -0.02548406459391117, 0.013994702138006687, 0.02382257767021656, -0.051307935267686844, 0.0015836667735129595, 0.006263628136366606, 0.009380538016557693, -0.02481912449002266, 0.00696534151211381, -0.08085664361715317, -0.006354163400828838, 0.03180030360817909, -0.007797748316079378, -0.00

In [2]:
from langchain_community.document_loaders import PyPDFLoader

doc1 = PyPDFLoader("files/doc1.pdf").load()
doc2 = PyPDFLoader("files/doc2.pdf").load()
doc3 = PyPDFLoader("files/doc3.pdf").load()
doc4 = PyPDFLoader("files/doc4.pdf").load()
doc5 = PyPDFLoader("files/doc5.pdf").load()
documents = doc1 + doc2 +  doc3 + doc4 + doc5


In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [4]:
split = RecursiveCharacterTextSplitter(chunk_size =1000, chunk_overlap = 20)
chunks = split.split_documents(documents) 


In [5]:
vector_store = FAISS.from_documents(chunks,embedding_model)

In [6]:
#vector_store.index_to_docstore_id
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 0.5})

retriever


VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000243CB5DDDD0>, search_type='mmr', search_kwargs={'k': 3, 'lambda_mult': 0.5})

In [7]:
retriever.invoke("What is insurance?")

[Document(id='563d6e52-5f0a-48a5-b75c-49f0807bff84', metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2022-06-16T20:06:13+05:30', 'author': 'Vinay Dhanokar/Head Office Pune/Corporate Communication/General', 'moddate': '2022-06-16T20:06:13+05:30', 'source': 'files/doc1.pdf', 'total_pages': 49, 'page': 1, 'page_label': '2'}, page_content='Emergency care means management for an Illness or Injury which results in symptoms which occur suddenly and \nunexpectedly, and requires immediate care by a Medical Practitioner to prevent death or serious long term \nimpairment of the Insured person’s health.'),
 Document(id='f236c379-62b1-413f-b3a7-d6c2304533fa', metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2022-07-12T15:10:23+05:30', 'author': 'Padmapriya C-Manager-Product Management-HO-Chola Ms', 'moddate': '2022-07-12T15:10:23+05:30', 'source': 'files/doc2.pdf', 'total_pages': 101, 'page': 89, 'pag

In [8]:
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

load_dotenv()

model = ChatGoogleGenerativeAI(model="gemini-2.5-pro")


prompt = PromptTemplate(
    input_variables=["context", "query"],
    template="""
You are a legal assistant. Based on the following source documents (in context), answer the legal query provided.

Instructions:
- Respond ONLY in JSON format.
- Do NOT include any explanation, preamble, markdown, or extra formatting.
- Use the following format exactly:

{{
  "query": "{{query}}",
  "response": "<your legal answer in plain English>",
  "citations": [
    {{
      "source_text": "<exact clause or text from the source document>"
    }}
  ],
  "status": "SUCCESS"  // or "NO_RELEVANT_CLAUSES"
}}

Context:
{context}
"""
)




In [9]:
Query = "46-year-old male, knee surgery in Pune, 3-month-old insurance policy"
retriever_doc = retriever.invoke(Query)
retriever_doc

[Document(id='c8c15b9d-dd19-44ea-b0b8-9829a9f967fe', metadata={'producer': 'Corel PDF Engine Version 24.0.0.301', 'creator': 'CorelDRAW 2022', 'creationdate': '2022-06-30T18:58:22+05:30', 'moddate': '2022-06-30T18:58:22+05:30', 'author': 'akshay', 'title': '20220507001-Golden Shield-POLICY WORDING.cdr', 'source': 'files/doc5.pdf', 'total_pages': 31, 'page': 27, 'page_label': '28'}, page_content='Practitioner and follow the advice and treatment \nthat he recommends.\nNA as \nexpenses have \nnot crossed \nvoluntary \ndeductible \namount\n-'),
 Document(id='01a12fc4-94f0-4ca4-a09a-adccb6b30ce6', metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2022-06-16T20:06:13+05:30', 'author': 'Vinay Dhanokar/Head Office Pune/Corporate Communication/General', 'moddate': '2022-06-16T20:06:13+05:30', 'source': 'files/doc1.pdf', 'total_pages': 49, 'page': 34, 'page_label': '35'}, page_content='41. The wordings “The Policyholder may be changed during the Po

In [10]:
content_text = "\n\n".join(doc.page_content for doc in retriever_doc)
content_text

'Practitioner and follow the advice and treatment \nthat he recommends.\nNA as \nexpenses have \nnot crossed \nvoluntary \ndeductible \namount\n-\n\n41. The wordings “The Policyholder may be changed during the Policy Period only in case of his/her demise or him/her \nmoving out of India.” In the Standard Terms and Conditions shall not be applicable to this Policy as this Policy also \ncovers the international Health coverage.\n\nfirst diagnoses or detection \nix) Treating doctors certificate regarding missing information in case histories e.g. Circumstance of \ninjury and Alcohol or drug influence at the time of accident \nx) Copy of settlement letter from other insurance company or TPA \nxi) Stickers and invoice of implants used during surgery \nxii) Copy of MLC (Medico legal case) records and FIR (First information report), in case of claims \narising out of an accident \nxiii) Regulatory requirements as amended from time to time, currently mandatory NEFT (to enable \ndirect credit o

In [16]:
final_prompt = prompt.format(context=content_text, query="Is my claim payable?")



In [19]:
import json
answer = model.invoke(final_prompt)
#result_json = json.loads(answer.content)  # Convert string to Python dict
print(repr(answer.content))


'```json\n{\n  "query": "Can I change the policyholder if they move to another country?",\n  "response": "The standard condition, which states that a policyholder may be changed if they move out of India, is explicitly not applicable to this particular policy. This is because the policy also provides international health coverage. The documents do not specify the alternative conditions for changing the policyholder.",\n  "citations": [\n    {\n      "source_text": "The wordings “The Policyholder may be changed during the Policy Period only in case of his/her demise or him/her moving out of India.” In the Standard Terms and Conditions shall not be applicable to this Policy as this Policy also covers the international Health coverage."\n    }\n  ],\n  "status": "SUCCESS"\n}\n```'


In [21]:


import json

raw_content = answer.content.strip()

# Remove triple backticks & optional 'json' label
if raw_content.startswith("```"):
    raw_content = raw_content.strip("`")  # remove all backticks
    raw_content = raw_content.replace("json\n", "", 1).strip()

result_json = json.loads(raw_content)
print(result_json)


{'query': 'Can I change the policyholder if they move to another country?', 'response': 'The standard condition, which states that a policyholder may be changed if they move out of India, is explicitly not applicable to this particular policy. This is because the policy also provides international health coverage. The documents do not specify the alternative conditions for changing the policyholder.', 'citations': [{'source_text': 'The wordings “The Policyholder may be changed during the Policy Period only in case of his/her demise or him/her moving out of India.” In the Standard Terms and Conditions shall not be applicable to this Policy as this Policy also covers the international Health coverage.'}], 'status': 'SUCCESS'}
