In [1]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from dotenv import load_dotenv
load_dotenv()
from langchain.text_splitter import RecursiveCharacterTextSplitter
from uuid import uuid4
from langchain_pinecone import PineconeVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Chat model used for query expansion and for the final answer; replies are
# capped at 500 tokens.
model = ChatOpenAI(
    model='gpt-4.1-nano-2025-04-14',
    max_tokens=500,
)

In [3]:
# Loader for the policy-wording PDF; the path is relative to the notebook's
# working directory.
loader = PyPDFLoader(file_path='BAJHLIP23020V012223.pdf')

In [4]:
# Materialise the pages into a list. `lazy_load()` hands back a one-shot
# iterator: looping over it once for inspection (or re-running the split cell)
# would leave it exhausted and every later step would silently see no pages.
doc = loader.load()

# To inspect what was loaded, slice the list instead of looping over it,
# e.g. `doc[:2]`.

In [5]:
from pinecone import Pinecone, ServerlessSpec
# Pinecone client. No API key is passed explicitly here.
# NOTE(review): presumably the key comes from the environment populated by
# load_dotenv() in the imports cell -- confirm.
pc = Pinecone()

In [6]:
import tqdm as notebook_tqdm
# Name of the Pinecone index that backs the vector store.
index_name = "bajajhackrx"

# Create the serverless index only on first use; later runs reuse it.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1',
        ),
    )

# Handle to the index, used for raw fetches and by the LangChain vector store.
index_store = pc.Index(index_name)

In [7]:
# Embedding model for both indexing and querying. The index is created with
# dimension=3072, so the vectors produced here must have that size.
embedding_model = OpenAIEmbeddings(
    model='text-embedding-3-large',
)

In [8]:
# LangChain vector-store wrapper over the Pinecone index, embedding with
# `embedding_model`.
store = PineconeVectorStore(
    embedding=embedding_model,
    index=index_store,
)

In [9]:
# Split pages into ~500-character chunks with 100 characters of overlap.
# The previous `separators=''` was dropped: the parameter expects a list of
# strings, and an empty string is falsy, so the splitter was already falling
# back to its default separators. Behaviour is unchanged.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)

# NOTE: `doc` is rebound from pages to chunks here because later cells read
# `doc`; re-running this cell therefore splits the chunks again.
doc = splitter.split_documents(doc)

In [10]:
# One freshly generated random id per chunk, in chunk order.
uuids = [str(uuid4()) for _chunk in doc]

In [15]:
# Plain similarity retriever returning the 5 nearest chunks.
retriver = store.as_retriever(
    search_kwargs={'k': 5},
    search_type="similarity",
)

In [11]:
# Embed every chunk and upload it under its pre-generated id; the cell
# displays the list of ids that were written.
store.add_documents(
    documents=doc,
    ids=uuids,
)

['a3c8dc70-9018-4081-b949-0cd666776db5',
 '6fd7f372-e01b-4720-8361-4f56f21173e8',
 '2f70be63-aa79-45fe-9a83-eadf416149a7',
 '8490cffd-61b4-442f-803e-bebc51bdd910',
 'aefab338-08a6-4962-9d52-d31cf4d09aa0',
 'ce5a87ad-479c-447e-a210-859187e5e3c1',
 '57082009-eca8-48e7-8582-a0c145c5baad',
 '9081562c-dfcf-4942-a8fd-b6b8e49270cc',
 '03c7d7d6-3bf9-479a-800f-8aad471ba53e',
 '5ddf19af-96c7-446c-a421-fb333f34ca44',
 'e85fab7f-44f1-464f-8835-86dbc698da34',
 '59dc986c-cdd4-485c-a91c-7055f16f7759',
 '61d7ff0d-298d-478a-9d0e-496756ad5bba',
 '394d8706-dc6a-4ddf-90c9-30ca3fcfd5bf',
 'cb290875-3d5c-41f6-8a8b-3c294a134ac9',
 '8f38b944-a9f1-44fc-a4f8-d9ecbd970605',
 'cd267b4f-54fe-435e-91ab-e8fcb46a5e1c',
 'd9f06c86-5e6a-493e-a1b4-fd7fcf542071',
 '4218b531-bb46-4db1-bebd-975b484cb8c2',
 'e24ab4bd-a679-4307-a670-6574d02271ae',
 '0b63769c-4410-49da-b25e-6072f302d9ed',
 'cad24689-74dc-490b-b95f-c8edd8563b0c',
 '812e37f5-a232-4f01-8e98-d5805798af9b',
 '5778bf5c-df84-492d-9456-a524492baf7b',
 '72b7ca17-ea5d-

In [16]:
# Spot-check: fetch one stored vector straight from the index by id.
# The id is hard-coded rather than taken from `uuids`.
index_store.fetch(
    ids=['03b96341-5b7f-4b2b-9a45-146a1fe24444'],
)

FetchResponse(namespace='', vectors={'03b96341-5b7f-4b2b-9a45-146a1fe24444': Vector(id='03b96341-5b7f-4b2b-9a45-146a1fe24444', values=[0.00898102205, 0.0396677293, 0.0142803825, 0.0463120714, -0.0274203178, 0.0286599342, -0.0475021042, 0.00528076617, 0.00850376952, 0.05285725, 0.0441799313, -0.09183079, 0.0303210206, 0.0306929052, 0.062823765, 0.0189165473, 0.0410808921, 0.00953884888, -0.0307424888, 0.0343621708, -0.013375462, 0.0627246, 0.0103384014, 0.0200198069, 0.0106173158, -0.0103755901, 0.00259699649, -0.0426676, 0.0314862579, 0.0369901583, 0.0451468341, -0.0119313085, 0.0184083059, -0.0121668363, 0.00212904136, 0.0265277941, 0.0219660047, -0.0147142485, 0.026007155, -0.0241105407, -0.0309408288, -0.0216437038, 0.0101834498, 0.0165364835, -0.00594706042, 0.0681293234, 0.0435353331, -0.0275194868, -0.0271723941, 0.00662575, -0.016697634, -0.0116957817, -0.0227841511, -0.0212718192, -0.00200507976, 0.0180116277, -0.0452212095, 0.0520143099, -0.0143299671, -0.0121482415, -0.012594

In [13]:
# Sanity-check retrieval with a free-form query; show the 4 closest chunks.
store.similarity_search(
    query='46-year-old male, knee surgery in Pune, 3-month-old insurance policy',
    k=4,
)

[Document(id='421e2b29-ae67-4fbc-a880-0c07d23334cd', metadata={'author': 'Vinay Dhanokar/Head Office Pune/Corporate Communication/General', 'creationdate': '2022-06-16T20:06:13+05:30', 'creator': 'Microsoft® Word 2016', 'moddate': '2022-06-16T20:06:13+05:30', 'page': 45.0, 'page_label': '46', 'producer': 'Microsoft® Word 2016', 'source': 'BAJHLIP23020V012223.pdf', 'total_pages': 49.0}, page_content='UIN- BAJHLIP23020V012223                                 Global Health Care/ Policy Wordings/Page 46 \n \n \nBajaj Allianz General Insurance Co. Ltd.                       \nBajaj Allianz House, Airport Road, Yerawada, Pune - 411 006. Reg. No.: 113 \nFor more details, log on to: www.bajajallianz.com | E-mail: bagichelp@bajajallianz.co.in or \nCall at: Sales - 1800 209 0144 / Service - 1800 209 5858 (Toll Free No.) \nIssuing Office: \n \nGLOBAL HEALTH CARE'),
 Document(id='1c89fb50-0cd8-4291-9273-a730a24915d8', metadata={'author': 'Vinay Dhanokar/Head Office Pune/Corporate Communication/Gene

In [19]:
# Cleanup: remove the vectors uploaded by this session (ids in `uuids`).
# NOTE(review): this cell sits before the retrieval / QA cells further down,
# so a top-to-bottom run deletes these vectors before they are queried.
# Consider moving it to the end of the notebook.
store.delete(ids=uuids)

In [18]:
from langchain.retrievers import MultiQueryRetriever
# Multi-query retriever: `model` is the LLM handed to MultiQueryRetriever, and
# the wrapped similarity retriever returns 10 chunks per search.
Multiqueryretriver = MultiQueryRetriever.from_llm(
    llm=model,
    retriever=store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    ),
)

In [41]:
from langchain_core.prompts import PromptTemplate

In [35]:
# Answering prompt: takes the retrieved `context` and the user's `question`.
# `validate_template=True` asks PromptTemplate to check the declared input
# variables against the placeholders in the template text.
template = PromptTemplate(
    input_variables=['context','question'],
    validate_template=True,
    template="""
You are helpfull assistant for conversational question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Give response in short.
    Context: {context}\n
    Question : {question}""",
)

In [43]:
def full_context(doc):
    """Join the text of retrieved documents into a single context string.

    Args:
        doc: Iterable of objects exposing a ``page_content`` string attribute
            (e.g. the documents returned by a retriever).

    Returns:
        The ``page_content`` values joined by a blank line, or an empty
        string when ``doc`` is empty.
    """
    # Iterate over the argument itself. The earlier version read a global
    # named `retrieved_docs`, which is not defined anywhere in the notebook
    # and therefore failed (or used stale data) on a fresh kernel.
    return "\n\n".join(chunk.page_content for chunk in doc)

In [44]:
from langchain_core.runnables import RunnableLambda,RunnableParallel,RunnablePassthrough

# Fan the incoming value out into the two prompt variables:
#   question -> the input passed through unchanged
#   context  -> the input run through full_context()
parallel_chain = RunnableParallel(
    question=RunnablePassthrough(),
    context=RunnableLambda(full_context),
)

In [45]:
# Extract the raw question string from the {'question': ...} payload so that
# the retriever is queried with text rather than with the whole dict.
pick_question = RunnableLambda(lambda payload: payload['question'])

# Previously the retriever ran first and its output (a list of documents) was
# fed to both branches, so the prompt's {question} slot received the documents
# and the user's question never reached the model. Here the question feeds
# both the prompt and the retrieval branch.
final_chain = (
    RunnableParallel({
        'question': pick_question,
        'context': pick_question | Multiqueryretriver | RunnableLambda(full_context),
    })
    | template
    | model
)

result = final_chain.invoke({
    'question':'What does the policy cover under in-patient hospitalization within India?'
})

In [46]:
# Display the text of the reply returned by final_chain.
result.content

'The policy covers in-patient hospitalization treatment for illness or accidental injury, including room rent for a single private air-conditioned room, ICU expenses at actuals, pre- and post-hospitalization costs, ambulance, daycare procedures, mental illness treatment, and modern medical technology. It excludes expenses related to diagnostics/evaluation only, outpatient dental treatment, and pre-existing diseases. The coverage limits vary by plan (e.g., INR 3.75 million to INR 37.5 million on domestic plans and USD 100,000 to USD 1 million for international plans).'