In [15]:
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from utilities.hashing import calculate_hash
# from utilities.redis_cache import get_cached_summary, cache_summary
from utilities.file_utils import (
    get_doc_by_hash,
    get_file,
    save_doc_data,
    extract_using_textract,
    load_pdf_using_PyPDF,
    save_file,
)
from utilities.redis_cache import get_cached_data
from utilities.llm_utils import (
    answer_from_structured_data,
    create_embeddings,
    generate_summary,
)

In [3]:
doc_url = "https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D"

questions= [
    "What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?",
]

In [4]:
import requests
response = requests.get(doc_url)
if response.status_code != 200:
    raise Exception("Failed to download the document from the provided URL.")

file_bytes = response.content
filename = doc_url.split("/")[-1].split("?")[0]

In [8]:
file_hash = calculate_hash(file_bytes)
os.makedirs("docs", exist_ok=True)
_, ext = os.path.splitext(filename)
file_path = os.path.join("docs", file_hash + ext)
if not os.path.exists(file_path):
    with open(file_path, "wb") as f:
        f.write(file_bytes)
else:
    print("File already exists!")

File already exists!


In [10]:
doc_data = get_doc_by_hash(file_hash)

In [11]:
print(doc_data)

None


In [None]:
summary = get_file(f"docs/summary/{file_hash}.txt")
print(summary)


b"This document outlines the **National Parivar Mediclaim Plus Policy**, an indemnity-based health insurance plan offered by National Insurance Company Limited. It covers medical expenses for illness or injury requiring hospitalization, day care treatment, or domiciliary hospitalization, up to a Floater Sum Insured for the insured family.\r\n\r\nHere's a summary of its key aspects:\r\n\r\n**1. Core Coverage & Benefits:**\r\n*   **In-patient Treatment:** Covers room/ICU charges (with limits based on Plan A/B/C or PPN rates), medical practitioner fees, anesthesia, blood, oxygen, OT charges, surgical appliances, medicines, diagnostic procedures, internally implanted prosthetics, dental treatment due to injury, plastic surgery, medically necessary hormone replacement, vitamins/tonics (if part of treatment), and circumcision (if medically necessary).\r\n*   **Pre & Post Hospitalization:** Covers medical expenses incurred up to 30 days before hospitalization and up to 60 days after discharge

In [None]:

SIMPLIFY_USER_QUERY = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a smart assistant. Your job is to take user's query and simplify it by removing specificity while preserving the core intent of the question, and rewriting to keep only the most relevant part of the question.",
        ),
        (
            "human",
            "Document Summary:\n{summary}\n\nUser's Question:\n{question}\n\nRewrite this question so that it captures what the user is most likely asking based on the document.",
        ),
    ]
)

def simplify_query(query):
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    clarify_chain = SIMPLIFY_USER_QUERY | llm | StrOutputParser()

    clarified_question = clarify_chain.invoke({
        "summary": summary,
        "question": query
    })

    print(clarified_question)

In [20]:
simplify_query(questions[0])

What is the grace period for policy renewal?
