In [12]:
!pip install openai langchain faiss-cpu PyMuPDF python-dotenv



In [13]:
import fitz

def extract_text_from_pdf(path):
    """Return the text of every page of the PDF at ``path``, joined by newlines.

    Args:
        path: Filesystem path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A single string: each page's ``get_text()`` output, in page order,
        separated by ``"\\n"``.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(path) as doc:
        return "\n".join(page.get_text() for page in doc)

# Policy PDFs to index. The /content prefix is presumably a Colab runtime
# upload directory — adjust when running elsewhere.
pdf_paths = [
    "/content/BAJHLIP23020V012223.pdf",
    "/content/CHOTGDP23004V012223.pdf",
    "/content/EDLHLGA23009V012223.pdf",
    "/content/HDFHLIP23024V072223.pdf",
    "/content/ICIHLIP22012V012223.pdf",
]

# One extracted-text string per PDF, in the same order as pdf_paths.
documents = list(map(extract_text_from_pdf, pdf_paths))

In [14]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.documents import Document

# Split each document into ~1000-character chunks with 100 characters of overlap.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Flat list of chunk Documents across all PDFs; each chunk records which PDF
# it came from under metadata["source"].
chunks = []
for source_path, text in zip(pdf_paths, documents):
    pieces = splitter.create_documents([text])
    for piece in pieces:
        piece.metadata = {"source": source_path}
    chunks += pieces



In [15]:
!pip install -U langchain-community

from langchain_community.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import os

os.environ["OPENAI_API_KEY"] = "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"
embedding_model = OpenAIEmbeddings()
db = FAISS.from_documents(chunks, embedding_model)



  embedding_model = OpenAIEmbeddings()


In [16]:
!pip install -U langchain-openai



In [17]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Retrieval-augmented QA chain over the FAISS index `db`.
# The number of chunks to retrieve has to be passed via `search_kwargs`;
# a bare `k=` keyword on `as_retriever` is not a documented retriever setting.
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-4", temperature=0),
    retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 4}),
    # Keep the retrieved chunks in the response so answers can be traced to a PDF.
    return_source_documents=True,
)

In [18]:
# Free-text description of the case to evaluate.
query = "46M, knee surgery, Pune, 3-month policy"
# Wrap the query in instructions asking for a JSON object with four fields
# (decision, amount, justification, clause_reference). The doubled braces are
# literal braces inside the f-string.
# NOTE(review): this whole prompt, template included, is the chain input —
# presumably it is also what the retriever searches with; confirm.
structured_prompt = f"""
Based on the following health insurance policy documents, answer this query:

Query: {query}

Return the result as JSON:
{{
  "decision": "approved/rejected",
  "amount": "₹amount or NA",
  "justification": "reason",
  "clause_reference": "Clause number"
}}
"""
# Run the chain and print the text stored under the "result" key of its response.
response = qa_chain.invoke(structured_prompt)
print(response["result"])

{
  "decision": "rejected",
  "amount": "NA",
  "justification": "The policy tenure is 1 year and the policy does not cover joint replacement surgery within the first 2 years of the policy period.",
  "clause_reference": "Tenure of Policy, Exclusion 27"
}


In [19]:
%%writefile requirements.txt
# Dependencies installed by this notebook, plus streamlit which app.py imports.
# NOTE(review): versions are unpinned — consider pinning for reproducible installs.
openai
langchain
langchain-community
langchain-openai
faiss-cpu
PyMuPDF
python-dotenv
streamlit


Writing requirements.txt


In [20]:
%%writefile .gitignore
# Local secrets — never commit these.
.env
# Streamlit loads st.secrets (used by app.py) from this file.
.streamlit/secrets.toml
# Python / Jupyter caches.
__pycache__/
.ipynb_checkpoints/


Writing .gitignore


In [21]:
%%writefile .env.example
# Copy this file to .env and replace the placeholder with your own key.
# Example files are committed to version control: never put a real key here.
OPENAI_API_KEY=your-openai-api-key-here


Writing .env.example


In [22]:
%%writefile README.md
# 🧠 HackRx Policy QA System

This app uses GPT-4 and LangChain to extract answers from unstructured health policy PDFs based on user queries.

## 💡 Sample Query

`46M, knee surgery, Pune, 3-month policy`


Writing README.md


In [23]:
%%writefile app.py
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI
import os

st.set_page_config(page_title="Policy QA", layout="centered", initial_sidebar_state="collapsed")

st.markdown("""
<style>
body {background-color: #0f1117; color: white;}
section.main > div {background-color: #1e1e1e; padding: 2em; border-radius: 1em;}
</style>
""", unsafe_allow_html=True)

st.title("🧐 Health Policy Query Enhancer")
query = st.text_input("Enter your query:")

if query:
    os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=db.as_retriever(search_type="similarity", k=4),
        return_source_documents=True
    )
    structured_prompt = f"""
    Based on the following insurance policy documents, answer this query:

    Query: {query}

    Return the result as JSON:
    {{
      "decision": "approved/rejected",
      "amount": "₹amount or NA",
      "justification": "reason",
      "clause_reference": "Clause number"
    }}
    """
    response = qa_chain.invoke(structured_prompt)
    st.json(response["result"])


Writing app.py
