In [15]:
!pip install pdfplumber networkx transformers accelerate sentencepiece torch requests




In [30]:
from google.colab import files
import pdfplumber

# Open Colab's upload dialog; several PDFs can be selected in one go.
uploaded = files.upload()
# The keys of the upload result are the names later used to open each file.
pdf_files = [filename for filename in uploaded.keys()]

def extract_text(path):
    """Return the concatenated text of every page of the PDF at ``path``.

    Pages for which ``page.extract_text()`` returns a falsy value (``None`` or
    an empty string) are skipped. Each remaining page's text is followed by a
    single newline.
    """
    with pdfplumber.open(path) as pdf:
        # Extraction happens while the PDF is still open.
        page_texts = [page.extract_text() for page in pdf.pages]
    return "".join(f"{chunk}\n" for chunk in page_texts if chunk)

# Map each uploaded filename to the text extracted from that PDF.
documents = {pdf_name: extract_text(pdf_name) for pdf_name in pdf_files}

print("Loaded documents:", [*documents])


Saving commercial-insurance-policy.pdf to commercial-insurance-policy (3).pdf
Saving COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording.pdf to COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording (3).pdf
Loaded documents: ['commercial-insurance-policy (3).pdf', 'COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording (3).pdf']


In [31]:
import requests, json

import os
import sys
from getpass import getpass

# SECURITY: a literal OpenRouter key used to be assigned here. A key that has
# been saved in a notebook must be treated as leaked: revoke it in the
# OpenRouter dashboard and issue a new one. The key is now taken from the
# OPENROUTER_API_KEY environment variable so it never lands in the file.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()
if not OPENROUTER_API_KEY and "ipykernel" in sys.modules:
    # Running inside a notebook kernel with no key in the environment:
    # ask for it without echoing, so it is not stored in the cell output.
    OPENROUTER_API_KEY = getpass("OpenRouter API key: ").strip()

# Chat-completions endpoint every request in this notebook is posted to.
BASE_URL = "https://openrouter.ai/api/v1/chat/completions"

# Model identifier sent in the "model" field of each request payload.
MODEL = "tngtech/deepseek-r1t2-chimera:free"


In [32]:
def call_openrouter(prompt, timeout=120):
    """Send ``prompt`` as a single user message and return the model's reply.

    Args:
        prompt: Text placed in the one ``user`` message of the chat request.
        timeout: Seconds passed to ``requests.post``. Without a timeout a
            stalled connection would block the cell indefinitely.

    Returns:
        The ``content`` of the first choice in the JSON response.

    Raises:
        requests.HTTPError: The endpoint answered with an error status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        RuntimeError: The response body did not have the
            ``choices[0].message.content`` structure (for example when the
            body only carries an error description).
    """
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
    }

    res = requests.post(
        BASE_URL,
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        },
        json=payload,  # requests serialises the dict to a JSON body
        timeout=timeout,
    )
    res.raise_for_status()

    body = res.json()
    try:
        return body["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        # Surface whatever the service sent instead of a bare KeyError.
        detail = body.get("error", body) if isinstance(body, dict) else body
        raise RuntimeError(
            f"OpenRouter response contained no completion: {detail!r}"
        ) from exc


In [33]:
def generate_triplets(text):
    """Ask the model to express ``text`` as ``(HEAD, RELATION, TAIL)`` triples.

    The reply is returned exactly as ``call_openrouter`` delivers it; no
    parsing happens here.
    """
    instructions = (
        "\n"
        "Extract insurance knowledge as triples.\n"
        "\n"
        "Format STRICTLY:\n"
        "(HEAD, RELATION, TAIL)\n"
        "\n"
        "Relations ONLY:\n"
        "COVERS, EXCLUDES, LIMIT, CONDITION, DEFINITION\n"
        "\n"
        "Text:\n"
    )
    return call_openrouter(f"{instructions}{text}\n")


In [34]:
# One extraction request per document; the raw reply is stored under the
# document's filename.
all_triplets_text = {}
for doc_name in documents:
    print(f"Processing: {doc_name}")
    all_triplets_text[doc_name] = generate_triplets(documents[doc_name])


Processing: commercial-insurance-policy (3).pdf
Processing: COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording (3).pdf


In [39]:
import re, networkx as nx

def parse_triplets(text):
    """Extract ``(head, relation, tail)`` tuples from model output.

    Args:
        text: Raw model reply expected to contain ``(HEAD, RELATION, TAIL)``
            items. ``None`` or an empty string yields an empty list.

    Returns:
        A list of 3-tuples of stripped strings, in order of appearance. The
        relation is upper-cased so it compares equal to the upper-case
        relation names used by the rest of the notebook.

    Parsing rules:
        * HEAD and RELATION may not contain commas or newlines, so a match can
          no longer begin at an unrelated ``(`` earlier in the text (e.g. a
          parenthesised remark on a previous line).
        * TAIL may contain commas (lists of perils) and, like the other
          fields, one level of balanced parentheses, so
          ``Terrorism (optional)`` is kept whole instead of being cut at the
          inner ``)``.
    """
    if not text:
        return []

    # Either an ordinary character, or a complete "(...)" group with no
    # further nesting; matched lazily so each field is as short as possible.
    field = r"(?:[^,()\n]|\([^()\n]*\))+?"
    tail = r"(?:[^()\n]|\([^()\n]*\))+?"  # same, but commas are allowed
    pattern = rf"\(\s*({field})\s*,\s*({field})\s*,\s*({tail})\s*\)"

    return [
        (head.strip(), relation.strip().upper(), tail_text.strip())
        for head, relation, tail_text in re.findall(pattern, text)
    ]

# Parse each document's raw model reply into (head, relation, tail) tuples.
all_triplets = {
    name: parse_triplets(txt)
    for name, txt in all_triplets_text.items()
}

# A MultiDiGraph keeps parallel edges. With a plain DiGraph there is only one
# edge per (head, tail) pair, so a second triple between the same two nodes
# (a different relation, or the same fact from another document) overwrote
# the first edge's `relation`/`source` attributes.
G = nx.MultiDiGraph()

# Exact repeats of the same fact from the same document are added only once.
_seen_edges = set()

for name, triplets in all_triplets.items():
    for h, r, t in triplets:
        edge_key = (h, r, t, name)
        if edge_key in _seen_edges:
            continue
        _seen_edges.add(edge_key)
        G.add_edge(h, t, relation=r, source=name)

print("Nodes:", G.number_of_nodes())
print("Edges:", G.number_of_edges())


Nodes: 159
Edges: 103


In [40]:
import re

def classify_question(question):
    """Ask the model to classify ``question`` and parse its labelled reply.

    Args:
        question: The user's question, inserted verbatim into the prompt.

    Returns:
        A dict with keys:
        ``entity``  - text after the ``ENTITY:`` label, or ``None``;
        ``type``    - lower-cased text after ``TYPE:``; when that text is not
                      exactly one of the five allowed types but contains one
                      (e.g. ``"coverage."``), the contained type is returned;
                      ``None`` when the label is missing;
        ``summary`` - text after ``SUMMARY:``, or ``None``;
        ``raw``     - the unmodified model reply.

    Labels are matched case-insensitively, and markdown emphasis around the
    label or value (``**TYPE:** Coverage``) is ignored, so decorated replies
    no longer produce values that later lookups cannot use.
    """
    prompt = f"""
You classify insurance questions.

Return:

ENTITY: <main concept>
TYPE: <coverage|exclusion|limit|condition|definition>
SUMMARY: <short explanation>

Question: {question}
"""

    raw = call_openrouter(prompt)
    text = raw or ""  # a missing reply is parsed as empty text, not a crash

    def _field(label):
        # Allow emphasis characters between the label, the colon and the value.
        found = re.search(
            rf"\b{label}[\s*_`]*:[\s*_`]*(.+)", text, flags=re.IGNORECASE
        )
        if not found:
            return None
        value = found.group(1).strip().strip("*_`").strip()
        return value or None

    allowed_types = ("coverage", "exclusion", "limit", "condition", "definition")
    qtype = _field("TYPE")
    if qtype is not None:
        qtype = qtype.lower()
        if qtype not in allowed_types:
            # Pick the allowed type that appears earliest in the reply text;
            # if none appears, keep the text as the model wrote it.
            hits = [(qtype.find(known), known) for known in allowed_types if known in qtype]
            if hits:
                qtype = min(hits)[1]

    return {
        "entity": _field("ENTITY"),
        "type": qtype,
        "summary": _field("SUMMARY"),
        "raw": raw,
    }


In [41]:
# Try the classifier on one sample question; the returned dict is the cell's output.
classify_question("Does the policy cover fire?")


{'entity': 'Fire',
 'type': 'coverage',
 'summary': "Asks whether fire damage is included under the policy's protection.",
 'raw': "ENTITY: Fire  \nTYPE: Coverage  \nSUMMARY: Asks whether fire damage is included under the policy's protection."}

In [42]:
def search_graph(entity, query_type, graph=None):
    """Find graph edges of the requested kind that mention ``entity``.

    Args:
        entity: Text to look for; matched case-insensitively as a substring
            of either endpoint of an edge.
        query_type: One of ``coverage``, ``exclusion``, ``limit``,
            ``condition``, ``definition`` (case and surrounding whitespace
            are ignored).
        graph: Object whose ``edges(data=True)`` yields ``(u, v, attrs)``
            triples. Defaults to the notebook-level graph ``G``.

    Returns:
        A list of ``(head, relation, tail, source)`` tuples, or ``[]`` when
        ``entity`` is empty or ``query_type`` is not recognised.

    The entity is compared against the head as well as the tail of each edge.
    Matching the tail only missed triples whose subject is the head, such as
    ``('FRAUDULENT CLAIMS', CONDITION, ...)``.
    """
    mapping = {
        "coverage": "COVERS",
        "exclusion": "EXCLUDES",
        "limit": "LIMIT",
        "condition": "CONDITION",
        "definition": "DEFINITION"
    }

    if not entity or not isinstance(query_type, str):
        return []

    relation = mapping.get(query_type.strip().lower())
    needle = entity.strip().lower()
    if relation is None or not needle:
        return []

    if graph is None:
        graph = G

    matches = []
    for u, v, data in graph.edges(data=True):
        if str(data.get("relation", "")).upper() != relation:
            continue
        if needle in str(u).lower() or needle in str(v).lower():
            matches.append((u, relation, v, data.get("source")))

    return matches


In [43]:
def answer_with_graph(question, facts):
    """Answer ``question`` from the given graph facts via the model.

    When ``facts`` is empty a fixed "nothing found" sentence is returned and
    no request is made. Otherwise each fact is rendered with ``str`` on its
    own line inside the prompt and the model's reply is returned.
    """
    if facts:
        prompt_lines = [
            "",
            "Answer the user's question ONLY using the policy facts provided.",
            "",
            "Facts:",
            "\n".join(map(str, facts)),
            "",
            "Question:",
            f"{question}",
            "",
            "If the policy does not clearly mention something, say so directly.",
            "",
        ]
        return call_openrouter("\n".join(prompt_lines))

    return "Nothing in the uploaded policies specifically mentions this."


In [44]:
def ask(question):
    """Run the full question pipeline: classify, look up facts, answer.

    Returns a dict holding the classification dict, the list of matching
    graph facts, and the answer text built from those facts.
    """
    result = {"classification": classify_question(question)}
    result["facts"] = search_graph(
        result["classification"]["entity"],
        result["classification"]["type"],
    )
    result["answer"] = answer_with_graph(question, result["facts"])
    return result


In [45]:
# End-to-end check of the pipeline on one sample question; the result dict is the cell's output.
ask("Does the policy cover fire?")


{'classification': {'entity': 'Fire',
  'type': 'coverage',
  'summary': 'Asks whether fire damage is included as a covered peril under the policy.',
  'raw': 'ENTITY: Fire  \nTYPE: Coverage  \nSUMMARY: Asks whether fire damage is included as a covered peril under the policy.'},
 'facts': [('PROPERTY DAMAGE SECTION',
   'COVERS',
   'FIRE LIGHTNING EXPLOSION',
   'commercial-insurance-policy (3).pdf'),
  ('Section I',
   'COVERS',
   'Fire, Explosion, Lightning, Earthquake, Storm, Flood, Subsidence, Impact Damage, Riot, Terrorism*',
   'COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording (3).pdf')],
 'answer': 'Yes, the policy covers fire.  \n\nBoth policy documents explicitly list "Fire" as a covered peril:\n- The "PROPERTY DAMAGE SECTION" in **commercial-insurance-policy (3).pdf** includes coverage for "FIRE LIGHTNING EXPLOSION".  \n- "Section I" in **COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording (3).pdf** specifically lists "Fire" as a covered event.  \n\nThere is no ambigui

In [46]:
def summarize_policy(name, text):
    """Request a short executive summary of the policy text from the model.

    ``name`` is accepted for symmetry with the calling loop but is not put
    into the prompt; only ``text`` is sent.
    """
    prompt_lines = [
        "",
        "Provide a concise executive summary of this insurance policy.",
        "",
        "Focus on:",
        "- what it mainly protects",
        "- who it's for",
        "- notable strengths",
        "- any important warnings",
        "",
        "Keep it under 6–8 sentences.",
        "",
        "Policy:",
        f"{text}",
        "",
    ]
    return call_openrouter("\n".join(prompt_lines))


In [47]:
# Summarise every loaded document, printing each summary under a header line.
policy_summaries = {}

for doc_name, doc_text in documents.items():
    print(f"\n=== {doc_name} ===")
    policy_summaries[doc_name] = summarize_policy(doc_name, doc_text)
    print(policy_summaries[doc_name])



=== commercial-insurance-policy (3).pdf ===
**Executive Summary:**  

=== COMMERCIAL PACKAGE INSURANCE POLICY - Policy Wording (3).pdf ===
**Executive Summary:**  


In [48]:
# Group each document's triples by relation as (head, tail) pairs.
# Triples whose relation is not one of the five keys are left out.
policy_knowledge = {}

for doc_name, doc_triplets in all_triplets.items():
    grouped = {
        rel: []
        for rel in ("COVERS", "EXCLUDES", "LIMIT", "CONDITION", "DEFINITION")
    }

    for head, rel, tail in doc_triplets:
        bucket = grouped.get(rel)
        if bucket is not None:
            bucket.append((head, tail))

    policy_knowledge[doc_name] = grouped


In [49]:
from pprint import pprint

# Pretty-print the grouped facts of each policy under a header line.
for doc_name in policy_knowledge:
    print(f"\n===== {doc_name} =====")
    pprint(policy_knowledge[doc_name])



===== commercial-insurance-policy (3).pdf =====
{'CONDITION': [('POLICY', 'FAIR PRESENTATION OF RISKS REQUIRED'),
               ('POLICY', 'REASONABLE CARE TO PREVENT DAMAGE'),
               ('POLICY', 'NOTIFICATION OF UNOCCUPIED BUILDINGS'),
               ('POLICY', 'ARBITRATION FOR DISPUTED CLAIMS'),
               ('POLICY', 'SANCTIONS EXCLUSION FOR PROHIBITED JURISDICTIONS'),
               ('FRAUDULENT CLAIMS',
                'POLICY CANCELLATION AND REPAYMENT REQUIRED'),
               ('INDEX-LINKING', 'ADJUSTS SUMS INSURED PER APPROVED INDICES')],
 'COVERS': [('COMMERCIAL INSURANCE POLICY', 'PROPERTY DAMAGE'),
            ('COMMERCIAL INSURANCE POLICY', 'BUSINESS INTERRUPTION'),
            ('COMMERCIAL INSURANCE POLICY', 'LIABILITIES'),
            ('PROPERTY DAMAGE SECTION', 'FIRE LIGHTNING EXPLOSION'),
            ('PROPERTY DAMAGE SECTION', 'AIRCRAFT DAMAGE'),
            ('PROPERTY DAMAGE SECTION', 'RIOT DAMAGE'),
            ('BUSINESS INTERRUPTION SECTION', 'LOSS OF

In [50]:
def compare_policies(policy_knowledge):
    """Ask the model for a structured comparison of all policies.

    ``policy_knowledge`` maps a policy name to a dict with the keys
    ``COVERS``, ``EXCLUDES``, ``LIMIT``, ``CONDITION`` and ``DEFINITION``.
    One labelled section per policy is appended to the instructions and the
    model's reply is returned.
    """
    sections = [
        "\n"
        "Compare the following insurance policies.\n"
        "\n"
        "Explain clearly:\n"
        "\n"
        "- Overall positioning\n"
        "- Key coverages (differences)\n"
        "- Major exclusions compared\n"
        "- Limits (who offers more / less)\n"
        "- Important conditions\n"
        "- Who each policy is suitable for\n"
        "\n"
        "Be objective, structured, and precise.\n"
    ]

    for name, data in policy_knowledge.items():
        sections.append(
            f"\n\nPOLICY: {name}\n"
            f"COVERS: {data['COVERS']}\n"
            f"EXCLUDES: {data['EXCLUDES']}\n"
            f"LIMITS: {data['LIMIT']}\n"
            f"CONDITIONS: {data['CONDITION']}\n"
            f"DEFINITIONS: {data['DEFINITION']}\n"
        )

    return call_openrouter("".join(sections))


In [51]:
# Send the grouped facts of all policies to the model and print its comparison.
comparison_report = compare_policies(policy_knowledge)
print(comparison_report)


Here is a structured, objective comparison of the two commercial insurance policies:

---

### **1. Overall Positioning**
| **Policy 1 (UK Policy)** | **Policy 2 (India-Based Policy)** |
|--------------------------|-----------------------------------|
| Targeted at **UK businesses** (England, Scotland, Wales, NI, Channel Islands, Isle of Man). Broad coverage with specialized extensions (e.g., Terrorism, Equipment Breakdown, Assault). | Designed for **Indian businesses**, emphasizing **property and transit risks** with sub-limits tied to Sum Insured percentages. Includes earthquake/flood and offers a waiver for underinsurance. |

---

### **2. Key Coverages Compared**
| **Coverage**              | **Policy 1**                                                                 | **Policy 2**                                                                 |
|---------------------------|------------------------------------------------------------------------------|----------------------------

In [52]:
def analyze_requirements(user_input):
    """Have the model turn free-text ``user_input`` into a list of insurance needs.

    The model's reply is returned unchanged.
    """
    prompt_lines = [
        "",
        "Extract the user's insurance needs.",
        "",
        "Return:",
        "",
        "- Business type",
        "- Main risks",
        "- Must-have coverages",
        "- Deal-breaker exclusions",
        "- Desired limits",
        "- Budget sensitivity (if mentioned)",
        "",
        "User input:",
        f"{user_input}",
        "",
    ]
    return call_openrouter("\n".join(prompt_lines))


In [53]:
# Example customer description; the model's extraction of their needs is kept
# in `user_requirements` and printed.
user_requirements = analyze_requirements(
"""
I run a small electronics retail shop.
I am worried about fire, theft and damage due to short-circuit.
I don’t want policies that exclude electrical damage.
And I need at least 25 lakh coverage.
"""
)

print(user_requirements)


**Insurance Needs Summary:**

1. **Business Type:**  
   Electronics Retail (Small Brick-and-Mortar Shop)  

2. **Main Risks:**  
   - Fire  
   - Theft/Burglary  
   - Electrical Damage (e.g., short-circuit-related damage to inventory or property)  

3. **Must-Have Coverages:**  
   - **Property Insurance** (covering physical assets like inventory, equipment, and store premises)  
   - **Theft/Burglary Coverage**  
   - **Electrical/Fire Damage Coverage** (explicitly covering short-circuit risks)  
   - **Equipment Breakdown Coverage** (optional but recommended for electronics businesses)  

4. **Deal-Breaker Exclusions:**  
   - Policies that **exclude electrical damage** (short-circuit damage must be covered).  

5. **Desired Limits:**  
   - Minimum coverage of **₹25 lakh** (for property, inventory, and business interruption risks).  

6. **Budget Sensitivity:**  
   - Not explicitly mentioned; prioritize comprehensive coverage over cost constraints.  

---  
**Final Recommendation

In [54]:
def recommend_policy(user_needs, policy_knowledge):
    """Ask the model which policy best fits ``user_needs``.

    ``policy_knowledge`` maps a policy name to a dict with the keys
    ``COVERS``, ``EXCLUDES``, ``LIMIT``, ``CONDITION`` and ``DEFINITION``.
    The prompt holds the instructions, then the user's needs, then one
    labelled section per policy. The model's reply is returned.
    """
    pieces = [
        "\n"
        "Based ONLY on the structured policy data below and the user's needs:\n"
        "\n"
        "1. Evaluate each policy against needs\n"
        "2. Explain pros/cons\n"
        "3. Recommend the best fitting policy\n"
        "4. If no policy fits, say so clearly.\n"
        "\n"
        "DO NOT invent features that are not listed.\n"
        "\n",
        f"\nUser Needs:\n{user_needs}\n",
    ]

    for name, data in policy_knowledge.items():
        pieces.append(
            f"\nPOLICY: {name}\n"
            f"COVERS: {data['COVERS']}\n"
            f"EXCLUDES: {data['EXCLUDES']}\n"
            f"LIMITS: {data['LIMIT']}\n"
            f"CONDITIONS: {data['CONDITION']}\n"
            f"DEFINITIONS: {data['DEFINITION']}\n"
        )

    return call_openrouter("".join(pieces))


In [55]:
# Combine the extracted needs with the grouped policy facts and print the model's recommendation.
recommendation = recommend_policy(user_requirements, policy_knowledge)
print(recommendation)


### Evaluation of Policies Against User Needs  

#### **Policy 1: COMMERCIAL INSURANCE POLICY**  
**Pros**:  
- Covers **fire, lightning, explosion** under Property Damage Section (matches *Fire* risk).  
- Includes **Equipment Breakdown** for mechanical failures (*optional recommendation met*).  
- **Legal Expenses** and **Public/Products Liability** included (extra protections beyond user’s core needs).  

**Cons**:  
- ❌ **Deal-Breaker Exclusion**: **Theft/Burglary Coverage** is **missing** for property/inventory. Only covers:  
  - Bodily injury from robbery attempts (via "Assault Extension").  
  - Loss of money in transit (via "Money Section").  
- ❌ **Electrical Damage** is **not explicitly covered** (no mention of short-circuit risks).  
- ❌ **Inadequate Limits**: Limits use **£ (GBP)** not **₹ (INR)**, conflicting with user’s ₹25 lakh requirement.  
- ❌ Excludes **theft from unattended vehicles** and **glass breakage** (not critical but highlights gaps).  

**Verdict**: Does *