In [2]:
import fitz

def read_pdf(file_path):
    """Extract the plain text of every page of a PDF.

    Args:
        file_path: Path of the PDF, passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages, concatenated in page order, as one string.
    """
    # Use the document as a context manager so it is closed (and its file
    # handle released) even when extraction fails part-way through.
    with fitz.open(file_path) as pdf:
        # join() avoids rebuilding the accumulated string once per page.
        return "".join(page.get_text("text") for page in pdf)


In [3]:
from docx import Document

def read_doc(file_path):
    """Return the text of a Word document, one paragraph per line.

    Args:
        file_path: Path handed to ``Document`` to open the file.

    Returns:
        A string in which the text of each paragraph, in document order,
        is followed by a newline character.
    """
    document = Document(file_path)
    # Every paragraph, the last one included, is terminated with "\n".
    lines = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(lines)

In [4]:
from email import message_from_binary_file

def _decode_text_part(part):
    """Decode one message part's payload to ``str`` using its declared charset."""
    payload = part.get_payload(decode=True)
    if payload is None:
        # Defensive: get_payload(decode=True) yields None for container parts.
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="ignore")
    except LookupError:
        # The declared charset is unknown to Python; fall back to UTF-8.
        return payload.decode("utf-8", errors="ignore")


def read_email(file_path):
    """Read an ``.eml`` file and return its subject and plain-text body.

    Args:
        file_path: Path of the RFC 822 message file; opened in binary mode.

    Returns:
        A string of the form ``"Subject: <subject> \\n \\n <body>"``. For a
        multipart message the body is the concatenation of every
        ``text/plain`` part in walk order; otherwise it is the decoded
        payload of the message itself.
    """
    with open(file_path, "rb") as f:
        msg = message_from_binary_file(f)

    # Header lookup takes the bare field name. The previous key "subject: "
    # (with colon and space) could never match, so the subject was always "".
    subject = msg.get("subject", "")

    if msg.is_multipart():
        body = "".join(
            _decode_text_part(part)
            for part in msg.walk()
            if part.get_content_type() == "text/plain"
        )
    else:
        body = _decode_text_part(msg)

    text = f"Subject: {subject} \n \n {body}"
    return text
    

In [5]:
# Cleaning Text
import re

def clean_text(text):
    """Collapse every run of whitespace to one space and trim both ends.

    Args:
        text: The string to normalise.

    Returns:
        ``text`` with each whitespace run replaced by a single space and
        with leading and trailing whitespace removed.
    """
    whitespace_run = re.compile(r'\s+')
    collapsed = whitespace_run.sub(' ', text)
    return collapsed.strip()


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter # pip install lanchain

def chunk_text(cl_text, chunk_size=1000, chunk_overlap=100):
    """Split text into overlapping chunks with a recursive character splitter.

    Args:
        cl_text: The text to split.
        chunk_size: Value passed as the splitter's ``chunk_size``.
            Defaults to 1000, the value that was previously hard-coded.
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``.
            Defaults to 100, the value that was previously hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``cl_text`` (the list of chunks).
    """
    # NOTE(review): when the input has been through clean_text, all newlines
    # are already collapsed to spaces, so the "\n\n" and "\n" separators can
    # never match and splitting effectively happens on "." and " ".
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ".", " "],
    )
    return text_splitter.split_text(cl_text)

In [7]:
import os

# Input document. file_name (the basename without extension) is reused by
# later cells to name the persisted index and chunk files.
file_path = "file/EDLHLGA23009V012223.pdf"
file_type = os.path.splitext(file_path)[1].lower()
file_name = os.path.splitext(os.path.basename(file_path))[0]

# Map each supported extension to the function that extracts its text.
# ".doc" is kept for backward compatibility; ".docx" is added because
# read_doc opens the file with docx.Document.
readers = {
    ".pdf": read_pdf,
    ".doc": read_doc,
    ".docx": read_doc,
    ".eml": read_email,
}

reader = readers.get(file_type)
if reader is None:
    # Fail loudly. Merely printing here used to fall through to
    # clean_text(text), which raised NameError on a fresh kernel or silently
    # reused `text` left over from a previous run.
    raise ValueError(
        f"Invalid file type {file_type!r}: only pdf, doc/docx and eml files are accepted as input"
    )

# Extract, then clean and chunk exactly once for every file type.
text = reader(file_path)
cl_text = clean_text(text)
ch_text = chunk_text(cl_text)



In [8]:
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import normalize
import faiss
import numpy as np
import pickle

import os

chunks = ch_text
if not chunks:
    # Without this guard an empty chunk list would only fail later, and
    # obscurely, when the embedding dimension is read.
    raise ValueError("No text chunks to index: the input document produced no text")

# Embed every chunk with the sentence-transformer model.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(chunks, convert_to_numpy=True)

# L2-normalise so that the inner-product index below scores by cosine
# similarity, and hand FAISS the contiguous float32 matrix it expects.
embeddings = np.ascontiguousarray(normalize(embeddings), dtype=np.float32)

# Flat inner-product index wrapped so that vectors carry explicit ids.
dimension = embeddings.shape[1]
index = faiss.IndexIDMap(faiss.IndexFlatIP(dimension))

# Id i corresponds to chunks[i]. FAISS ids are 64-bit, and np.arange does not
# produce int64 by default on every platform, so request it explicitly.
ids = np.arange(len(embeddings), dtype=np.int64)
index.add_with_ids(embeddings, ids)

# Create the output directories on first run; the writes below fail otherwise.
os.makedirs("indexes", exist_ok=True)
os.makedirs("pickle", exist_ok=True)

faiss.write_index(index, f"indexes/{file_name}vector.index")

with open(f"pickle/{file_name}chunks.pkl", "wb") as f:
    pickle.dump(ch_text, f)

  return forward_call(*args, **kwargs)


In [9]:
import faiss
import pickle

# Locations of the two artefacts written when the index was built.
index_path = f"indexes/{file_name}vector.index"
chunks_path = f"pickle/{file_name}chunks.pkl"

# Restore the FAISS index from disk.
index = faiss.read_index(index_path)

# Restore the pickled chunk list that the index ids refer to.
with open(chunks_path, "rb") as chunk_file:
    chunks = pickle.load(chunk_file)


In [None]:
from sentence_transformers import SentenceTransformer
import faiss, pickle
import subprocess

embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed the query and retrieve the 3 best-matching chunks from the index.
query = "knee query in Pune 3 months old policy"
query_embedding = embed_model.encode([query], convert_to_numpy=True)
distances, indices = index.search(query_embedding, 3)

# FAISS pads the result with -1 when the index holds fewer than k vectors;
# chunks[-1] would silently return the last chunk, so drop those labels.
relevant_clauses = [chunks[i] for i in indices[0] if i != -1]


# Assemble the prompt: the query, the retrieved clauses, then instructions.
context = "\n\n".join(relevant_clauses)
prompt = f"""

User query:
{query}

Relevant clauses:
{context}

Follow these steps:
1. Extract details from the query: age, gender, procedure, location, policy duration.
2. Decide whether the claim is approved or rejected.
3. Justify the decision with references to specific clauses.
4. Return JSON with fields: decision, amount (if any), justification, used_clauses.
"""

# Run the model through the ollama CLI. The prompt is passed as a separate
# argv element (no shell involved), so it needs no quoting.
result = subprocess.run(
    ["ollama", "run", "mistral", prompt],
    capture_output=True,
    text=True
)

# Surface CLI failures instead of printing an empty answer.
if result.returncode != 0:
    raise RuntimeError(
        f"ollama exited with code {result.returncode}: {result.stderr.strip()}"
    )

output = result.stdout.strip()

print("LLM Output:\n", output)


  return forward_call(*args, **kwargs)


In [10]:
from sentence_transformers import SentenceTransformer
import faiss, pickle
import requests
import json

# Load your embedding model
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# Example query
query = "knee query in Pune 3 months old policy"

# Create query embedding
query_embedding = embed_model.encode([query], convert_to_numpy=True)

# Search top 5 chunks
distances, indices = index.search(query_embedding, 5)
# FAISS pads the result with -1 when the index holds fewer than k vectors;
# chunks[-1] would silently return the last chunk, so drop those labels.
relevant_clauses = [chunks[i] for i in indices[0] if i != -1]

# Build prompt
context = "\n\n".join(relevant_clauses)
prompt = f"""
User query:
{query}

Relevant clauses:
{context}

Follow these steps:
1. Extract details from the query: age, gender, procedure, location, policy duration.
2. Decide whether the claim is approved or rejected.
3. Justify the decision with references to specific clauses.
4. Return JSON with fields: decision, amount (if any), justification, used_clauses.
"""

# Call Ollama API
url = "http://localhost:11434/api/generate"
payload = {"model": "mistral", "prompt": prompt, "stream": False}

# requests never times out by default; bound the wait so a stalled server
# cannot hang the notebook. Generation can be slow, hence the generous limit.
response = requests.post(url, json=payload, timeout=300)
# Raise on 4xx/5xx instead of trying to parse an error body as a result.
response.raise_for_status()

# Parse and print output
result = response.json()
output = result.get("response", "").strip()
print("LLM Output:\n", output)


  return forward_call(*args, **kwargs)


LLM Output:
 {
  "decision": "Not Approved",
  "justification": "The query does not specify the age and gender of the insured individual. However, based on the policy description provided, it is a maternity or well-mother insurance for female expectant mothers or those who have delivered a newborn baby. The procedure mentioned in the query is 'knee query' which is not related to routine medical care provided as per the policy. Moreover, the policy excludes any charges payable under the maternity section (if opted as an optional cover) of the policy, which includes knee-related treatments as it falls under the category of infertility treatments or any other treatment not directly related to maternity or well-baby care. Lastly, the query mentions a 3-months old policy, but the policy does not cover routine medical care after the first discharge from the hospital (30 days following birth of new born baby).",
  "used_clauses": ["Insurance is the subject matter of solicitation", ". Routine 

In [71]:
# Print the cleaned text (the result of clean_text) for manual inspection.
print(cl_text)

Well Baby Well Mother- Add On Wordings Add On Wordings- Well Baby Well Mother Base Product UIN: EDLHLGP21462V032021 Add On UIN: EDLHLGA23009V012223 Edelweiss General Insurance Company Limited, Corporate Office: 5th Floor, Tower 3, Kohinoor City Mall, Kohinoor City, Kirol Road, Kurla (West), Mumbai - 400 070, Registered Office: Edelweiss House, Off CST Road, Kalina, Mumbai -400 098, IRDAI Regn. No.: 159, CIN: U66000MH2016PLC273758, Reach us on: 1800 12000, Email: support@edelweissinsurance.com, Website: www.edelweissinsurance.com, Issuing/Corporate Office: +91 22 4272 2200, Grievance Redressal Officer: +91 22 4931 4422, Dedicated Toll-Free Number for Grievance: 1800 120 216216. Trade logo displayed above belongs to Edelweiss Financial Services Limited and is used by Edelweiss General Insurance Company Limited under license. Insurance is the subject matter of solicitation. Air Ambulance Cover In consideration of the payment of additional premium to Us, We will cover the expenses incurred

In [51]:
# Report how many chunks were produced, then list each one with its position.
chunk_count = len(ch_text)
print("Length:", chunk_count)
for chunk_no, chunk in enumerate(ch_text):
    print(f"Chunk {chunk_no}: {chunk}")

Length: 8
Chunk 0: Well Baby Well Mother- Add On Wordings Add On Wordings- Well Baby Well Mother Base Product UIN: EDLHLGP21462V032021 Add On UIN: EDLHLGA23009V012223 Edelweiss General Insurance Company Limited, Corporate Office: 5th Floor, Tower 3, Kohinoor City Mall, Kohinoor City, Kirol Road, Kurla (West), Mumbai - 400 070, Registered Office: Edelweiss House, Off CST Road, Kalina, Mumbai -400 098, IRDAI Regn. No.: 159, CIN: U66000MH2016PLC273758, Reach us on: 1800 12000, Email: support@edelweissinsurance.com, Website: www.edelweissinsurance.com, Issuing/Corporate Office: +91 22 4272 2200, Grievance Redressal Officer: +91 22 4931 4422, Dedicated Toll-Free Number for Grievance: 1800 120 216216. Trade logo displayed above belongs to Edelweiss Financial Services Limited and is used by Edelweiss General Insurance Company Limited under license. Insurance is the subject matter of solicitation
Chunk 1: . Insurance is the subject matter of solicitation. Air Ambulance Cover In consideration o