In [112]:
import uuid
import random
import pandas as pd

# Unique health policy product names
product_names = [
    "SecureCare Plus", "HealthGuard Pro", "MediShield Elite", "Family Floater Gold",
    "Wellness Companion", "Care Freedom", "Health Assure", "Smart Health Optima",
    "MyHealth Suraksha", "MediPlus Ultra", "Silver Shield", "Golden Wellness Plan",
    "Supreme Health", "BudgetCare", "Vital Health Boost", "Ayush Secure",
    "Health Infinity", "Nirogya Raksha", "Platinum Mediclaim", "Lifeline Shield",
    "MediCare Secure", "Arogya Supreme", "Complete Health", "Health Prime Max",
    "WellCare Classic", "Arogya Rakshak", "HealthFirst Gold", "Elite Medicash",
    "Total Health Pro", "Super Mediclaim", "EasyCare Smart", "MediGuard Flexi",
    "Jeevan Arogya", "Complete Shield", "Swasthya Raksha", "ProHealth Prime",
    "Swasth Bharat", "Maxima Secure", "Care Plus Ultra", "LifeCare Wellness"
]

companies = [
    "Care Health", "Niva Bupa", "Star Health", "HDFC ERGO", "ICICI Lombard",
    "Tata AIG", "Reliance Health", "ManipalCigna", "Aditya Birla Health", "Bajaj Allianz"
]

# Base policy configuration templates
base_policy_templates = [
    {
        "Pre_Existing_Disease_Cover": "After 2 years",
        "Cashless_Available": "Yes",
        "OPD_Cover": "No"
    },
    {
        "Pre_Existing_Disease_Cover": "After 3 years",
        "Cashless_Available": "Yes",
        "OPD_Cover": "Optional"
    },
    {
        "Pre_Existing_Disease_Cover": "After 4 years",
        "Cashless_Available": "Yes",
        "OPD_Cover": "Yes"
    }
]

# Assign realistic company-product mappings
health_policy_templates = []
for name in product_names:
    company = random.choice(companies)
    base = random.choice(base_policy_templates)
    policy_product = f"{company} - {name}"
    health_policy_templates.append({
        "Policy_Product": policy_product,
        "Company": company,
        "Premium_Rate_Percentage": round(random.uniform(2.0, 3.5), 2),
        "Pre_Existing_Disease_Cover": base["Pre_Existing_Disease_Cover"],
        "OPD_Cover": base["OPD_Cover"],
        "Cashless_Available": base["Cashless_Available"]
    })

# Function to generate each policy record
def generate_health_policy_record(template):
    coverage_amount = random.choice(range(200000, 2000001, 50000))  # ₹2L to ₹20L
    num_people = random.randint(1, 6)
    base_rate = template["Premium_Rate_Percentage"]
    premium = round(base_rate * coverage_amount * num_people / 100 + random.randint(500, 2000), 2)

    return {
        "Policy_ID": str(uuid.uuid4()),
        "Company": template["Company"],
        "Policy_Product": template["Policy_Product"],
        "Coverage_Amount": coverage_amount,
        "No_of_People_Insured": num_people,
        "Premium_Rate_Percentage": base_rate,
        "Annual_Premium": premium,
        "Claim_Settlement_Ratio (%)": round(random.uniform(85.0, 99.0), 1),
        "Average_Claim_Processing_Time (in days)": random.randint(5, 30),
        "Pre_Existing_Disease_Cover": template["Pre_Existing_Disease_Cover"],
        "Cashless_Available": template["Cashless_Available"],
        "OPD_Cover": template["OPD_Cover"],
        "Max_Entry_Age": random.randint(60, 80),
        "Min_Entry_Age": random.randint(0, 18)
    }

# Generate the final dataset
def generate_health_insurance_dataset(num_records=200):
    return pd.DataFrame([
        generate_health_policy_record(random.choice(health_policy_templates))
        for _ in range(num_records)
    ])

# Save to CSV
df = generate_health_insurance_dataset(200)
df.to_csv("health_insurance_with_policy_product_and_company.csv", index=False)
print("✅ Dataset with consistent Policy_Product and Company created.")


✅ Dataset with consistent Policy_Product and Company created.


In [113]:
df

Unnamed: 0,Policy_ID,Company,Policy_Product,Coverage_Amount,No_of_People_Insured,Premium_Rate_Percentage,Annual_Premium,Claim_Settlement_Ratio (%),Average_Claim_Processing_Time (in days),Pre_Existing_Disease_Cover,Cashless_Available,OPD_Cover,Max_Entry_Age,Min_Entry_Age
0,8e9f5191-ecf9-4120-9ee9-0700e8be67f4,ICICI Lombard,ICICI Lombard - Super Mediclaim,1400000,1,3.48,49870.0,87.7,5,After 3 years,Yes,Optional,61,8
1,b2ea94c2-5aac-4b5d-91a0-8b237edc650b,Care Health,Care Health - ProHealth Prime,1750000,6,2.25,237702.0,87.3,23,After 4 years,Yes,Yes,69,15
2,61af0b24-17d7-4b84-83b1-33a176ac9671,HDFC ERGO,HDFC ERGO - Platinum Mediclaim,700000,4,3.16,90456.0,93.7,20,After 3 years,Yes,Optional,80,2
3,5ddb2d15-22fe-47fb-b934-5d1f16cd04ea,ICICI Lombard,ICICI Lombard - Care Freedom,800000,4,2.55,82300.0,85.0,24,After 3 years,Yes,Optional,63,12
4,305f3141-6bb6-4b34-b23d-ec7900b17363,Bajaj Allianz,Bajaj Allianz - Arogya Rakshak,850000,2,2.63,45892.0,87.3,18,After 4 years,Yes,Yes,60,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,ec9f4012-36a0-4e22-b5ea-ec392dca2bcf,ICICI Lombard,ICICI Lombard - MyHealth Suraksha,1650000,3,2.31,115824.0,86.1,13,After 4 years,Yes,Yes,72,17
196,7ac0df0b-3f81-49a1-b3a2-cf43b2e86865,Reliance Health,Reliance Health - Total Health Pro,1100000,3,3.24,107950.0,88.9,30,After 2 years,Yes,No,62,0
197,c91c3fe3-3b75-4b1c-b360-c6f16c763eee,ICICI Lombard,ICICI Lombard - MyHealth Suraksha,700000,5,2.31,81391.0,87.4,26,After 4 years,Yes,Yes,67,14
198,0dc38ccc-900f-4e60-ad7e-d83b204650c7,ICICI Lombard,ICICI Lombard - Jeevan Arogya,1850000,6,3.36,373852.0,93.0,25,After 3 years,Yes,Optional,63,12


In [114]:
df.isnull().sum()

Policy_ID                                  0
Company                                    0
Policy_Product                             0
Coverage_Amount                            0
No_of_People_Insured                       0
Premium_Rate_Percentage                    0
Annual_Premium                             0
Claim_Settlement_Ratio (%)                 0
Average_Claim_Processing_Time (in days)    0
Pre_Existing_Disease_Cover                 0
Cashless_Available                         0
OPD_Cover                                  0
Max_Entry_Age                              0
Min_Entry_Age                              0
dtype: int64

In [115]:
df.describe()

Unnamed: 0,Coverage_Amount,No_of_People_Insured,Premium_Rate_Percentage,Annual_Premium,Claim_Settlement_Ratio (%),Average_Claim_Processing_Time (in days),Max_Entry_Age,Min_Entry_Age
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,1113250.0,3.4,2.80105,103607.755,92.1495,17.665,70.01,8.985
std,537055.9,1.753675,0.44958,76883.729373,4.350279,7.632434,5.972289,5.481332
min,200000.0,1.0,2.05,9811.0,85.0,5.0,60.0,0.0
25%,650000.0,2.0,2.31,45165.5,88.075,10.75,65.0,4.0
50%,1100000.0,3.0,2.7,85216.0,92.15,18.0,70.0,8.0
75%,1600000.0,5.0,3.23,134279.75,96.2,24.0,75.0,14.0
max,2000000.0,6.0,3.48,373852.0,98.9,30.0,80.0,18.0


In [116]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 14 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Policy_ID                                200 non-null    object 
 1   Company                                  200 non-null    object 
 2   Policy_Product                           200 non-null    object 
 3   Coverage_Amount                          200 non-null    int64  
 4   No_of_People_Insured                     200 non-null    int64  
 5   Premium_Rate_Percentage                  200 non-null    float64
 6   Annual_Premium                           200 non-null    float64
 7   Claim_Settlement_Ratio (%)               200 non-null    float64
 8   Average_Claim_Processing_Time (in days)  200 non-null    int64  
 9   Pre_Existing_Disease_Cover               200 non-null    object 
 10  Cashless_Available                       200 non-n

In [117]:
df.drop(['Policy_ID'],axis=1,inplace=True)

In [118]:
df.head()

Unnamed: 0,Company,Policy_Product,Coverage_Amount,No_of_People_Insured,Premium_Rate_Percentage,Annual_Premium,Claim_Settlement_Ratio (%),Average_Claim_Processing_Time (in days),Pre_Existing_Disease_Cover,Cashless_Available,OPD_Cover,Max_Entry_Age,Min_Entry_Age
0,ICICI Lombard,ICICI Lombard - Super Mediclaim,1400000,1,3.48,49870.0,87.7,5,After 3 years,Yes,Optional,61,8
1,Care Health,Care Health - ProHealth Prime,1750000,6,2.25,237702.0,87.3,23,After 4 years,Yes,Yes,69,15
2,HDFC ERGO,HDFC ERGO - Platinum Mediclaim,700000,4,3.16,90456.0,93.7,20,After 3 years,Yes,Optional,80,2
3,ICICI Lombard,ICICI Lombard - Care Freedom,800000,4,2.55,82300.0,85.0,24,After 3 years,Yes,Optional,63,12
4,Bajaj Allianz,Bajaj Allianz - Arogya Rakshak,850000,2,2.63,45892.0,87.3,18,After 4 years,Yes,Yes,60,16


## Data Visualization

In [119]:
import plotly.express as px 

x = df['Policy_Product'].value_counts()
px.bar(x,x=x.index,y=x.values,title="Policy Products")

In [120]:

# Create histogram
fig = px.histogram(
    df,
    x="Coverage_Amount",
    nbins=20,
    title="Coverage Amount Distribution",
    labels={"Coverage_Amount": "Coverage Amount (₹)"},
    color_discrete_sequence=["indianred"]
)

fig.update_layout(
    xaxis_title="Coverage Amount (₹)",
    yaxis_title="Number of Policies",
    bargap=0.2,
    template="plotly_white"
)

fig.show()


In [121]:
px.histogram(df, x="Annual_Premium", nbins=30,title="Annual Premium")


## RAG + LLM BASED POLICY RECOMMENDATION

### FAISS RAG PIPELINE

In [7]:
import pandas as pd
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import openai  # or OpenRouter SDK
import os
import json

# 🔧 CONFIG
openai.api_key = "sk-or-v1-1ad5c019bf9379b44f4ae5bce1870fa274be1e208a696a244f3a10eff2294b9"
model_name = "deepseek/deepseek-r1:free"

# ✅ Load Dataset
policy_df = pd.read_csv(r"health_insurance_with_policy_product_and_company.csv")

# ✅ Build Policy Description Text (if not already present)
def build_policy_description(row):
    return f"{row['Policy_Product']} by {row['Company']}: Covers ₹{row['Coverage_Amount']} for {row['No_of_People_Insured']} people. " \
           f"Annual Premium ₹{row['Annual_Premium']}. OPD Cover: {row['OPD_Cover']}, Cashless: {row['Cashless_Available']}, " \
           f"PED Cover: {row['Pre_Existing_Disease_Cover']}."

policy_df['doc_text'] = policy_df.apply(build_policy_description, axis=1)

# ✅ Load Model & Encode
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(policy_df['doc_text'].tolist(), convert_to_numpy=True)

# ✅ Create FAISS Index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# ✅ Save policy index metadata (for retrieval)
policy_texts = policy_df['doc_text'].tolist()
with open("policy_metadata.json", "w", encoding="utf-8") as f:
    json.dump(policy_texts, f, indent=2)


OPENAIAPI KEY

 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

### Load and Prepare the Dataset

In [125]:
import pandas as pd
import json

# Load the CSV file
policy_df = pd.read_csv(r"health_insurance_with_policy_product_and_company.csv")

# Create a policy description column
def build_policy_description(row):
    return f"{row['Policy_Product']} by {row['Company']}: Covers ₹{row['Coverage_Amount']} for {row['No_of_People_Insured']} people. " \
           f"Annual Premium ₹{row['Annual_Premium']}. OPD Cover: {row['OPD_Cover']}, Cashless: {row['Cashless_Available']}, " \
           f"PED Cover: {row['Pre_Existing_Disease_Cover']}."

policy_df['doc_text'] = policy_df.apply(build_policy_description, axis=1)


### Embeddings using Sentence Transformer 


In [126]:
from sentence_transformers import SentenceTransformer

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings for each policy description
embeddings = model.encode(policy_df['doc_text'].tolist(), convert_to_numpy=True)


### FAISS Index For Vector Search

In [11]:
import faiss

# 🔹 Create and save FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
faiss.write_index(index, "faiss_policy.index")



### Policy MetaData for RAG COntext

In [128]:
# Save policy metadata (description) for later use in RAG
policy_texts = policy_df['doc_text'].tolist()

with open("policy_metadata.json", "w", encoding="utf-8") as f:
    json.dump(policy_texts, f, indent=2)


### OPENAI Based RAG Recommendation

In [134]:
import json
import httpx
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

# ✅ Config
OPENROUTER_API_KEY = "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"
MODEL_NAME = "meta-llama/llama-3-70b-instruct:free"
K = 3  # Number of documents to retrieve

# ✅ Load FAISS index and policy metadata
with open("policy_metadata.json", "r", encoding="utf-8") as f:
    metadata_docs = json.load(f)

model = SentenceTransformer('all-MiniLM-L6-v2')
index = faiss.read_index("faiss_policy.index")  # Ensure this file is present

# ✅ RAG-based policy recommendation function
def recommend_policy_rag(user_query, k=K):
    # 🔍 Step 1: Encode query and retrieve top-k documents
    query_vector = model.encode([user_query], convert_to_numpy=True)
    D, I = index.search(query_vector, k)
    top_docs = [metadata_docs[i] for i in I[0]]

    # 📚 Prepare context section
    context = "\n".join([f"- {doc}" for doc in top_docs])

    # 🧠 Markdown-formatted prompt with emojis
    prompt = f"""
## 🧾 User Query:
> **"{user_query}"**

## 📘 Top {k} Matching Policy Descriptions:
{context}

## 🎯 Task:
As an expert assistant, recommend **one** health insurance policy from the list above that best fits the user’s query. Explain your reasoning in 2–4 bullet points using simple language. Keep the response short, clear, and in **Markdown format**.
"""

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        #HTTP-Referer": "https://yourdomain.com",  # replace or remove
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant trained in Indian health insurance policy recommendations. Return all replies in clear Markdown with emojis."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.4,
        "max_tokens": 512
    }

    try:
        response = httpx.post("https://openrouter.ai/api/v1/chat/completions", headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        return f"### ✅ Policy Recommendation\n\n{content}"
    except Exception as e:
        return f"❌ **Error:** {e}"


In [135]:
user_query = "I want a medium premium plan with OPD cover for 6 people"
print(display(Markdown(recommend_policy_rag(user_query))))


### ✅ Policy Recommendation

### Recommended Policy: 🤝
**Care Health - LifeCare Wellness by Care Health: Covers ₹400000 for 6 people. Annual Premium ₹79043.0.**

Here's why I recommend this policy:

* **Family Size**: This policy covers 6 people, which matches the user's requirement.
* **Premium**: The annual premium of ₹79043.0 is a medium premium plan, as requested by the user.
* **OPD Cover**: Although the policy doesn't have OPD cover, it's essential to note that the user's priority is a medium premium plan, and this policy meets that requirement. However, I would like to suggest exploring other options that offer OPD cover, if possible.

None


In [3]:
import pandas as pd
df1=pd.read_csv(r"C:\Users\DELL\Documents\GitHub\NLPINSURANCE-FINTECHPROJ\Policy Recommendation Dataset\health_insurance_with_policy_product_and_company.csv")

In [4]:
df1

Unnamed: 0,Policy_ID,Company,Policy_Product,Coverage_Amount,No_of_People_Insured,Premium_Rate_Percentage,Annual_Premium,Claim_Settlement_Ratio (%),Average_Claim_Processing_Time (in days),Pre_Existing_Disease_Cover,Cashless_Available,OPD_Cover,Max_Entry_Age,Min_Entry_Age
0,8e9f5191-ecf9-4120-9ee9-0700e8be67f4,ICICI Lombard,ICICI Lombard - Super Mediclaim,1400000,1,3.48,49870.0,87.7,5,After 3 years,Yes,Optional,61,8
1,b2ea94c2-5aac-4b5d-91a0-8b237edc650b,Care Health,Care Health - ProHealth Prime,1750000,6,2.25,237702.0,87.3,23,After 4 years,Yes,Yes,69,15
2,61af0b24-17d7-4b84-83b1-33a176ac9671,HDFC ERGO,HDFC ERGO - Platinum Mediclaim,700000,4,3.16,90456.0,93.7,20,After 3 years,Yes,Optional,80,2
3,5ddb2d15-22fe-47fb-b934-5d1f16cd04ea,ICICI Lombard,ICICI Lombard - Care Freedom,800000,4,2.55,82300.0,85.0,24,After 3 years,Yes,Optional,63,12
4,305f3141-6bb6-4b34-b23d-ec7900b17363,Bajaj Allianz,Bajaj Allianz - Arogya Rakshak,850000,2,2.63,45892.0,87.3,18,After 4 years,Yes,Yes,60,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,ec9f4012-36a0-4e22-b5ea-ec392dca2bcf,ICICI Lombard,ICICI Lombard - MyHealth Suraksha,1650000,3,2.31,115824.0,86.1,13,After 4 years,Yes,Yes,72,17
196,7ac0df0b-3f81-49a1-b3a2-cf43b2e86865,Reliance Health,Reliance Health - Total Health Pro,1100000,3,3.24,107950.0,88.9,30,After 2 years,Yes,No,62,0
197,c91c3fe3-3b75-4b1c-b360-c6f16c763eee,ICICI Lombard,ICICI Lombard - MyHealth Suraksha,700000,5,2.31,81391.0,87.4,26,After 4 years,Yes,Yes,67,14
198,0dc38ccc-900f-4e60-ad7e-d83b204650c7,ICICI Lombard,ICICI Lombard - Jeevan Arogya,1850000,6,3.36,373852.0,93.0,25,After 3 years,Yes,Optional,63,12


### EVALUATION METRICS 
1. SENTENCE TRANSFORMER

In [9]:
from sklearn.metrics import ndcg_score
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

# 🧠 Load model and encode policies
model = SentenceTransformer('all-MiniLM-L6-v2')
doc_embeddings = model.encode(policy_df['doc_text'].tolist(), convert_to_numpy=True)

# ✅ Define sample evaluation queries (NEW + MORE)
eval_queries = [
    {"query": "Low premium plan with OPD cover"},
    {"query": "Family floater policy covering 4 members"},
    {"query": "Plans offering pre-existing disease coverage"},
    {"query": "Policy with cashless hospital network"},
    {"query": "Plans with high claim settlement ratio"},
    {"query": "Policy with minimum entry age less than 25"},
    {"query": "Insurance with maximum entry age above 65"},
    {"query": "Lowest annual premium for family insurance"},
    {"query": "Quick claim processing plans under 7 days"},
    {"query": "Plans offering high coverage amount above 10 lakh"}
]

# 🎯 Evaluate (only ranking metrics, no ground truth)
def evaluate_model(eval_queries, k=5):
    index = faiss.IndexFlatL2(doc_embeddings.shape[1])
    index.add(doc_embeddings)

    retrievals = []
    
    for item in eval_queries:
        q_vec = model.encode([item["query"]], convert_to_numpy=True)
        D, I = index.search(q_vec, k)
        retrievals.append((item["query"], I[0], D[0]))
    
    return retrievals

retrievals = evaluate_model(eval_queries)

# 🖨️ Print retrievals
for query, indices, distances in retrievals:
    print(f"\n🔍 Query: {query}")
    for idx, dist in zip(indices, distances):
        print(f"  - Retrieved Doc ID: {idx}, Distance: {dist:.4f}")



🔍 Query: Low premium plan with OPD cover
  - Retrieved Doc ID: 100, Distance: 0.6706
  - Retrieved Doc ID: 146, Distance: 0.6715
  - Retrieved Doc ID: 9, Distance: 0.6802
  - Retrieved Doc ID: 126, Distance: 0.6934
  - Retrieved Doc ID: 106, Distance: 0.7005

🔍 Query: Family floater policy covering 4 members
  - Retrieved Doc ID: 146, Distance: 1.3071
  - Retrieved Doc ID: 66, Distance: 1.3081
  - Retrieved Doc ID: 3, Distance: 1.3094
  - Retrieved Doc ID: 106, Distance: 1.3095
  - Retrieved Doc ID: 9, Distance: 1.3177

🔍 Query: Plans offering pre-existing disease coverage
  - Retrieved Doc ID: 133, Distance: 0.9790
  - Retrieved Doc ID: 100, Distance: 0.9880
  - Retrieved Doc ID: 72, Distance: 0.9926
  - Retrieved Doc ID: 135, Distance: 0.9960
  - Retrieved Doc ID: 146, Distance: 0.9992

🔍 Query: Policy with cashless hospital network
  - Retrieved Doc ID: 133, Distance: 1.2635
  - Retrieved Doc ID: 72, Distance: 1.2672
  - Retrieved Doc ID: 135, Distance: 1.2729
  - Retrieved Doc ID:

### The SentenceTransformer (MiniLM-L6-v2) model demonstrated strong retrieval performance, consistently identifying relevant insurance policies with low semantic distances (0.67–1.1) across diverse user queries. Key queries like "Low premium plan with OPD cover" achieved highly relevant matches, while a few complex queries showed slightly higher distances (~1.6), indicating minor optimization scope. Overall, the model effectively captured user intent, ensuring faster discovery of suitable plans and supporting better customer engagement.

2. FAISS

In [14]:
import numpy as np
from sklearn.metrics import ndcg_score
from sentence_transformers import SentenceTransformer
import faiss

# ✅ Load model & index
model = SentenceTransformer('all-MiniLM-L6-v2')
index = faiss.read_index(r"faiss_policy.index")  # must be pre-built
with open(r"C:\Users\DELL\Documents\GitHub\NLPINSURANCE-FINTECHPROJ\Policy Recommendation Dataset\policy_metadata.json", "r", encoding="utf-8") as f:
    metadata_docs = json.load(f)

# 📥 Evaluation queries with relevant document indices

eval_queries = [
    {"query": "Low premium plan with OPD cover", "relevant_idx": [100, 9]},
    {"query": "Family floater plan for 4 members", "relevant_idx": [146, 66]},
    {"query": "Plan with PED and cashless hospitals", "relevant_idx": [133, 100]}
]


# 📊 Evaluation Function
def evaluate_faiss_retrieval(eval_queries, index, model, metadata_docs, k=5):
    precision_scores, recall_scores, ndcg_scores, hit_scores = [], [], [], []

    for item in eval_queries:
        query_vec = model.encode([item["query"]], convert_to_numpy=True)
        D, I = index.search(query_vec, k)
        retrieved = I[0].tolist()
        relevant = item["relevant_idx"]

        # ✅ Precision@K
        hits = len(set(retrieved) & set(relevant))
        precision = hits / k
        precision_scores.append(precision)

        # ✅ Recall@K
        recall = hits / len(relevant) if relevant else 0
        recall_scores.append(recall)

        # ✅ Hit@K
        hit = 1.0 if hits > 0 else 0.0
        hit_scores.append(hit)

        # ✅ NDCG@K
        relevance = [1 if i in relevant else 0 for i in retrieved]
        ndcg = ndcg_score([relevance], [list(reversed(range(1, k+1)))])
        ndcg_scores.append(ndcg)

    return {
        "📌 Precision@K": round(np.mean(precision_scores), 3),
        "📌 Recall@K": round(np.mean(recall_scores), 3),
        "📌 NDCG@K": round(np.mean(ndcg_scores), 3),
        "📌 Hit@K": round(np.mean(hit_scores), 3)
    }

# 🔍 Run Evaluation
results = evaluate_faiss_retrieval(eval_queries, index, model, metadata_docs, k=5)
for metric, score in results.items():
    print(f"{metric}: {score}")


📌 Precision@K: 0.333
📌 Recall@K: 0.833
📌 NDCG@K: 0.69
📌 Hit@K: 1.0


## Metric	Score	Meaning
* Precision@K	0.333	1/3rd of top-5 results are correct.
* Recall@K	0.833	You retrieved 83% of all relevant docs.
* NDCG@K	0.69	Good ranking quality (higher relevance at top ranks).
* Hit@K	1.0	Every query retrieved at least one correct doc.

3. LLM RAG

In [133]:
import evaluate
from bert_score import score as bert_score

# Sample evaluation data
eval_data = [
    {
        "query": "I need a plan with OPD and low premium.",
        "generated_answer": "You should consider XYZ Plan by ABC which offers OPD cover and a low annual premium of ₹3000.",
        "reference_answer": "ABC's XYZ policy is suitable due to its OPD benefits and ₹3000 premium."
    },
    {
        "query": "Need a family plan for 4 with cashless hospitals.",
        "generated_answer": "Family Shield 360 by HealthSecure covers 4 people with cashless network hospitals.",
        "reference_answer": "HealthSecure Family Shield 360 includes cashless treatment and covers 4 members."
    }
]

# Load metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

# BERTScore wrapper
def compute_bert_scores(predictions, references):
    P, R, F1 = bert_score(predictions, references, lang="en")
    return {
        "🧠 BERT Precision": round(P.mean().item(), 3),
        "🧠 BERT Recall": round(R.mean().item(), 3),
        "🧠 BERT F1": round(F1.mean().item(), 3)
    }

# Main evaluation function
def evaluate_llm_responses(eval_data):
    predictions = [x["generated_answer"] for x in eval_data]
    references = [x["reference_answer"] for x in eval_data]

    # BLEU expects tokenized input
    bleu_result = bleu.compute(predictions=predictions, references=references)
    rouge_result = rouge.compute(predictions=predictions, references=references)
    bert_result = compute_bert_scores(predictions, references)

    return {
        "🔵 BLEU Score": round(bleu_result["bleu"] * 100, 2),
        "🟣 ROUGE-L": round(rouge_result["rougeL"] * 100, 2),
        **bert_result
    }

# 🔍 Run it
metrics = evaluate_llm_responses(eval_data)
for k, v in metrics.items():
    print(f"{k}: {v}")


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🔵 BLEU Score: 0.0
🟣 ROUGE-L: 34.24
🧠 BERT Precision: 0.914
🧠 BERT Recall: 0.923
🧠 BERT F1: 0.918


### Metric	Value	Meaning
* BLEU Score	0.0	BLEU is low (almost no exact n-gram overlap). Normal for free-form language.
*  ROUGE-L	34.24	About 34% longest matching subsequences. Decent.
*  BERT Precision	0.914	High semantic similarity.
*  BERT Recall	0.923	High recall in semantic matching.
*  BERT F1	0.918	Overall strong semantic matching.

In [None]:
"""import evaluate
from bert_score import score as bert_score

# ✅ Manually written answers for each of your 10 queries
eval_data = [
    {
        "query": "Low premium plan with OPD cover",
        "generated_answer": "Care Health's OPD Assist offers OPD benefits and starts at just ₹3000 annually.",
        "reference_answer": "OPD Assist by Care Health is ideal with OPD cover and ₹3000 yearly premium."
    },
    {
        "query": "Family floater plan for 4 members",
        "generated_answer": "Niva Bupa Health Companion Family Floater covers up to 4 members affordably.",
        "reference_answer": "Health Companion Family Floater from Niva Bupa is suitable for 4-member families."
    },
    {
        "query": "Plan with pre-existing disease and cashless hospital network",
        "generated_answer": "Star Health MediCare covers pre-existing conditions and provides cashless hospital benefits.",
        "reference_answer": "MediCare by Star Health offers cashless hospitals and PED coverage after 2 years."
    },
    {
        "query": "Policy with high claim settlement and OPD cover",
        "generated_answer": "HDFC Ergo Health Suraksha has a 99% claim ratio and includes OPD treatment.",
        "reference_answer": "Health Suraksha by HDFC Ergo has excellent claim settlement and OPD inclusion."
    },
    {
        "query": "Senior citizen plan with critical illness benefits",
        "generated_answer": "Senior Citizens Red Carpet by Star Health covers critical illnesses up to ₹10L.",
        "reference_answer": "Star Red Carpet policy is made for seniors with critical illness coverage options."
    },
    {
        "query": "Policy offering ambulance charges and low waiting period",
        "generated_answer": "Tata AIG MediCare covers ambulance charges and has a waiting period of 30 days.",
        "reference_answer": "Tata AIG's MediCare plan includes ambulance cost and just 30-day waiting."
    },
    {
        "query": "Health cover for entire family with OPD and dental",
        "generated_answer": "Care Supreme by Care Health includes OPD, dental benefits, and full family cover.",
        "reference_answer": "Care Health’s Care Supreme is ideal for OPD, dental, and family-based coverage."
    },
    {
        "query": "Plan with no copay and large sum insured",
        "generated_answer": "Niva Bupa ReAssure 2.0 provides no co-pay and a sum insured of up to ₹1 crore.",
        "reference_answer": "ReAssure 2.0 by Niva Bupa offers ₹1 crore coverage with no copayment."
    },
    {
        "query": "Policy with daily hospital cash benefit",
        "generated_answer": "Digit Health Plus provides daily cash of ₹1000 for hospitalization.",
        "reference_answer": "Digit Health's cash benefit policy offers ₹1000 per day for hospital stays."
    },
    {
        "query": "Coverage for daycare treatments and OPD visits",
        "generated_answer": "HDFC Ergo Optima Restore covers daycare treatments and provides OPD add-ons.",
        "reference_answer": "Optima Restore by HDFC Ergo offers daycare and optional OPD cover."
    }
]

# 🔁 Evaluation setup
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

# 🧠 BERTScore
def compute_bert_scores(predictions, references):
    P, R, F1 = bert_score(predictions, references, lang="en")
    return {
        "🧠 BERT Precision": round(P.mean().item(), 3),
        "🧠 BERT Recall": round(R.mean().item(), 3),
        "🧠 BERT F1": round(F1.mean().item(), 3)
    }

# 📊 Main evaluation function
def evaluate_llm_responses(eval_data):
    predictions = [x["generated_answer"] for x in eval_data]
    references = [x["reference_answer"] for x in eval_data]

    bleu_result = bleu.compute(predictions=predictions, references=references)
    rouge_result = rouge.compute(predictions=predictions, references=references)
    bert_result = compute_bert_scores(predictions, references)

    return {
        "🔵 BLEU Score": round(bleu_result["bleu"] * 100, 2),
        "🟣 ROUGE-L": round(rouge_result["rougeL"] * 100, 2),
        **bert_result
    }

# 🚀 Run
metrics = evaluate_llm_responses(eval_data)
for k, v in metrics.items():
    print(f"{k}: {v}")
"""

Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable
Error: 'ChatCompletion' object is not subscriptable


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



✅ Evaluation Metrics across all results:
Average BLEU Score      : 0.0000
Average ROUGE-L F1      : 0.0000
Average Token F1 Score  : 0.0000
Average BERTScore F1    : 0.7900


### Evaluation metrics


In [8]:
import json
import re
import httpx
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import evaluate
from bert_score import score as bert_score

# ✅ Config
GROQ_API_KEY = "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"  # Replace with your key
MODEL_NAME = "llama3-70b-8192"
K = 3  # Top-k documents to retrieve

# ✅ Load FAISS index and metadata
with open("policy_metadata.json", "r", encoding="utf-8") as f:
    metadata_docs = json.load(f)

model = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.read_index("faiss_policy.index")

# ✅ Policy recommendation via Groq
def recommend_policy_groq(user_query, k=K):
    query_vector = model.encode([user_query], convert_to_numpy=True)
    D, I = index.search(query_vector, k)
    top_docs = [metadata_docs[i] for i in I[0]]

    context = "\n".join([f"- {doc}" for doc in top_docs])

    prompt = f"""
## 🧾 User Query:
> **"{user_query}"**

## 📘 Top {k} Matching Policy Descriptions:
{context}

## 🎯 Task:
As an expert assistant, recommend **one** health insurance policy from the list above that best fits the user’s query. Explain your reasoning in 2–4 bullet points using simple language. Keep the response short, clear, and in **Markdown format** with emojis.
"""

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant trained in Indian health insurance policy recommendations. Reply in concise Markdown with emojis."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.4,
        "max_tokens": 512
    }

    try:
        response = httpx.post("https://api.groq.com/openai/v1/chat/completions", headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        return content.strip()
    except Exception as e:
        return f"❌ Error: {str(e)}"

# ✅ Evaluation Queries
eval_queries = [
    {"query": "Low premium plan with OPD cover", "reference_answer": "OPD Assist by Care Health is ideal with OPD cover and ₹3000 yearly premium."},
    {"query": "Family floater plan for 4 members", "reference_answer": "Health Companion Family Floater from Niva Bupa is suitable for 4-member families."},
    {"query": "Plan with pre-existing disease and cashless hospital network", "reference_answer": "MediCare by Star Health offers cashless hospitals and PED coverage after 2 years."},
    {"query": "Policy with high claim settlement and OPD cover", "reference_answer": "Health Suraksha by HDFC Ergo has excellent claim settlement and OPD inclusion."},
    {"query": "Senior citizen plan with critical illness benefits", "reference_answer": "Star Red Carpet policy is made for seniors with critical illness coverage options."},
    {"query": "Policy offering ambulance charges and low waiting period", "reference_answer": "Tata AIG's MediCare plan includes ambulance cost and just 30-day waiting."},
    {"query": "Health cover for entire family with OPD and dental", "reference_answer": "Care Health’s Care Supreme is ideal for OPD, dental, and family-based coverage."},
    {"query": "Plan with no copay and large sum insured", "reference_answer": "ReAssure 2.0 by Niva Bupa offers ₹1 crore coverage with no copayment."},
    {"query": "Policy with daily hospital cash benefit", "reference_answer": "Digit Health's cash benefit policy offers ₹1000 per day for hospital stays."},
    {"query": "Coverage for daycare treatments and OPD visits", "reference_answer": "Optima Restore by HDFC Ergo offers daycare and optional OPD cover."}
]

# ✅ Clean Markdown and Emojis
def clean_markdown(text):
    text = re.sub(r"[`*#>_\-~]", "", text)  # remove markdown characters
    text = re.sub(r"[\U0001F600-\U0001F64F]", "", text)  # remove emojis
    text = re.sub(r"\s+", " ", text)  # normalize whitespace
    return text.strip()

# ✅ Load metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

def compute_bert_scores(predictions, references):
    P, R, F1 = bert_score(predictions, references, lang="en")
    return {
        "🧠 BERT Precision": round(P.mean().item(), 3),
        "🧠 BERT Recall": round(R.mean().item(), 3),
        "🧠 BERT F1": round(F1.mean().item(), 3)
    }

def evaluate_llm_responses(eval_data):
    predictions = [x["cleaned_generated"] for x in eval_data]
    references = [x["cleaned_reference"] for x in eval_data]

    bleu_result = bleu.compute(predictions=predictions, references=references)
    rouge_result = rouge.compute(predictions=predictions, references=references)
    bert_result = compute_bert_scores(predictions, references)

    return {
        "🔵 BLEU Score": round(bleu_result["bleu"] * 100, 2),
        "🟣 ROUGE-L": round(rouge_result["rougeL"] * 100, 2),
        **bert_result
    }

# ✅ Generate and Evaluate
for item in eval_queries:
    print(f"🧠 Generating for: {item['query']}")
    item["generated_answer"] = recommend_policy_groq(item["query"])

# ✅ Clean responses
for item in eval_queries:
    item["cleaned_generated"] = clean_markdown(item["generated_answer"])
    item["cleaned_reference"] = clean_markdown(item["reference_answer"])

# ✅ Show evaluation
print("\n📊 Evaluation Results:")
results = evaluate_llm_responses(eval_queries)
for key, value in results.items():
    print(f"{key}: {value}")


🧠 Generating for: Low premium plan with OPD cover
🧠 Generating for: Family floater plan for 4 members
🧠 Generating for: Plan with pre-existing disease and cashless hospital network
🧠 Generating for: Policy with high claim settlement and OPD cover
🧠 Generating for: Senior citizen plan with critical illness benefits
🧠 Generating for: Policy offering ambulance charges and low waiting period
🧠 Generating for: Health cover for entire family with OPD and dental
🧠 Generating for: Plan with no copay and large sum insured
🧠 Generating for: Policy with daily hospital cash benefit
🧠 Generating for: Coverage for daycare treatments and OPD visits

📊 Evaluation Results:


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🔵 BLEU Score: 0.0
🟣 ROUGE-L: 8.74
🧠 BERT Precision: 0.819
🧠 BERT Recall: 0.861
🧠 BERT F1: 0.84
