# Step 1: Decode the InsuranceQA Dataset

In [15]:
import json
import gzip

# Function to load vocabulary from a file
def load_vocabulary(vocab_file_path):
    """Read a tab-separated vocabulary file into a dict.

    Each line is stripped of surrounding whitespace and split on its first
    tab; the text before the tab becomes the key and everything after it
    (including any further tabs) becomes the value. Lines that contain no
    tab after stripping are ignored.

    Args:
        vocab_file_path: Path of the UTF-8 encoded vocabulary file.

    Returns:
        dict mapping index string -> word string.
    """
    mapping = {}
    with open(vocab_file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            # partition() cuts at the first tab only; `sep` is empty when no tab exists
            index, sep, word = raw_line.strip().partition("\t")
            if sep:
                mapping[index] = word
    return mapping

# Function to decode category and question only
def decode_questions(encoded_file_path, vocab_dict1, vocab_dict2):
    """Decode the category and question columns of a gzipped, encoded file.

    Every line is stripped and split on tabs. Lines with fewer than two
    fields are skipped. Field 0 is kept verbatim as the category; field 1 is
    split on whitespace and each token is translated to a word.

    Args:
        encoded_file_path: Path of the gzip-compressed, UTF-8 text file.
        vocab_dict1: Primary token -> word mapping.
        vocab_dict2: Secondary mapping, consulted when a token is missing
            from ``vocab_dict1``.

    Returns:
        list of ``{'category': str, 'question': str}`` dicts in file order.
        If any exception occurs while reading or decoding, a message is
        printed and the records collected up to that point are returned
        (an empty list if the file could not be opened at all).
    """

    def _lookup(token):
        # primary vocabulary wins; otherwise secondary; otherwise a placeholder
        fallback = vocab_dict2.get(token, "[UNKNOWN]")
        return vocab_dict1.get(token, fallback)

    records = []

    try:
        with gzip.open(encoded_file_path, 'rt', encoding='utf-8') as handle:
            for raw_line in handle:
                fields = raw_line.strip().split("\t")
                if len(fields) < 2:
                    continue  # not enough columns for category + question

                category, token_string = fields[0], fields[1]
                words = [_lookup(token) for token in token_string.split()]
                records.append({
                    'category': category,
                    'question': " ".join(words)
                })
    except Exception as e:
        print(f"Error decoding file {encoded_file_path}: {e}")

    return records

# Input locations: two vocabulary files and three gzipped question files
vocab_file1_path = r"D:\NLPInsuranceProject\NLPINSURANCE-FINTECHPROJ\dataset insurance qa\vocabulary"
vocab_file2_path = r"D:\NLPInsuranceProject\NLPINSURANCE-FINTECHPROJ\vocabulary.txt"
train_file_path = r"D:\NLPInsuranceProject\NLPINSURANCE-FINTECHPROJ\dataset insurance qa\InsuranceQA.question.anslabel.raw.1500.pool.solr.train.encoded.gz"
test1_file_path = r"D:\NLPInsuranceProject\NLPINSURANCE-FINTECHPROJ\dataset insurance qa\InsuranceQA.question.anslabel.raw.1500.pool.solr.test.encoded.gz"
test2_file_path = r"D:\NLPInsuranceProject\NLPINSURANCE-FINTECHPROJ\dataset insurance qa\InsuranceQA.question.anslabel.raw.1000.pool.solr.test.encoded.gz"

# Build the primary and secondary token -> word mappings
vocab_dict1, vocab_dict2 = [
    load_vocabulary(vocab_path)
    for vocab_path in (vocab_file1_path, vocab_file2_path)
]

# Decode category + question from each split, in train / test1 / test2 order
train_questions, test1_questions, test2_questions = [
    decode_questions(split_path, vocab_dict1, vocab_dict2)
    for split_path in (train_file_path, test1_file_path, test2_file_path)
]

# Single flat list holding every decoded record
all_questions_decoded = [*train_questions, *test1_questions, *test2_questions]

# Persist the combined records as pretty-printed, non-ASCII-escaped JSON
output_file_path = r"D:\NLPInsuranceProject\decoded_questions.json"
with open(output_file_path, 'w', encoding='utf-8') as f_out:
    json.dump(all_questions_decoded, f_out, indent=4, ensure_ascii=False)

print(f"✅ Decoding complete! Results saved to {output_file_path}")

# Preview the first three decoded records
preview = all_questions_decoded[:3]
print(json.dumps(preview, indent=4, ensure_ascii=False))


✅ Decoding complete! Results saved to D:\NLPInsuranceProject\decoded_questions.json
[
    {
        "category": "disability-insurance",
        "question": "Is Disability Insurance Required By Law?"
    },
    {
        "category": "life-insurance",
        "question": "Can Creditors Take Life Insurance After Death?"
    },
    {
        "category": "renters-insurance",
        "question": "Does Travelers Insurance Have Renters Insurance?"
    }
]


In [16]:
pip install speech_recognition

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement speech_recognition (from versions: none)
ERROR: No matching distribution found for speech_recognition


In [33]:
import sqlite3
import speech_recognition as sr
import pyttsx3
import numpy as np
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from IPython.display import display, Markdown

# ✅ Set Up OpenRouter API
# The API key must never be committed with the notebook. It is read from the
# OPENROUTER_API_KEY environment variable; when that is unset the user is
# prompted for it (the prompt does not echo the key).
# NOTE(review): a key was previously hard-coded in this cell — it should be
# revoked on the provider side because it is already in the notebook history.
import getpass
import os

API_KEY = os.environ.get("OPENROUTER_API_KEY") or getpass.getpass("OpenRouter API key: ")
if not API_KEY:
    raise RuntimeError(
        "No OpenRouter API key provided. Set the OPENROUTER_API_KEY environment variable."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY,
    default_headers={
        "Authorization": f"Bearer {API_KEY}",
        "X-Title": "Insurance Chatbot"
    }
)

# ✅ Load Sentence Transformer
# Loaded by the checkpoint name "all-MiniLM-L6-v2"; no fine-tuning happens in this notebook.
model = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ Load Insurance Questions
# This is the JSON file written by the decoding cell above.
import json
with open(r"D:\NLPInsuranceProject\decoded_questions.json", "r", encoding="utf-8") as file:
    insurance_data = json.load(file)

# Only the question text is embedded; the category field is not used here.
questions = [entry["question"] for entry in insurance_data]
question_embeddings = model.encode(questions)

# ✅ Set Up SQLite for Memory
# A single table maps an exact query string to the response that was returned for it.
# check_same_thread=False lets the connection be used from threads other than the creating one.
conn = sqlite3.connect("chatbot_memory.db", check_same_thread=False)
cursor = conn.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS chatbot_memory (query TEXT, response TEXT)")
conn.commit()

# ✅ Function to Search Memory
def search_memory(user_query):
    """Return the stored response for an exactly matching query, if any.

    Uses the module-level ``cursor`` and a parameterised SELECT on the
    ``chatbot_memory`` table.

    Args:
        user_query: Query text to look up (compared with ``=``).

    Returns:
        The ``response`` column of the first matching row, or ``None`` when
        no row matches.
    """
    cursor.execute("SELECT response FROM chatbot_memory WHERE query=?", (user_query,))
    row = cursor.fetchone()
    if row:
        return row[0]
    return None

# ✅ Function to Store in Memory
def store_memory(user_query, response):
    """Insert a (query, response) row into ``chatbot_memory`` and commit.

    Uses the module-level ``cursor`` and ``conn``. A new row is always
    inserted; existing rows for the same query are not checked here.

    Args:
        user_query: Query text to store.
        response: Response text to associate with the query.
    """
    row = (user_query, response)
    cursor.execute("INSERT INTO chatbot_memory (query, response) VALUES (?, ?)", row)
    conn.commit()

# ✅ Function for Semantic Search
def find_relevant_questions(user_query):
    """Return up to three stored questions that score highest against the query.

    The query is encoded with the module-level ``model`` and scored against
    ``question_embeddings`` by dot product; results are ordered from highest
    to lowest score.

    NOTE(review): a dot product equals cosine similarity only for
    unit-length embeddings — confirm that holds for the model in use.

    Args:
        user_query: Free-text question from the user.

    Returns:
        list of question strings taken from the module-level ``questions``.
    """
    encoded_query = model.encode([user_query])
    scores = np.dot(encoded_query, question_embeddings.T)[0]
    # argsort is ascending, so reverse it and keep the leading three indices
    ranked_desc = np.argsort(scores)[::-1]
    return [questions[idx] for idx in ranked_desc[:3]]

# ✅ Function to Get Insurance Answer
def get_insurance_response(user_query):
    """Answer a user query from the memory table or, failing that, the LLM.

    Steps:
      1. Look the query up in the SQLite memory; a non-empty hit is returned
         prefixed with ``"[From Memory] "``.
      2. Retrieve the most similar known questions as extra context.
      3. Ask the model through the OpenRouter client and store the answer.

    An empty or missing completion is never stored: a stored empty value is
    treated as a cache miss on the next lookup, so caching it would only add
    a useless row on every retry.

    Args:
        user_query: The user's question text.

    Returns:
        str: the cached answer (prefixed), the fresh model answer, or a
        string starting with ``"Error: "`` when the request fails or the
        model returns no text. Exceptions from the API call or from storing
        the answer are caught and reported this way rather than raised.
    """
    # Step 1: Check Memory
    memory_response = search_memory(user_query)
    if memory_response:
        return f"[From Memory] {memory_response}"

    # Step 2: Find Relevant Questions
    relevant_questions = find_relevant_questions(user_query)

    # Step 3: Call OpenRouter (DeepSeek R1)
    try:
        response = client.chat.completions.create(
            model="deepseek/deepseek-r1:free",
            messages=[
                {"role": "system", "content": "You are an insurance chatbot based from India who follows rules and regulations related to insurance for India. Answer insurance-related questions only. If non-insurance related questions asked please politely deny saying i am not made for this domain"},
                {"role": "user", "content": f"User Query: {user_query}\nRelevant Questions: {relevant_questions}"}
            ],
            temperature=0.3,
        )
        chatbot_response = response.choices[0].message.content

        # The content field may be None or blank; do not cache or return that as an answer.
        if not chatbot_response or not chatbot_response.strip():
            return "Error: The model returned an empty response. Please try again."

        store_memory(user_query, chatbot_response)  # Save response in memory
        return chatbot_response
    except Exception as e:
        return f"Error: {str(e)}"

# ✅ Voice Input & Output Functions
def recognize_speech():
    """Capture one utterance from the microphone and return it as text.

    Prints a listening prompt, adjusts for ambient noise, records from
    ``sr.Microphone`` and passes the audio to ``recognize_google``.

    Returns:
        The recognised text. On ``sr.UnknownValueError`` or
        ``sr.RequestError`` a fixed human-readable message is returned
        instead of raising, so callers receive a string either way.
    """
    listener = sr.Recognizer()
    with sr.Microphone() as mic:
        print("🎤 Listening...")
        listener.adjust_for_ambient_noise(mic)
        captured = listener.listen(mic)

    try:
        transcript = listener.recognize_google(captured)
    except sr.UnknownValueError:
        transcript = "Sorry, I couldn't understand."
    except sr.RequestError:
        transcript = "Speech recognition service is unavailable."
    return transcript

def speak_response(response):
    """Speak ``response`` aloud through a pyttsx3 engine.

    The text is queued with ``say`` and then played with ``runAndWait``. If
    ``runAndWait`` raises ``RuntimeError``, the engine's loop is ended with
    ``endLoop`` and ``runAndWait`` is attempted once more.

    NOTE(review): the RuntimeError is presumably pyttsx3 reporting an
    already-running loop — confirm against the pyttsx3 documentation.

    Args:
        response: Text to speak.
    """
    tts = pyttsx3.init()
    tts.say(response)

    try:
        tts.runAndWait()
    except RuntimeError:
        # Tear down the loop that is still active, then retry the playback once
        tts.endLoop()
        tts.runAndWait()

# ✅ Main Chat Loop (Conversation Mode)
def start_chatbot():
    """Run the interactive chat loop until the user ends the conversation.

    Each turn reads a typed line (or, when the user types ``voice``, a
    spoken utterance), answers it via ``get_insurance_response``, renders
    the answer as Markdown, speaks it, and asks whether more help is needed.

    Input handling:
      * Typed and spoken input are both stripped and lower-cased, so exit
        words and memory lookups behave the same for either input mode.
      * Blank input is skipped instead of being sent to the model.
      * The fixed failure messages returned by ``recognize_speech`` are
        shown to the user and the turn is retried; they are not treated as
        a question.

    The loop ends when the input is an exit word, or when the follow-up
    answer is ``no`` or an exit word.
    """
    exit_words = ["thank you", "thanks", "ok", "exit", "goodbye", "bye"]
    # Messages recognize_speech() returns in place of a transcript when recognition fails.
    voice_failures = (
        "Sorry, I couldn't understand.",
        "Speech recognition service is unavailable.",
    )

    print("💬 Insurance Chatbot Started! Type 'voice' for voice input. Say 'thank you', 'ok', or 'exit' to stop.")

    while True:
        user_input = input("👤 You: ").strip().lower()
        print("User question: " ,user_input)

        # Handle voice input
        if user_input == "voice":
            spoken = recognize_speech()
            print(f"👤 You (via voice): {spoken}")
            if spoken in voice_failures:
                # Recognition failed: report it and ask again rather than querying the model
                print(f"🤖 Bot: {spoken}")
                continue
            # Normalise exactly like typed input so exit words and cache keys match
            user_input = spoken.strip().lower()

        # Nothing to answer; prompt again
        if not user_input:
            continue

        # Check for exit words
        if user_input in exit_words:
            print("🤖 Bot: You're welcome! Have a great day! 😊")
            speak_response("You're welcome! Have a great day!")
            break

        # Get response
        bot_response = get_insurance_response(user_input)

        display(Markdown(f"🤖 Bot: {bot_response}"))
        speak_response(bot_response)

        # Ask if further help is needed
        follow_up = input("🤖 Bot: Do you need help with anything else? (yes/no) ").strip().lower()
        if follow_up == "no" or follow_up in exit_words:
            print("🤖 Bot: Have a great day! 😊")
            speak_response("Have a great day!")
            break

# ✅ Run the Chatbot
# Starts the interactive loop when this code runs as the main module.
# The captured output below this cell shows the loop starting when the cell is executed.
if __name__ == "__main__":
    start_chatbot()


💬 Insurance Chatbot Started! Type 'voice' for voice input. Say 'thank you', 'ok', or 'exit' to stop.
User question:  is disability insurance required by law?


🤖 Bot: In India, **disability insurance is not universally required by law for individuals**. However, certain **employer-specific obligations** exist under social security laws:

1. **Employees' State Insurance (ESI) Act, 1948**:  
   Employers with 10+ employees (in notified establishments) must contribute to the ESI scheme, which includes **temporary or permanent disability benefits** due to work-related injuries or occupational diseases. This is mandatory for eligible employees earning up to ₹21,000/month.

2. **Workmen’s Compensation Act, 1923**:  
   Employers are legally required to compensate employees for disabilities arising from workplace accidents or occupational hazards.

### For Individuals:
- **Private disability insurance** (covering non-work-related disabilities) is **voluntary**.  
- Government schemes like *Pradhan Mantri Suraksha Bima Yojana* (accidental disability coverage) are optional but recommended.

### Key Takeaway:
While disability coverage is **legally mandated for employers in specific cases**, individuals should consider **supplementing with private insurance** for comprehensive protection beyond statutory limits. Let me know if you need help choosing a policy!

User question:  is disability insurance required by law?


🤖 Bot: [From Memory] In India, **disability insurance is not universally required by law for individuals**. However, certain **employer-specific obligations** exist under social security laws:

1. **Employees' State Insurance (ESI) Act, 1948**:  
   Employers with 10+ employees (in notified establishments) must contribute to the ESI scheme, which includes **temporary or permanent disability benefits** due to work-related injuries or occupational diseases. This is mandatory for eligible employees earning up to ₹21,000/month.

2. **Workmen’s Compensation Act, 1923**:  
   Employers are legally required to compensate employees for disabilities arising from workplace accidents or occupational hazards.

### For Individuals:
- **Private disability insurance** (covering non-work-related disabilities) is **voluntary**.  
- Government schemes like *Pradhan Mantri Suraksha Bima Yojana* (accidental disability coverage) are optional but recommended.

### Key Takeaway:
While disability coverage is **legally mandated for employers in specific cases**, individuals should consider **supplementing with private insurance** for comprehensive protection beyond statutory limits. Let me know if you need help choosing a policy!

User question:  what are available insurance for these in india


🤖 Bot: In India, various insurance options are available across different categories, including life, health, motor, and disability insurance. Below is a breakdown based on your query:

---

### **1. Insurance Policies Under ₹16,000/Year (Approx. $200)**  
Premiums vary by age, coverage, and policy type, but here are affordable options:  
- **Term Life Insurance**:  
  - **LIC** (e-Term Plan), **HDFC Life**, **SBI Life**, **ICICI Pru Life**, **Max Life**.  
  - *Example*: A 30-year-old can get ₹1 crore coverage for ₹8,000–12,000/year.  

- **Health Insurance**:  
  - **Star Health**, **Aditya Birla Health**, **Niva Bupa**, **Care Health**.  
  - *Example*: A basic ₹5 lakh family floater plan costs ₹10,000–15,000/year.  

- **Motor Insurance**:  
  - Third-party bike insurance starts at ₹700–2,000/year; car insurance (third-party) from ₹2,000–7,000/year.  

- **Personal Accident Insurance**:  
  - **Bajaj Allianz**, **Tata AIG**, **Oriental Insurance**.  
  - *Example*: ₹10 lakh coverage for ₹500–1,500/year.  

---

### **2. Short-Term Disability Insurance**  
In India, standalone short-term disability insurance is rare, but coverage is often included in:  
- **Health Insurance Riders**: Add-ons for temporary disability due to accidents (e.g., HDFC Ergo, ICICI Lombard).  
- **Personal Accident Policies**: Covers disability (partial/total) from accidents.  
- **Government Schemes**:  
  - **Pradhan Mantri Suraksha Bima Yojana (PMSBY)**: ₹12/year for ₹2 lakh accidental disability/death coverage.  
  - Employers’ **Employee State Insurance Corporation (ESIC)** benefits (for organized sector workers).  

---

### **Key Notes**  
- Premiums depend on age, health, occupation, and coverage.  
- Always compare policies on **IRDAI-approved platforms** (Policybazaar, Coverfox) or consult agents.  
- Read terms for exclusions (e.g., pre-existing diseases in health insurance).  

Let me know if you need help with a specific policy! 🛡️

🤖 Bot: Have a great day! 😊
