In [1]:
import os
import PyPDF2
import openai
# from dotenv import load_dotenv, find_dotenv
import faiss
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm





In [None]:
import os

import openai

# Read the API key from the environment rather than hardcoding it in the
# notebook, so the secret never ends up in the saved file or its outputs.
# To use a .env file instead, install python-dotenv and call
# `load_dotenv()` before this line.
# Falls back to an empty string when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")


In [3]:
def ask_chatbot(question, context=None, model="gpt-3.5-turbo"):
    """Send ``question`` to the chat completion API and return the reply text.

    Args:
        question: The user's question.
        context: Optional policy text. When it is non-empty after stripping
            whitespace it is placed in the user message ahead of the question;
            otherwise only the question is sent.
        model: Chat model name passed to the API. Defaults to the model the
            notebook originally hard-coded.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    messages = [
        {"role": "system", "content": "You are an insurance assistant helping with policy-related queries."}
    ]

    # Only wrap the question with context when there is real text to show.
    if context and context.strip():
        user_content = f"Here is some relevant policy information:\n\n{context}\n\nNow, answer the question: {question}"
    else:
        user_content = question
    messages.append({"role": "user", "content": user_content})

    response = openai.chat.completions.create(
        model=model,
        messages=messages
    )
    return response.choices[0].message.content

In [6]:
def load_faiss_index(index_path="faiss_index.index"):
    """Read the FAISS index stored at ``index_path`` and return it."""
    loaded_index = faiss.read_index(index_path)
    return loaded_index

def load_chunk_metadata(metadata_path="chunk_metadata.pkl"):
    """Unpickle and return the object stored at ``metadata_path``.

    Note: unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(metadata_path, "rb") as pickle_file:
        metadata = pickle.load(pickle_file)
    return metadata
    
def load_chunk(metadata_path="chunk.pkl"):
    """Unpickle and return the object stored at ``metadata_path``.

    Despite the parameter name, the default path points at ``chunk.pkl``.
    Note: unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(metadata_path, "rb") as pickle_file:
        chunks = pickle.load(pickle_file)
    return chunks
    
def load_bm25_index(index_path="bm25_index.pkl"):
    """Unpickle and return the object stored at ``index_path``.

    Note: unpickling can execute arbitrary code, so only load trusted files.
    """
    with open(index_path, "rb") as pickle_file:
        bm25_index = pickle.load(pickle_file)
    return bm25_index
    
def expand_query_with_gpt(query, model="gpt-3.5-turbo"):
    """Use GPT to generate variations of the query for better FAISS retrieval.

    Args:
        query: The original query text.
        model: Chat model name passed to the API. Defaults to the model the
            notebook originally hard-coded.

    Returns:
        A list with one entry per non-blank line of the model's reply, each
        stripped of surrounding whitespace. Returns an empty list when the
        reply has no text content.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant that generates multiple rewordings of a query to improve search performance."},
        {"role": "user", "content": f"Generate alternative phrasings of this query: {query}"}
    ]

    response = openai.chat.completions.create(
        model=model,
        messages=messages
    )

    # The reply content may be None; treat that as "no alternatives".
    reply_text = response.choices[0].message.content or ""

    # Drop blank separator lines so callers never receive empty queries.
    stripped_lines = (line.strip() for line in reply_text.split("\n"))
    return [line for line in stripped_lines if line]

def get_relevant_context(query, index, chunk, bm25, model, k=5, alpha=0.5):
    """Retrieve context for ``query`` by blending FAISS and BM25 scores.

    Every candidate is scored with its *own* scores:
    ``alpha * faiss_similarity + (1 - alpha) * bm25_score``. Chunks found only
    by BM25 are scored ``(1 - alpha) * bm25_score``.

    Args:
        query: Question text; embedded with ``model`` and split on whitespace
            for BM25.
        index: Object exposing ``search(vectors, k)`` that returns ``(D, I)``.
        chunk: Sequence of text chunks, addressed by the ids in ``I`` and by
            position in the BM25 score array.
        bm25: Object exposing ``get_scores(tokens)``; the scores are treated
            as aligned with positions in ``chunk``.
        model: Object exposing ``encode(list_of_texts)``.
        k: Number of candidates taken from each retriever.
        alpha: Weight of the FAISS similarity; ``1 - alpha`` weights BM25.

    Returns:
        The ranked chunks joined by blank lines, or the literal string
        ``"No relevant context found."`` when nothing was retrieved.
    """
    n_chunks = len(chunk)

    query_vector = model.encode([query])
    faiss.normalize_L2(query_vector)
    distances, ids = index.search(query_vector, k)

    bm25_scores = bm25.get_scores(query.split())
    n_scored = len(bm25_scores)

    # Scores are keyed by chunk index so the FAISS and BM25 contributions of
    # the same chunk are combined, regardless of its rank in either list.
    hybrid_scores = {}

    for distance, chunk_id in zip(distances[0], ids[0]):
        chunk_id = int(chunk_id)
        # Skip ids outside the chunk list, including negative ids, which
        # would otherwise silently index from the end of ``chunk``.
        if not 0 <= chunk_id < n_chunks:
            continue
        # Convert FAISS distances to similarity scores.
        # NOTE(review): this assumes smaller D means closer; if the index was
        # built with an inner-product metric, D is already a similarity. Confirm.
        similarity = 1 / (1 + distance)
        if chunk_id < n_scored:
            hybrid_scores[chunk_id] = alpha * similarity + (1 - alpha) * bm25_scores[chunk_id]
        else:
            # No BM25 score exists for this chunk; use the FAISS score alone.
            hybrid_scores[chunk_id] = similarity

    for chunk_id in np.argsort(bm25_scores)[::-1][:k]:
        chunk_id = int(chunk_id)
        if chunk_id < n_chunks and chunk_id not in hybrid_scores:
            hybrid_scores[chunk_id] = (1 - alpha) * bm25_scores[chunk_id]

    ranked_ids = sorted(hybrid_scores, key=hybrid_scores.get, reverse=True)
    # Collapse chunks with identical text, keeping the best-ranked occurrence.
    ranked_chunks = list(dict.fromkeys(chunk[chunk_id] for chunk_id in ranked_ids))

    print("\n🔍 **Retrieved Chunks for Query:**", query)
    for i, retrieved_chunk in enumerate(ranked_chunks):
        print(f"\n--- Retrieved Chunk {i+1} ---\n{retrieved_chunk}\n")

    if not ranked_chunks:
        print("⚠️ No relevant chunks found by FAISS or BM25!")

    return "\n\n".join(ranked_chunks) if ranked_chunks else "No relevant context found."



def answer_insurance_query(question):
    """Answer ``question`` using retrieved policy context.

    Loads the FAISS index, the chunks, the BM25 index and the
    ``all-MiniLM-L6-v2`` sentence-transformer, retrieves context for the
    question, and forwards both to ``ask_chatbot``.

    Note: every call reloads all four resources from scratch.
    """
    faiss_index = load_faiss_index()
    chunks = load_chunk()
    bm25_index = load_bm25_index()
    encoder = SentenceTransformer("all-MiniLM-L6-v2")

    retrieved_context = get_relevant_context(question, faiss_index, chunks, bm25_index, encoder)
    answer = ask_chatbot(question, retrieved_context)
    return answer

In [9]:
def test_integrated_system():
    test_questions = [
        # "What are the benefits of this health insurance policy?",
        # "How do I file a claim under this policy?",
        # "What's the waiting period for pre-existing conditions?",
        "Does this policy cover dental procedures?"
        # "What is the special waiting period"
        
    ]
    
    for question in test_questions:
        print(f"Question: {question}")
        response = answer_insurance_query(question)
        print(f"Response: {response}\n")

# Run the integrated system test.
# This executes when the cell runs: test_integrated_system() prints each
# sample question and the response returned by answer_insurance_query().
test_integrated_system()

Question: Does this policy cover dental procedures?

🔍 **Retrieved Chunks for Query:** Does this policy cover dental procedures?

--- Retrieved Chunk 1 ---
§ 18 Modifications of these General Terms and Conditions of 

Insurance.................................. 17


--- Retrieved Chunk 2 ---
§ 1 Subject, scope and scope of application of insurance coverage ................................ ............. 2 




--- Retrieved Chunk 3 ---
§ 5 Restriction of the duty to render benefits 

I (1) No duty shall exist to render benefits: a) for those illnesses and the consequences thereof and for the consequences of accidents and fatal events caused by events of war or recognized as a military injury which are not ex - pressly included in the insurance coverage; b) for illnesses and accidents based on intentional action and the consequences thereof and for detoxification measures, including detoxifica - tion programs; c) for treatment by physicians, dentists, alterna - tive practitioners and in 