In [1]:
import os

# ✅ Fix OpenMP crash (OMP: Error #15)
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import os
import chromadb
import torch
from transformers import AutoTokenizer, AutoModel
from ctransformers import AutoModelForCausalLM
import pandas as pd
import numpy as np
from tqdm import tqdm
import textwrap

In [2]:
# Load the Excel file
file_path = "data\Hospice Text.xlsx"
df = pd.read_excel(file_path)


  file_path = "data\Hospice Text.xlsx"


In [3]:
# Load embedding model
MODEL_NAME = "BAAI/bge-large-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)


In [4]:
def get_embedding(text):
    """Generate embeddings for text using BAAI/bge-large-en-v1.5."""
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :].squeeze().numpy()

In [5]:
def chunk_text(text, chunk_size=512, overlap=128):
    """Splits text into overlapping chunks."""
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = " ".join(words[i:i + chunk_size])
        chunks.append(chunk)
    return chunks

In [6]:
# Connect to ChromaDB
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(name="hospice_texts")

In [7]:
# Insert documents with chunking and metadata
for _, row in tqdm(df.iterrows(), total=len(df)):
    text_chunks = chunk_text(row["Text"], chunk_size=512, overlap=128)
    for chunk_id, chunk in enumerate(text_chunks):
        embedding = get_embedding(chunk)
        collection.add(
            ids=[f"{_}_chunk{chunk_id}"],
            embeddings=[embedding.tolist()],
            metadatas=[{
                "year": row["Year"],
                "type": row["Type"],
                "section": row["Section"],
                "text": chunk
            }]
        )

print("✅ Data successfully inserted into ChromaDB with structured metadata!")


  0%|          | 0/10 [00:00<?, ?it/s]Add of existing embedding ID: 0_chunk0
Add of existing embedding ID: 0_chunk1
Add of existing embedding ID: 0_chunk2
Add of existing embedding ID: 0_chunk3
Add of existing embedding ID: 0_chunk4
Add of existing embedding ID: 0_chunk5
Add of existing embedding ID: 0_chunk6
Add of existing embedding ID: 0_chunk7
Add of existing embedding ID: 0_chunk8
Add of existing embedding ID: 0_chunk9
Add of existing embedding ID: 0_chunk10
Add of existing embedding ID: 0_chunk11
Add of existing embedding ID: 1_chunk0
Add of existing embedding ID: 1_chunk1
Add of existing embedding ID: 1_chunk2
Add of existing embedding ID: 1_chunk3
Add of existing embedding ID: 1_chunk4
Add of existing embedding ID: 1_chunk5
Add of existing embedding ID: 2_chunk0
Add of existing embedding ID: 2_chunk1
Add of existing embedding ID: 2_chunk2
Add of existing embedding ID: 2_chunk3
Add of existing embedding ID: 2_chunk4
Add of existing embedding ID: 2_chunk5
Add of existing embeddin

✅ Data successfully inserted into ChromaDB with structured metadata!





In [14]:
# Function to retrieve relevant text from ChromaDB
def retrieve_text(query, limit=2):
    """Retrieve relevant text based on user query with improved relevance sorting."""
    query_embedding = get_embedding(query).tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=limit)
    
    if "metadatas" not in results or not results["metadatas"]:
        return []
    
    # Sort results by keyword overlap for better relevance
    query_words = set(query.lower().split())
    ranked_results = sorted(
        results["metadatas"][0],
        key=lambda doc: len(set(doc["text"].lower().split()) & query_words),
        reverse=True
    )
    
    return [doc["text"] for doc in ranked_results]

In [9]:
# Load Falcon-7B-Instruct model from .bin using ctransformers
FALCON_MODEL_PATH = os.path.join("models", "falcon-7b-instruct.ggccv1.q5_1.bin")

if not os.path.exists(FALCON_MODEL_PATH):
    raise FileNotFoundError(f"Model not found at {FALCON_MODEL_PATH}. Check the path and ensure the model is downloaded.")

falcon_model = AutoModelForCausalLM.from_pretrained(FALCON_MODEL_PATH, model_type="falcon")


In [15]:
def generate_response(query):
    """Retrieve relevant context and pass to Falcon-7B for response generation, enforcing strict token limits."""
    context_data = retrieve_text(query)

    if not context_data:
        return "No relevant information found."

    structured_context = "\n---\n".join(context_data)

    # Token Limit Enforcement
    max_total_tokens = 512  # Falcon-7B GGML max limit
    max_query_tokens = 50    # Reserve 50 tokens for the query
    max_response_tokens = 100 # Ensure response fits
    max_context_tokens = max_total_tokens - max_query_tokens - max_response_tokens

    # Truncate context and query if necessary
    context_words = structured_context.split()
    truncated_context = " ".join(context_words[:max_context_tokens])

    query_words = query.split()
    truncated_query = " ".join(query_words[:max_query_tokens])

    # Final prompt ensuring it fits within model's max token limit
    prompt = f"""You are an expert assistant answering a query based on retrieved regulatory documents. 
    Always provide fact-based answers and reference the provided text when possible.
    If the retrieved text does not answer the question, say 'Insufficient information'.

    Below is the relevant information:
    {truncated_context}

    Based on this, answer the following question in a well-structured and complete manner:

    Question: {truncated_query}

    Answer:"""

    try:
        llm_response = falcon_model(prompt, max_new_tokens=max_response_tokens)
        return llm_response
    except Exception as e:
        return f"Error: {e}"


In [16]:
user_query = "Compare hospice wage indexes from 2024 and 2025."
response = generate_response(user_query)
print(response)

Number of tokens (607) exceeded maximum context length (512).
Number of tokens (608) exceeded maximum context length (512).
Number of tokens (609) exceeded maximum context length (512).
Number of tokens (610) exceeded maximum context length (512).
Number of tokens (611) exceeded maximum context length (512).
Number of tokens (612) exceeded maximum context length (512).
Number of tokens (613) exceeded maximum context length (512).
Number of tokens (614) exceeded maximum context length (512).
Number of tokens (615) exceeded maximum context length (512).
Number of tokens (616) exceeded maximum context length (512).
Number of tokens (617) exceeded maximum context length (512).
Number of tokens (618) exceeded maximum context length (512).
Number of tokens (619) exceeded maximum context length (512).
Number of tokens (620) exceeded maximum context length (512).
Number of tokens (621) exceeded maximum context length (512).
Number of tokens (622) exceeded maximum context length (512).
Number o

 1,) =type;
User:  a a is an certain individuals can be used to, as the.poster in the.
