In [3]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

# Load the per-page metadata CSV; the path is relative to the notebook's
# working directory. The "text" column is what gets embedded later on.
df = pd.read_csv("FEP_Blue_Focus_2026_metadata.csv")

# Load the sentence-embedding model (chosen as "cheap + good" by the author).
# The same model instance encodes both the pages and the incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")  # 384-dim output, per the author's note

In [5]:
# Preview the loaded frame: one row per page, with the page text, its length
# and any extracted tables (pandas truncates the display to head/tail rows).
df

Unnamed: 0,document_id,page_number,text,text_length,num_tables,tables
0,FEP_Blue_Focus_2026,1,Blue Cross® and Blue Shield® Service Benefit P...,1018,0,
1,FEP_Blue_Focus_2026,2,Important Notice from the Blue Cross and Blue ...,2916,0,
2,FEP_Blue_Focus_2026,3,Table of Contents\nIntroduction .................,8255,0,
3,FEP_Blue_Focus_2026,4,• Maternity care ................................,8340,0,
4,FEP_Blue_Focus_2026,5,2026 Rate Information for the Blue Cross and B...,240,0,
...,...,...,...,...,...,...
137,FEP_Blue_Focus_2026,138,Benefits You Pay Page\nDental care Treatment o...,1395,1,"['Benefits', 'You Pay', 'Page', 'Dental care',..."
138,FEP_Blue_Focus_2026,139,Notes\n2026 Blue Cross® and Blue Shield® Servi...,86,0,
139,FEP_Blue_Focus_2026,140,Notes\n2026 Blue Cross® and Blue Shield® Servi...,86,0,
140,FEP_Blue_Focus_2026,141,Notes\n2026 Blue Cross® and Blue Shield® Servi...,86,0,


In [7]:
# Encode every page's text into a unit-length vector. Normalising at encode
# time is what lets the inner-product index built afterwards act as cosine
# similarity; the final cast yields the float32 matrix that is added to FAISS.
embeddings = np.array(
    model.encode(
        list(df["text"]),
        batch_size=32,
        show_progress_bar=True,
        normalize_embeddings=True,  # IMPORTANT for cosine similarity
    )
).astype("float32")

Batches: 100%|██████████| 5/5 [00:00<00:00,  8.97it/s]


In [8]:
import faiss

# The embeddings were L2-normalised when they were encoded, so ranking by
# inner product in a flat index is equivalent to ranking by cosine similarity.
dim = embeddings.shape[-1]
index = faiss.IndexFlatIP(dim)
index.add(embeddings)

# Sanity check: this should equal the number of embedded pages.
print("Total vectors in index:", index.ntotal)

Total vectors in index: 142


In [11]:
from sentence_transformers import CrossEncoder

# Cross-encoder used to re-score (query, page) pairs returned by the dense search.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


def retrieve_and_rerank(query, k=10, top_n=5):
    """Retrieve pages with the FAISS index, then re-order them with the cross-encoder.

    Args:
        query: Natural-language question to search for.
        k: Number of candidates to pull from the dense index. Values larger
            than the index size are clamped to the index size.
        top_n: Number of re-ranked candidates to return.

    Returns:
        A list of dicts sorted by descending ``rerank_score``. Each dict has
        ``score`` (dense inner-product score), ``text`` (page text) and
        ``rerank_score`` (cross-encoder score). The list is empty when the
        index holds no vectors, ``k`` is not positive, or nothing was retrieved.
    """
    # Asking FAISS for more neighbours than it holds makes it pad the result
    # with label -1; clamping k avoids that padding in the first place.
    k = min(k, index.ntotal)
    if k <= 0:
        return []

    q_emb = model.encode(
        [query],
        normalize_embeddings=True
    ).astype("float32")

    scores, indices = index.search(q_emb, k)

    # Still skip any -1 label defensively: df.iloc[-1] would otherwise
    # silently return the LAST page as if it were a real hit.
    candidates = [
        {
            "score": float(score),
            "text": df["text"].iloc[int(idx)],
        }
        for score, idx in zip(scores[0], indices[0])
        if idx >= 0
    ]

    # Nothing to re-rank; avoid calling the cross-encoder on an empty batch.
    if not candidates:
        return []

    rerank_scores = reranker.predict([[query, c["text"]] for c in candidates])

    for candidate, rerank_score in zip(candidates, rerank_scores):
        candidate["rerank_score"] = float(rerank_score)

    candidates.sort(key=lambda c: c["rerank_score"], reverse=True)

    return candidates[:top_n]

# Example query used to smoke-test retrieval + re-ranking. The misspelling
# "preffered" is part of the input exactly as it was run.
query = "What is the preffered and non-preferred for maternity care?"
results = retrieve_and_rerank(query, k=10, top_n=5)

In [14]:
# Print the dense-retrieval score next to the cross-encoder score for each
# returned page, followed by the page text and a separator line.
for hit in results:
    print(f"Score: {hit['score']:.4f}, Rerank Score: {hit['rerank_score']:.4f}")
    print(f"Text: {hit['text']}")
    print("-----")

Score: 0.6337, Rerank Score: 1.5450
Text: FEP Blue Focus
Benefit Description You Pay
Maternity Care (cont.) FEP Blue Focus
• Maternity care benefits are not provided for prescription drugs required Preferred: Nothing (no deductible)
during pregnancy, except as recommended under the Affordable Care Act.
Note: For Preferred facility care related to
See Section 5(f) for other prescription drug coverage.
maternity, including care at Preferred birthing
facilities, your responsibility for covered facility
Notes:
care is limited to $2,500 per pregnancy. See
• You do not need to precertify your delivery; see Section 3 for other
Section 5(c).
circumstances, such as extended stays for you or your newborn.
Non-preferred (Participating/Non-participating):
• You may remain in the hospital up to 48 hours after a vaginal delivery and
You pay all charges
96 hours after a cesarean delivery. We will cover an extended stay if
medically necessary.
Note: When care is provided by a Non-preferred
• We cover 

In [15]:
# Create LLM return function

import boto3
import json

# Bedrock runtime client shared by every LLM call in this notebook.
bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")


def call_bedrock_llama(prompt):
    """Send ``prompt`` to the Llama 3 70B Instruct model on Bedrock.

    Args:
        prompt: The complete prompt string, already formatted for the model.

    Returns:
        The value of the ``"generation"`` field in the model's JSON response.
    """
    # temperature 0 is used together with the 1500-token generation cap.
    payload = {
        "prompt": prompt,
        "max_gen_len": 1500,
        "temperature": 0,
        "top_p": 0.9,
    }
    raw_response = bedrock.invoke_model(
        modelId="meta.llama3-70b-instruct-v1:0",
        body=json.dumps(payload),
    )

    # The response body is a stream; read it fully before decoding the JSON.
    decoded = json.loads(raw_response["body"].read())
    return decoded["generation"]
  

def build_context(results):
    """Render retrieved results as a bulleted context block for the prompt.

    Args:
        results: Iterable of dicts, each carrying the excerpt under ``"text"``.

    Returns:
        One string with a ``"- "``-prefixed entry per result, entries separated
        by a blank line. An empty iterable yields an empty string.
    """
    bullets = []
    for item in results:
        excerpt = item["text"]
        bullets.append(f"- {excerpt}")
    return "\n\n".join(bullets)


def build_prompt(query, context):
    """Build a Llama 3 Instruct prompt that answers ``query`` from ``context`` only.

    The previous template wrapped the turns in ``<|system|>`` / ``<|user|>``
    style tags, which are not Llama 3 special tokens, so the model saw them as
    plain text. This version uses the Llama 3 chat layout: each turn opens with
    ``<|start_header_id|>ROLE<|end_header_id|>`` and closes with ``<|eot_id|>``,
    and the prompt ends on an open assistant header so the model generates the
    answer as the assistant turn.

    Args:
        query: The user's question.
        context: Plan excerpts the answer must be grounded in.

    Returns:
        The full prompt string, starting with ``<|begin_of_text|>``.
    """
    # Instruction text is unchanged from the original template.
    system_message = """You are an expert healthcare benefits assistant.

Your task is to answer the user's question using ONLY the provided plan excerpts.

Rules:
- Use only the information explicitly present in the context.
- Do NOT hallucinate or invent missing details.
- If the information is not stated, respond with: "Not mentioned."
- If the information can be reasonably inferred but is not explicit, respond with: "Inferred."
- Do NOT include information outside the provided excerpts.
- Do NOT provide legal or medical advice.
- Do NOT change the meaning of the source material.
- Provide a concise, factual answer only.

When applicable:
- Clearly distinguish between Preferred and Non-Preferred benefits.
- If only one is mentioned, state that the other is "Not mentioned."
- Include cost-sharing details if present (copay, coinsurance, deductible, limits)."""

    user_message = f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"

    # No whitespace may precede <|begin_of_text|>; the trailing assistant
    # header (left open, no <|eot_id|>) is what cues the model to respond.
    return (
        "<|begin_of_text|>"
        "<|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_message}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{user_message}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

In [16]:
import streamlit as st

# Streamlit front end for the retrieve -> re-rank -> generate pipeline.
# NOTE: Streamlit warns that session state does not function unless the script
# is launched with `streamlit run`, so this cell is meant to live in a script
# started that way rather than be executed inside the notebook kernel.
st.title("RAG Demo (FAISS + Re-ranker + Bedrock)")

query = st.text_input("Ask a question about your benefits plan:")

# An empty text box is falsy, so nothing runs until a question is entered.
if query:
    with st.spinner("Thinking..."):
        results = retrieve_and_rerank(query, k=10, top_n=5)

        context = build_context(results)
        prompt = build_prompt(query, context)

        # Fixed: the helper defined in this notebook is `call_bedrock_llama`;
        # the previous call to `call_bedrock` raised NameError.
        answer = call_bedrock_llama(prompt)

    st.subheader("Answer")
    st.write(answer)

    # Show the supporting excerpts so the answer can be checked against them.
    with st.expander("Retrieved context"):
        for r in results:
            st.markdown(f"**Score:** {r['rerank_score']:.4f}")
            st.write(r["text"])
            st.divider()

2025-12-14 17:44:08.450 
  command:

    streamlit run /Users/maukanmir/miniforge3/envs/semantic-search/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-12-14 17:44:08.451 Session state does not function when running a script without `streamlit run`
