<a href="https://colab.research.google.com/github/CWNDrohan/PensionRAG/blob/main/Data606_Final_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1 — Install Libraries & Imports

In [None]:
# 🛠️ Install Required Libraries (FAISS + LlamaIndex only)
!pip install -q \
    llama-index \
    llama-index-vector-stores-faiss \
    llama-index-embeddings-huggingface \
    llama-index-llms-huggingface \
    sentence-transformers transformers \
    pdfplumber PyMuPDF \
    faiss-cpu

# 📥 Step 1b: Import All Necessary Libraries
import os
import json
import torch
import faiss
import shutil
import fitz  # PyMuPDF
import pdfplumber
import re
import pprint  # 🔍 Pretty-printing for debug visibility
import pandas as pd
from google.colab import drive, userdata
from datetime import datetime

# ✅ Transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer

# ✅ LlamaIndex Imports
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage, Document
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter, SemanticSplitterNodeParser

# 📏 Global Chunking Configuration
# Both values are passed to SentenceSplitter(chunk_size=..., chunk_overlap=...)
# in Step 4; the unit is whatever SentenceSplitter counts (presumably tokens —
# TODO confirm against the LlamaIndex docs).
CHUNK_SIZE = 384
CHUNK_OVERLAP = 96

# ✅ Centralized keyword list
# Shared by tag_table_metadata (dynamic tagging), rerank_with_metadata_priority
# (default boost weights) and get_reranked_nodes (metadata injection).
# Entries are lowercase because they are compared against lowercased text.
KEYWORDS = [
    "pension formula",
    "early retirement",
    "benefit reduction",
    "final average salary",
    "penalty table"
]

# 🔗 Step 1c: Mount Google Drive
# Required before any path under /content/drive is read in Step 2.
drive.mount('/content/drive')

Step 2 — Verify Documents & Tokens are Accessible

In [None]:
# 📂 Define Paths and Load JSON Knowledge Base

# 📄 Define permanent location
# drive_dir and kb_filename are captured as default arguments of
# load_latest_kb below, so they must be defined before that function.
drive_dir = "/content/drive/My Drive/School/UMBC/DATA606/Input"
kb_filename = "knowledge_base.json"
# The pension PDF is expected to sit in the same Drive folder.
pension_pdf_path = os.path.join(drive_dir, "NYCERS_Tier6.pdf")

# 🧠 Wrapped loader
def load_latest_kb(temp_dir="/content", drive_dir=drive_dir, filename=kb_filename):
    """
    Refresh the knowledge base in Drive from the newest JSON upload and load it.

    If ``temp_dir`` contains one or more ``*.json`` files, the most recently
    modified one is copied to ``drive_dir/filename`` (overwriting it). The
    Drive copy is then parsed and returned.

    Args:
        temp_dir (str): Directory scanned (non-recursively) for uploaded JSON.
        drive_dir (str): Permanent directory that holds the knowledge base.
        filename (str): Name of the knowledge-base file inside ``drive_dir``.

    Returns:
        The parsed JSON content (its top-level keys are printed, so a JSON
        object is expected).

    Raises:
        FileNotFoundError: If no knowledge base exists in ``drive_dir`` after
            the optional copy step.
    """
    # 🔍 Auto-locate temp file: choose the newest upload rather than whichever
    # entry os.listdir happens to return first (its order is arbitrary).
    candidates = []
    if os.path.isdir(temp_dir):
        for name in os.listdir(temp_dir):
            path = os.path.join(temp_dir, name)
            if name.endswith(".json") and os.path.isfile(path):
                candidates.append(path)
    temp_json = max(candidates, key=os.path.getmtime) if candidates else None
    if temp_json:
        print(f"📂 Found new JSON in temp directory: {os.path.basename(temp_json)}")

    drive_json_path = os.path.join(drive_dir, filename)

    # 🔄 Copy and rename
    if temp_json:
        shutil.copy(temp_json, drive_json_path)
        mod_time = os.path.getmtime(drive_json_path)
        timestamp = datetime.fromtimestamp(mod_time).strftime("%Y-%m-%d %H:%M:%S")
        print(f"✅ Knowledge base copied to Drive as: {filename}")
        print(f"🕒 Last modified: {timestamp}")
    else:
        print(f"⚠️ No new JSON found in {temp_dir}. Using existing file in Drive.")

    # 📖 Load
    if not os.path.exists(drive_json_path):
        raise FileNotFoundError(f"❌ ERROR: No {filename} found in Drive!")

    # Explicit encoding so parsing does not depend on the platform default.
    with open(drive_json_path, "r", encoding="utf-8") as f:
        kb = json.load(f)
    print("✅ Knowledge base loaded with keys:", list(kb.keys()))
    return kb

# ✅ Load KB
knowledge_base = load_latest_kb()

# 📄 Validate PDF path
# Only reports the problem; Step 3 will fail when it tries to open the file.
if os.path.exists(pension_pdf_path):
    print("✅ Pension PDF is accessible:", pension_pdf_path)
else:
    print("❌ ERROR: Pension PDF not found! Check the file path.")

# 🔐 Retrieve Hugging Face token from Colab's secrets
# userdata.get raises (rather than returning None) when the secret is missing
# or the notebook has not been granted access, so those cases are caught here
# to keep the friendly error message reachable.
try:
    huggingface_token = userdata.get("HF_TOKEN")
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    huggingface_token = None

if huggingface_token:
    print("✅ Hugging Face token retrieved successfully!")
else:
    print("❌ ERROR: Hugging Face token not found! Make sure it's saved in Colab.")

Step 3 -- Extract, Clean, Tag, and Index Pension Text and Tables for RAG Processing

In [None]:
# ✅ Extract, Clean, and Tag Pension Text + Tables

# 🧼 Clean raw text (remove headers, collapse whitespace, etc.)
def clean_raw_text(text):
    """
    Normalize text extracted from the pension PDF.

    Applies, in order: removal of running page headers (a 1-3 digit page
    number line followed by a "Summary Plan Description..." line), collapsing
    of runs of newlines to a single blank line, and replacement of
    whitespace-only lines with a blank line. The result is stripped of
    leading and trailing whitespace.
    """
    substitutions = (
        (r'\n?\d{1,3}\nSummary Plan Description[^\n]*', ''),  # page headers
        (r'\n{2,}', '\n\n'),                                  # newline runs
        (r'\n\s+\n', '\n\n'),                                 # whitespace-only lines
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# 📄 Extract full text from PDF (fitz for layout)
def extract_raw_text_from_pdf(pdf_path):
    """
    Return the text of every page of the PDF at ``pdf_path``.

    Pages are read with PyMuPDF (``fitz``) in document order using the
    "text" extraction mode and joined with a single newline.
    """
    page_texts = []
    with fitz.open(pdf_path) as pdf_doc:
        for pdf_page in pdf_doc:
            page_texts.append(pdf_page.get_text("text"))
    return "\n".join(page_texts)

# 📊 Extract tables from PDF (pdfplumber is best for tables)
def extract_tables_from_pdf(pdf_path):
    """
    Extract every table pdfplumber detects in the PDF at ``pdf_path``.

    Returns a flat list with one entry per table, in page order. Each entry
    is the table converted through a pandas DataFrame to a list of row
    dicts (``orient="records"``); no header row is promoted, so the dict
    keys are the DataFrame's default column labels.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return [
            pd.DataFrame(raw_table).to_dict(orient="records")
            for page in pdf.pages
            for raw_table in page.extract_tables()
        ]

# 🧠 Optional metadata tagging for known table types
def tag_table_metadata(table_data, table_id):
    """
    Wrap one extracted table in a Document with keyword metadata.

    Tables 5 and 6 are treated as the pension-formula tables: they receive a
    fixed keyword set and a natural-language summary prepended to the text.
    Table 7 is treated as the early-retirement penalty table and receives
    its own fixed keyword set. All tables are additionally tagged with any
    entry of KEYWORDS that occurs in the (lowercased) table text.

    Args:
        table_data: Extracted table; stringified with ``str()`` to form the text.
        table_id: Position of the table in the extraction order.

    Returns:
        Document whose metadata holds ``table_id``, ``source`` and the sorted,
        de-duplicated ``table_keywords``.
    """
    table_text = str(table_data)
    manual_keywords = []

    # ✅ ID-specific tagging for the known key tables
    if table_id in (5, 6):
        manual_keywords = ["pension formula", "final average salary", "35% FAS", "2% additional"]
        nl_summary = (
            "This table describes the pension formula:\n"
            "- If you have less than 20 years of service: 1.67% × Final Average Salary × Years of Service.\n"
            "- If you have 20 or more years: 35% of FAS for the first 20 years, plus 2% for each year beyond 20.\n"
        )
        table_text = nl_summary + "\n" + table_text
    elif table_id == 7:
        manual_keywords = ["early retirement", "age reduction", "6.5%", "penalty table"]

    # ✅ Content-based tagging (the prepended summary is searched as well)
    lowered_text = table_text.lower()
    dynamic_keywords = [kw for kw in KEYWORDS if kw in lowered_text]

    # ✅ Union of both sets, sorted for a stable order
    all_keywords = sorted(set(manual_keywords) | set(dynamic_keywords))

    metadata = {
        "table_id": table_id,
        "source": f"table_{table_id}",
        "table_keywords": all_keywords
    }
    return Document(text=table_text, metadata=metadata)

# 🏗️ Run pipeline: full cleaned text first, then every detected table
raw_text = clean_raw_text(extract_raw_text_from_pdf(pension_pdf_path))
extracted_tables = extract_tables_from_pdf(pension_pdf_path)

# 📦 One Document for the whole text, followed by one tagged Document per
# table (table_id is the table's position in extraction order)
combined_docs = [
    Document(text=raw_text, metadata={"type": "full_text"}),
    *(tag_table_metadata(tbl, i) for i, tbl in enumerate(extracted_tables)),
]

print(f"✅ Extracted raw text ({len(raw_text):,} characters)")
print(f"✅ Extracted {len(extracted_tables)} tables and tagged key pension tables.")

Step 4 — Build & Verify the Base Index

In [None]:
# ✅ Chunk Pension Text + Tables and Build FAISS Vector Index

# ✅ Define paths
faiss_index_path = "/content/faiss_index"

# 🧼 Remove old FAISS index (if it exists)
if os.path.exists(faiss_index_path):
    shutil.rmtree(faiss_index_path)
    # Only report a removal when something was actually removed.
    print("🧼 Old FAISS index removed.")

# 🔍 Optional: Measure total character length of all input documents
total_chars = sum(len(doc.text) for doc in combined_docs)
print(f"🔍 Total combined length across all documents: {total_chars:,} characters")

# ✅ Use sentence-based chunking for precise control
splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
nodes = splitter.get_nodes_from_documents(combined_docs)
print(f"📦 Chunking complete: {len(nodes)} chunks created")

# 🔍 Preview a few chunks
print("\n🔍 Sample of Chunked Nodes:\n")
for i, node in enumerate(nodes[:3]):
    print(f"🔹 Chunk {i+1}")
    pprint.pprint(node.metadata)
    print(node.text[:500])
    print("-" * 100)

# ✅ Define FAISS index and vector store
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")
# Inner-product index instead of L2: FaissVectorStore reports the raw FAISS
# result as the node score. With IndexFlatL2 that is a distance (lower is
# better), which contradicts rerank_with_metadata_priority, where boosts are
# added and nodes are sorted by descending score. With IndexFlatIP the score
# is a similarity (higher is better).
# NOTE(review): this equals cosine similarity only if the embedding model
# returns unit-length vectors (HuggingFaceEmbedding normalizes by default —
# confirm for the installed version).
# 768 is the embedding width of all-mpnet-base-v2.
faiss_index = faiss.IndexFlatIP(768)
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# ✅ Build and persist the vector index
pension_index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=embed_model
)
pension_index.storage_context.persist(persist_dir=faiss_index_path)
print("✅ FAISS index with sentence-based chunking saved.")

# 🔍 Confirm FAISS index structure
print("\n📚 FAISS Index Sample Preview:\n")
retriever = pension_index.as_retriever(similarity_top_k=3)
sample_query = "What is the pension formula?"
retrieved = retriever.retrieve(sample_query)

for i, node in enumerate(retrieved):
    print(f"🔹 Result {i+1}")
    pprint.pprint(node.metadata)
    print(node.text[:800])
    print("-" * 100)

Step 5 — Load and Configure LLM with Query Engine

In [None]:
# ✅ Load Mistral-7B-Instruct-v0.1 model using HuggingFaceLLM

# 🔁 Swap in new model version
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# 🧠 Load tokenizer to get eos_token_id
tokenizer = AutoTokenizer.from_pretrained(model_id)
eos_token_id = tokenizer.eos_token_id

# ✅ Configure LLM with deterministic (greedy) generation
# temperature / top_p / top_k were removed: they only apply when sampling is
# enabled, so with do_sample=False they had no effect on the output and merely
# triggered generation-config warnings from transformers.
llm = HuggingFaceLLM(
    context_window=3900,
    max_new_tokens=512,
    generate_kwargs={
        "do_sample": False,         # Deterministic response
        "pad_token_id": eos_token_id  # model has no pad token; reuse EOS
    },
    tokenizer_name=model_id,
    model_name=model_id,
    device_map="auto",
    tokenizer_kwargs={"use_fast": True},
    model_kwargs={"torch_dtype": "auto"}  # Use float16 if memory is tight
)

# ✅ Reinitialize query engine with updated LLM
query_engine = pension_index.as_query_engine(llm=llm)

print("✅ Mistral-7B-Instruct-v0.1 loaded successfully and ready to go!")

Step 6 -- Add Metadata-Aware Reranking Function

In [None]:
# ✅ Generalized Metadata-Aware Reranking Function
def rerank_with_metadata_priority(nodes, keyword_weights=None):
    """
    Re-rank retrieved nodes, favouring those tagged with important keywords.

    For every node, each entry of ``keyword_weights`` whose key occurs as a
    substring of at least one (lowercased) ``table_keywords`` tag adds its
    weight to the node's score. Scores are updated in place on the node
    objects; a missing score counts as 0.

    Args:
        nodes (List[NodeWithScore]): Retrieved nodes from query_engine.
        keyword_weights (dict): Maps keyword/phrase -> boost weight. Defaults
            to 0.2 for every entry of KEYWORDS and 0.3 for "pension formula".

    Returns:
        List[NodeWithScore]: The same nodes sorted by boosted score, highest first.
    """
    if keyword_weights is None:
        keyword_weights = dict.fromkeys(KEYWORDS, 0.2)
        keyword_weights["pension formula"] = 0.3  # the most important keyword

    candidates = []
    for candidate in nodes:
        starting_score = candidate.score or 0
        tags = (candidate.metadata or {}).get("table_keywords", [])

        bonus = 0
        for phrase, weight in keyword_weights.items():
            if any(phrase in tag.lower() for tag in tags):
                bonus += weight

        candidate.score = starting_score + bonus
        candidates.append(candidate)

    candidates.sort(key=lambda scored: scored.score, reverse=True)
    return candidates

Step 7 -- Generalized retrieval + metadata injection + reranking

In [None]:
# ✅ Generalized retrieval + metadata injection + reranking
def get_reranked_nodes(query, index, reranker_fn, keyword_list=KEYWORDS):
    """
    Retrieve nodes for ``query``, add keyword-tagged nodes, and rerank.

    Steps:
      1. Vector retrieval against ``index`` (default retriever settings).
      2. Injection of every docstore node whose ``table_keywords`` metadata
         contains an entry of ``keyword_list`` (case-insensitive), with a
         baseline score of 0.0, unless it was already retrieved.
      3. Reranking of the combined list with ``reranker_fn``.

    Args:
        query (str): Question to retrieve context for.
        index: LlamaIndex index providing ``as_retriever()`` and ``docstore``.
        reranker_fn (callable): Takes the list of scored nodes and returns
            them reordered.
        keyword_list (Iterable[str]): Keywords that trigger injection.

    Returns:
        Whatever ``reranker_fn`` returns for the combined node list.
    """
    from llama_index.core.schema import NodeWithScore

    # Step 1: Vector-based retrieval.
    # Retrieval goes through the `index` argument instead of the notebook's
    # global `query_engine`, so the function works for any index passed in and
    # does not depend on Step 5 having been run first.
    retrieved_nodes = list(index.as_retriever().retrieve(query))

    # Step 2 + 3: Inject nodes with matching metadata keywords, deduplicating
    # against what was already retrieved.
    wanted = {kw.lower() for kw in keyword_list}
    seen_ids = {
        n.node.node_id if isinstance(n, NodeWithScore) else n.node_id
        for n in retrieved_nodes
    }
    for node in index.docstore.docs.values():
        if node.node_id in seen_ids:
            continue
        tags = (node.metadata or {}).get("table_keywords") or []
        if any(tag.lower() in wanted for tag in tags):
            retrieved_nodes.append(NodeWithScore(node=node, score=0.0))
            seen_ids.add(node.node_id)

    # Step 4: Rerank using provided reranker
    return reranker_fn(retrieved_nodes)

Step 8 -- Extract Inputs from natural language query

In [None]:
# ✅ Extract inputs from natural language user query

def extract_inputs(user_query: str, normal_retirement_age: int = 63) -> dict:
    """
    Extract retirement_age, years_of_service, final_salary, and list of salaries
    from a natural language pension query using regex-based parsing.

    Args:
        user_query: Free-text question such as
            "I'm 56, I've worked here for 30 years, and I make $100,000."
        normal_retirement_age: Age below which ``penalty_flag`` is set.

    Returns:
        dict with keys:
            retirement_age   - stated retirement age, else the current age, else None
            years_of_service - explicit total if one is stated separately;
                               otherwise current service projected forward to
                               the retirement age; else None
            final_salary     - the highest salary mentioned, else None
            salaries         - sorted list of all salaries if more than one, else None
            penalty_flag     - True if retirement_age < normal_retirement_age
    """
    # Match $100,000 style. The capture must start with a digit so that a
    # stray "$,,," cannot yield an empty string for int().
    dollar_matches = re.findall(r"\$\s*(\d[\d,]{2,})", user_query)
    dollar_salaries = [int(s.replace(",", "")) for s in dollar_matches]
    print(f"💰 Found dollar-style salaries: {dollar_salaries}")

    # Match $110k style
    dollar_k_matches = re.findall(r"\$\s*(\d{2,3})\s*[kK]\b", user_query)
    dollar_k_salaries = [int(k) * 1000 for k in dollar_k_matches]
    print(f"💰 Found dollar-k style salaries: {dollar_k_salaries}")

    # Match 110k style (no $)
    plain_k_matches = re.findall(r"(?<!\$)(?<!\d)(\d{2,3})\s*[kK]\b", user_query)
    plain_k_salaries = [int(k) * 1000 for k in plain_k_matches]
    print(f"💰 Found plain-k style salaries: {plain_k_salaries}")

    # Combine all salaries and filter. The >= 1000 filter drops the "110" that
    # the dollar-style pattern captures from "$110k".
    all_salaries = sorted(set(s for s in (
        dollar_salaries + dollar_k_salaries + plain_k_salaries
    ) if s >= 1000))
    print(f"💰 Combined salary list (filtered): {all_salaries}")

    final_salary = all_salaries[-1] if all_salaries else None
    final_salary_list = all_salaries if len(all_salaries) > 1 else None

    # Extract current age
    age_match = re.search(r"(?:I['’]m|I am|I'm)\s*(\d{2})", user_query)
    current_age = int(age_match.group(1)) if age_match else None
    print(f"🎂 Parsed current age: {current_age}")

    # Extract retirement age (defaults to the current age: "retiring now")
    ret_match = re.search(r"(?:retir(?:e|ing|ement)[^\d]{0,10}|at age\s*)(\d{2})", user_query)
    retirement_age = int(ret_match.group(1)) if ret_match else current_age
    print(f"🎯 Parsed retirement age: {retirement_age}")

    # Extract current years of service
    yos_match = re.search(r"(?:worked|been here|employed).{0,20}?(\d{1,2})\s*(?:years|yrs)", user_query)
    current_yos = int(yos_match.group(1)) if yos_match else None
    print(f"🛠️ Parsed current YOS: {current_yos}")

    # Alternative YOS: a *different* "N years" statement (e.g. "retire after
    # 32 years"). The number already captured as current service is skipped;
    # otherwise the same phrase would match here too and the projection to the
    # retirement age below could never run.
    current_span = yos_match.span(1) if yos_match else None
    yos_alt = None
    for alt_match in re.finditer(
        r"(?:after|for|with|total of)?\s*(\d{2})\s*(?:years|yrs)(?!\s*old)",
        user_query, re.IGNORECASE
    ):
        if alt_match.span(1) != current_span:
            yos_alt = int(alt_match.group(1))
            break
    print(f"🛠️ Parsed alternative YOS: {yos_alt}")

    # Final YOS computation
    if current_yos and not yos_alt:
        if current_age and retirement_age:
            # Project service forward to retirement; never subtract years if
            # the parsed retirement age is below the current age.
            years_of_service = max(retirement_age - current_age, 0) + current_yos
        else:
            years_of_service = current_yos
    else:
        years_of_service = yos_alt or current_yos or None

    print(f"✅ Final computed Years of Service: {years_of_service}")
    print(f"✅ Final Salary for calculation: {final_salary}")

    # Flag whether retirement is before the normal retirement age
    penalty_flag = bool(retirement_age and retirement_age < normal_retirement_age)

    # Include in return object
    return {
        "retirement_age": retirement_age,
        "years_of_service": years_of_service,
        "final_salary": final_salary,
        "salaries": final_salary_list,
        "penalty_flag": penalty_flag
    }

# 🔍 Run extraction with test user query
# `inputs` is read by Step 9 (question construction) and Step 13 (parser
# fallback values), so this cell must run before those.
user_query = "I’m Kevin, I’m 56, I’ve worked here for 30 years, and I make $100,000."
inputs = extract_inputs(user_query)

print("\n🎯 FINAL PARSED INPUTS (to be used in model query and math):")
for key, val in inputs.items():
    print(f"{key}: {val}")

Step 9 -- Build the LLM question dynamically (no hard-coded penalty flag)

In [None]:
# ✅ Step 9: Build question with penalty clarification for early retirement
# Values come from the `inputs` dict produced by extract_inputs in Step 8.
ret_age = inputs["retirement_age"]
yos = inputs["years_of_service"]

# Retirement before age 63 gets an explicit instruction to quote the penalty
# from the table. If ret_age is None the generic branch is used, and None
# values are interpolated into the question text as-is.
if ret_age and ret_age < 63:
    question = (
        f"What is the pension formula for someone with {yos} years of service? "
        f"If they retire at age {ret_age}, which is before the normal retirement age of 63, "
        f"is there a penalty? If so, use the penalty table listed in the documentation and state the "
        f"exact penalty percentage associated with age {ret_age}. "
        f"⚠️ Do not guess, multiply, or estimate — quote the percentage directly from the table."
    )
else:
    question = (
        f"What is the pension formula for someone with {yos} years of service? "
        f"Is there a penalty if they retire at age {ret_age}?"
    )

print(f"\n🧠 Dynamic LLM Query:\n{question}")


Step 9a -- Raw Vector Retrieval Results

In [None]:
# ✅ Step 9a: Print raw vector retrieval results before reranking
# Diagnostic only: `raw_nodes` is not used by later steps.

# Use same question generated in Step 9
raw_nodes = query_engine.retrieve(question)

print("\n🔎 Raw Vector Retrieval Results (Pre-Reranking):\n")
# At most five results are shown; fewer are printed if the retriever
# returned fewer.
for i, node in enumerate(raw_nodes[:5]):
    print(f"🔹 Raw Rank {i+1}")
    print("📄 Metadata:", node.metadata)
    print("🔎 Text Preview:\n", node.text[:800])
    print("-" * 100)

Step 10 -- Combine Reranked Chunks into Final LLM Context

In [None]:
# ✅ Combine top reranked chunks into a single context block

# 🔹 Retrieve + Rerank using defined logic
reranked_nodes = get_reranked_nodes(question, pension_index, rerank_with_metadata_priority)

max_chunks = 5  # You can adjust this if needed
context_blocks = [node.text.strip() for node in reranked_nodes[:max_chunks]]

# 🔍 Detect if penalty table was included
# Only nodes created from tables carry "table_keywords"; for other nodes the
# lookup yields None and is replaced by an empty list.
contains_penalty_table = any(
    "penalty table" in (node.metadata.get("table_keywords") or [])
    for node in reranked_nodes[:max_chunks]
)

# 🏷️ Add a context flag if relevant
# NOTE: here `penalty_flag` is a text note appended to the context (or an
# empty string) — it is not the boolean stored under inputs["penalty_flag"].
penalty_flag = (
    "\n\n⚠️ Penalty table detected in context. Use it to apply early retirement reductions if age < 63.\n"
    if contains_penalty_table else ""
)

combined_context = "\n\n".join(context_blocks) + penalty_flag

print("✅ Combined context ready (length:", len(combined_context), "characters)\n")

# 🧠 Show which chunks were selected
print("🔍 Top Reranked Chunks Used in Context:\n")
for i, node in enumerate(reranked_nodes[:max_chunks]):
    print(f"🔹 Rank {i+1} | Score: {node.score:.4f}")
    print("📄 Metadata:", node.metadata)
    print("🔎 Text Preview:\n", node.text[:1000])
    print("-" * 100)

Step 11 -- Construct the Final Prompt

In [None]:
# ✅ Construct the final prompt using a dedicated system instruction block
# The instructions are plain text placed at the top of a single prompt string;
# no chat template or role markers are applied in this cell.

system_prompt = """You are an expert pension plan assistant.

Use only the provided context to answer questions. Do not guess or make assumptions.

If the context includes a penalty table, apply it based on the retirement age.
Only say "no penalty" if the table or context explicitly says so.
"""

# Layout: instructions, then the retrieved context wrapped in triple quotes,
# then the question from Step 9, ending with "Answer:" for the model to continue.
prompt_v10 = f"""{system_prompt}

Based on the documentation below, answer the question in clear, concise terms.

Context:
\"\"\"
{combined_context}
\"\"\"

Question: {question}
Answer:"""

# Debug: Preview the prompt going to the LLM
print("\n📝 Final Prompt Sent to LLM:\n", prompt_v10[:1000])  # Truncated to 1000 chars for readability

Step 12 -- Run the Model on the Prompt

In [None]:
# ✅ Get the model's response
# After this cell `response` is a plain string: the completion object returned
# by llm.complete is replaced by its .text attribute, which is what the parser
# in Step 13 is called with.

response = llm.complete(prompt_v10)
response = response.text  # ✅ Convert to string before parsing!
print("\n💬 LLM Response:\n")
print(response)

Step 13 -- Parse Model Output and Print Python-Ready Values

In [None]:
# ✅ Step 13: Parse LLM response and extract values for Python pension math

def parse_llm_response(text: str, fallback_inputs: dict) -> dict:
    """
    Parse the model's answer into numeric inputs for the pension calculation.

    All matching is done on the lowercased text.

    Args:
        text: Raw LLM answer.
        fallback_inputs: Output of extract_inputs; supplies final_salary,
            years_of_service and retirement_age, which are passed through.

    Returns:
        dict with:
        - final_salary, years_of_service, retirement_age (from fallback_inputs)
        - base_rate (as decimal, e.g., 0.35 or 0.0167; None if not found)
        - bonus_rate (e.g., 0.02 if mentioned; None if not found)
        - bonus_years (years_of_service minus the service threshold, min 0)
        - penalty_rate (as decimal, 0.0 if no penalty was found or the answer
          contains "no penalty")
    """
    lowered = text.lower()

    # Primary pattern: "35% of ... (first) 20" -> base rate + service threshold
    base_match = re.search(r"(\d{1,2}(?:\.\d+)?)%\s+of.*?(?:first\s+)?(\d{1,2})", lowered)
    # "2% for each year ... excess/beyond" -> bonus rate
    bonus_match = re.search(r"(\d{1,2}(?:\.\d+)?)%\s+for each year.*?(excess|beyond)", lowered)

    base_rate = float(base_match.group(1)) / 100 if base_match else None
    threshold_years = int(base_match.group(2)) if base_match else 20  # fallback to 20

    # Fallback: look for simple "1.67% × FAS × YOS" style rule.
    # The text is lowercased, so the abbreviation must be matched as "fas";
    # an uppercase "FAS" in the pattern could never match.
    if not base_rate:
        alt_match = re.search(
            r"(\d{1,2}(?:\.\d+)?)\s*(?:%|percent).*?(final average salary|\bfas\b)",
            lowered,
        )
        if alt_match:
            base_rate = float(alt_match.group(1)) / 100
            threshold_years = 0
            print(f"✅ Fallback base_rate parsed: {base_rate:.4f}")

    bonus_rate = float(bonus_match.group(1)) / 100 if bonus_match else None

    yos = fallback_inputs.get("years_of_service")
    bonus_years = yos - threshold_years if yos and yos > threshold_years else 0

    # Penalty rate extraction: an explicit "no penalty" wins; otherwise accept
    # either "6.5% reduction" or "reduction ... 6.5%" phrasing.
    penalty_rate = 0.0
    if "no penalty" not in lowered:
        pct_first = re.search(r"(\d{1,2}(?:\.\d+)?)%\s*(reduction|penalty)", lowered)
        if pct_first:
            penalty_str = pct_first.group(1)
        else:
            word_first = re.search(r"(reduction|penalty).*?(\d{1,2}(?:\.\d+)?)%", lowered)
            penalty_str = word_first.group(2) if word_first else None

        if penalty_str:
            # The captured group is digits with an optional decimal part, so
            # float() cannot fail here.
            penalty_rate = float(penalty_str) / 100

    final_salary = fallback_inputs.get("final_salary")
    retirement_age = fallback_inputs.get("retirement_age")

    # Print debug values
    print("\n📤 Variables passed to Python pension calculator:")
    print(f" - Final Salary: ${final_salary:,}" if final_salary else " - Final Salary: ❌ MISSING")
    print(f" - Years of Service: {yos} years" if yos else " - Years of Service: ❌ MISSING")
    print(f" - Retirement Age: {retirement_age}" if retirement_age else " - Retirement Age: ❌ MISSING")
    print(f" - Base Rate: {base_rate * 100:.2f}%" if base_rate is not None else " - Base Rate: ❌ MISSING")
    print(f" - Bonus Rate: {bonus_rate * 100:.2f}%" if bonus_rate is not None else " - Bonus Rate: ❌ MISSING")
    print(f" - Bonus Years: {bonus_years}")
    print(f" - Penalty Rate: {penalty_rate * 100:.2f}%")

    # ✅ DEBUG: Show raw LLM response
    print("\n🧪 Raw LLM Response Used for Parsing:\n", text)

    return {
        "final_salary": final_salary,
        "years_of_service": yos,
        "retirement_age": retirement_age,
        "base_rate": base_rate,
        "bonus_rate": bonus_rate,
        "bonus_years": bonus_years,
        "penalty_rate": penalty_rate,
    }

# 🚀 Run parser on model output (make sure this comes after response is generated in Step 12)
# `response` must be the plain-text string from Step 12; `inputs` comes from Step 8.
parsed_vars = parse_llm_response(response, inputs)

Step 14 -- Final Python Pension Calculation

In [None]:
# ✅ Calculate pension using parsed variables

def calculate_final_pension(vars: dict):
    """
    Calculate and print the pension from structured values:
    Pension = (Base Portion + Bonus Portion) × (1 - Penalty)

    where Base Portion = base_rate × salary and
    Bonus Portion = bonus_rate × salary × bonus_years.

    Args:
        vars: dict as produced by parse_llm_response. ``final_salary``,
            ``base_rate`` and ``years_of_service`` are required (non-zero);
            ``bonus_rate``, ``bonus_years`` and ``penalty_rate`` may be
            missing or None and are then treated as 0.

    Returns:
        None. The breakdown is printed; the value is intentionally not
        returned so a notebook cell ending in this call shows no trailing output.
    """
    required = ("final_salary", "base_rate", "years_of_service")
    if not all(vars.get(key) for key in required):
        print("❌ Missing required inputs for pension calculation.")
        return

    salary = vars["final_salary"]
    base_rate = vars["base_rate"]
    yos = vars.get("years_of_service")
    age = vars.get("retirement_age")

    # parse_llm_response always includes these keys but sets them to None when
    # nothing was parsed, so a .get() default is not enough: None must be
    # replaced explicitly or the arithmetic below raises TypeError.
    bonus_years = vars.get("bonus_years") or 0
    bonus_rate = vars.get("bonus_rate")
    if bonus_rate is None:
        if bonus_years:
            print("⚠️ Bonus rate missing — bonus portion treated as $0.")
        bonus_rate = 0.0
    penalty = vars.get("penalty_rate") or 0.0

    base_portion = base_rate * salary
    bonus_portion = bonus_rate * salary * bonus_years
    subtotal = base_portion + bonus_portion
    adjusted = subtotal * (1 - penalty)

    penalty_pct = penalty * 100
    penalty_label = f"{penalty_pct:.2f}%" if penalty_pct > 0 else "0.00%"

    print("\n🧾 Pension Calculation Details:")
    print(f" - Final Salary: ${salary:,.2f}")
    print(f" - Years of Service: {yos}")
    print(f" - Retirement Age: {age}")
    print(f" - Base Portion: ${base_portion:,.2f}")
    print(f" - Bonus Portion: ${bonus_portion:,.2f}")
    print(f" - Subtotal Before Penalty: ${subtotal:,.2f}")
    print(f" - Penalty Applied: {penalty_label}")
    print(f"\n💵 Final Adjusted Pension: ${adjusted:,.2f}")

    # ✅ Commented out to suppress trailing output
    # return adjusted

# 🚀 Run it
# Prints the calculation breakdown; the function returns nothing, so the cell
# shows no trailing value.
calculate_final_pension(parsed_vars)

Step 15 -- Q&A Query

In [None]:
# ✅ Q&A query (non-pension calculation)
# Standalone path: plain vector retrieval without the metadata injection and
# reranking used for the pension-calculation question.
qa_query = "What is the military service rule?"  # 🔁 Replace with any question

# Retrieve relevant chunks
qa_nodes = query_engine.retrieve(qa_query)

# Combine top chunks into context
qa_context = "\n\n".join([n.text.strip() for n in qa_nodes[:3]])  # Use top 3 for brevity

# Construct Q&A prompt
qa_prompt = f"""You are a pension plan assistant. Use the information below to answer the user's question clearly.

Context:
\"\"\"
{qa_context}
\"\"\"

Question: {qa_query}
Answer:"""

# Run the model
qa_response = llm.complete(qa_prompt)

# Print output
# The completion object is printed directly (unlike Step 12, .text is not
# extracted first).
print("\n💬 Q&A Response:\n")
print(qa_response)

Step 16 -- One-click **rerun**

In [None]:
# 🔁 Update with your test query here
# This cell repeats Steps 8-14 end to end for a new query. It relies on the
# functions, `pension_index` and `llm` defined in the earlier cells.
user_query = "I'm Josh, I'm 60, I've worked here for 28 years, and I make $110,000."

# Step 8: Extract structured inputs
inputs = extract_inputs(user_query)

# Step 9: Construct question based on extracted inputs
ret_age = inputs["retirement_age"]
yos = inputs["years_of_service"]

# NOTE: the early-retirement wording here differs from the Step 9 cell
# (shorter instruction about the penalty table).
if ret_age and ret_age < 63:
    question = (
        f"What is the pension formula for someone with {yos} years of service? "
        f"If they retire at age {ret_age}, which is before the normal retirement age of 63, "
        f"is there a penalty? If so, what is the exact penalty listed in the table for age {ret_age}? "
        f"Do not estimate it — use the table if available."
    )
else:
    question = (
        f"What is the pension formula for someone with {yos} years of service? "
        f"Is there a penalty if they retire at age {ret_age}?"
    )

print(f"\n🧠 Dynamic LLM Query:\n{question}")

# Step 10: Retrieve and rerank chunks
reranked_nodes = get_reranked_nodes(question, pension_index, rerank_with_metadata_priority)
max_chunks = 5
context_blocks = [node.text.strip() for node in reranked_nodes[:max_chunks]]
# True when any of the selected chunks is tagged with "penalty table".
contains_penalty_table = any(
    "penalty table" in (node.metadata.get("table_keywords") or [])
    for node in reranked_nodes[:max_chunks]
)
# Text note appended to the context (empty string when no penalty table is present).
penalty_flag_note = (
    "\n\n⚠️ Penalty table detected in context. Use it to apply early retirement reductions if age < 63.\n"
    if contains_penalty_table else ""
)
combined_context = "\n\n".join(context_blocks) + penalty_flag_note

# Step 11: Build prompt
system_prompt = """You are an expert pension plan assistant.

Use only the provided context to answer questions. Do not guess or make assumptions.

If the context includes a penalty table, apply it based on the retirement age.
Only say "no penalty" if the table or context explicitly says so.
"""

prompt_v10 = f"""{system_prompt}

Based on the documentation below, answer the question in clear, concise terms.

Context:
\"\"\"
{combined_context}
\"\"\"

Question: {question}
Answer:"""

print("\n📝 Final Prompt Sent to LLM:\n", prompt_v10[:1000])

# Step 12: Run model
# Here `response` stays a completion object (it is not converted to a string
# as in the Step 12 cell); the text is taken via .text when parsing below.
response = llm.complete(prompt_v10)
print("\n💬 LLM Response:\n", response)

# Step 13: Parse model output into variables
parsed_vars = parse_llm_response(response.text, inputs)

# Step 14: Calculate and show final pension result
calculate_final_pension(parsed_vars)