# ApplePay RAG Chatbot


In [None]:
#


import re, time, json, pathlib, requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

APPLEPAY_ROOTS = [
    "https://www.apple.com/apple-pay/",
    "https://support.apple.com/apple-pay",
    "https://flutterwave.com/ke/support/payment-methods/apple-pay-frequently-asked-questions-faqs",
    "https://aibgb.co.uk/apple-pay/apple-pay-faqs",
    "https://www.americanexpress.com/us/credit-cards/features-benefits/digital-wallets/apple-pay/frequently-asked-questions.html",
    "https://www.wellsfargo.com/help/mobile-features/apple-pay-faqs/"
    #"https://horizonbank.com.au/help/faqs/apple-pay-faqs/" blocked
]

def get_links(base_url):
    resp = requests.get(base_url, timeout=20)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    links = set()
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if href.startswith("#"): continue
        full = urljoin(base_url, href)
        if "apple.com" in full and ("apple-pay" in full or "support.apple.com" in full):
            links.add(full)
    return sorted(links)

def fetch_page(url):
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return r.text

def clean_text(html):
    soup = BeautifulSoup(html, "html.parser")
    for tag in soup(["script","style","noscript"]):
        tag.decompose()
    text = soup.get_text("\n")
    text = re.sub(r"\n{2,}", "\n", text).strip()
    title = soup.title.string.strip() if soup.title else ""
    return {"title": title, "text": text}

output_dir="data/raw"
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
visited = set()
pages = []
for root in APPLEPAY_ROOTS:
    links = get_links(root)
    for url in links:
        if url in visited: continue
        visited.add(url)
        try:
            html = fetch_page(url)
            cleaned = clean_text(html)
            pages.append({"url": url, **cleaned})
            time.sleep(0.5)
        except Exception as e:
            print("Failed:", url, e)
outpath = pathlib.Path(output_dir) / "bs4_pages.json"
with open(outpath, "w", encoding="utf-8") as f:
    json.dump(pages, f, ensure_ascii=False, indent=2)
print(f"BS4 scraped {len(pages)} pages → {outpath}")

#pages/sec 74/1m
#failures 1/7

In [None]:

import trafilatura

URLS = [
    "https://www.apple.com/apple-pay/",
    "https://support.apple.com/apple-pay",
    "https://flutterwave.com/ke/support/payment-methods/apple-pay-frequently-asked-questions-faqs",
    "https://aibgb.co.uk/apple-pay/apple-pay-faqs",
    "https://www.americanexpress.com/us/credit-cards/features-benefits/digital-wallets/apple-pay/frequently-asked-questions.html",
    "https://www.wellsfargo.com/help/mobile-features/apple-pay-faqs/"
    "https://horizonbank.com.au/help/faqs/apple-pay-faqs/"
]

output_dir="data/raw"
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
pages = []
for url in URLS:
    downloaded = trafilatura.fetch_url(url)
    if not downloaded:
        print("Fail:", url); continue
    text = trafilatura.extract(downloaded, include_comments=False, include_tables=False)
    pages.append({"url": url, "title": "", "text": text or ""})
outpath = pathlib.Path(output_dir) / "trafilatura_pages.json"
with open(outpath, "w", encoding="utf-8") as f:
    json.dump(pages, f, ensure_ascii=False, indent=2)
print(f"Trafilatura scraped {len(pages)} pages → {outpath}")

#pages/sec: 5/4s
#failure:1/7


Fail: https://www.wellsfargo.com/help/mobile-features/apple-pay-faqs/https://horizonbank.com.au/help/faqs/apple-pay-faqs/
Trafilatura scraped 5 pages → data/raw/trafilatura_pages.json


In [None]:

def load_pages(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def count_tokens(text):
    # Simple token count by whitespace split
    return len(text.split())

def noise_ratio(text):
    # Heuristic: ratio of non-alphanumeric characters to total characters
    if not text:
        return 1.0
    non_alpha = len(re.findall(r"[^a-zA-Z0-9\s]", text))
    return round(non_alpha / len(text), 4)

def report(method_name, pages):
    num_pages = len(pages)
    total_tokens = 0
    noise_scores = []
    for p in pages:
        txt = p.get("text", "")
        total_tokens += count_tokens(txt)
        noise_scores.append(noise_ratio(txt))
    avg_noise = round(sum(noise_scores) / len(noise_scores), 4) if noise_scores else 0
    return {
        "method": method_name,
        "#pages": num_pages,
        "#tokens": total_tokens,
        "avg_noise_ratio": avg_noise
    }

base_dir = pathlib.Path("data/raw")
bs4_file = base_dir / "bs4_pages.json"
tra_file = base_dir / "trafilatura_pages.json"

bs4_pages = load_pages(bs4_file)
tra_pages = load_pages(tra_file)

bs4_report = report("BeautifulSoup", bs4_pages)
tra_report = report("Trafilatura", tra_pages)

print("\n=== Scrape Quality Report ===")
for r in [bs4_report, tra_report]:
    print(f"\nMethod: {r['method']}")
    print(f"Pages: {r['#pages']}")
    print(f"Tokens: {r['#tokens']}")
    print(f"Avg Noise Ratio: {r['avg_noise_ratio']}")

# Optional: Save to JSON
out_path = base_dir / "scrape_quality_report.json"
with open(out_path, "w", encoding="utf-8") as f:
    json.dump({"BeautifulSoup": bs4_report, "Trafilatura": tra_report}, f, indent=2)
print(f"\nReport saved to {out_path}")


# === Scrape Quality Report ===

# Method: BeautifulSoup
# Pages: 74
# Tokens: 295217
# Avg Noise Ratio: 0.0695

# Method: Trafilatura
# Pages: 5
# Tokens: 4493
# Avg Noise Ratio: 0.0281



=== Scrape Quality Report ===

Method: BeautifulSoup
Pages: 74
Tokens: 295217
Avg Noise Ratio: 0.0695

Method: Trafilatura
Pages: 5
Tokens: 4493
Avg Noise Ratio: 0.0281

Report saved to data/raw/scrape_quality_report.json


In [None]:
import pathlib

in_files=["data/raw/bs4_pages.json"]
out_file="data/processed/pages.json"
pathlib.Path(out_file).parent.mkdir(parents=True, exist_ok=True)
rows = []
for f in in_files:
    with open(f, "r", encoding="utf-8") as fh:
        data = json.load(fh)
        for d in data:
            text = d.get("text")
            if not text and d.get("html"):
                # crude HTML→text fallback
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(d["html"], "html.parser")
                text = soup.get_text("\n")
            if not text: continue
            rows.append({
                "url": d["url"],
                "title": d.get("title",""),
                "text": text.strip()
            })
with open(out_file, "w", encoding="utf-8") as out:
    json.dump(rows, out, ensure_ascii=False, indent=2)
print(f"Normalized {len(rows)} pages → {out_file}")





Normalized 77 pages → data/processed/pages.json


In [None]:
#chunkers
#fixed size chunker----------------------------------------------
def fixed_chunk_text(text:str, chunk_size:int=512, overlap:int=0):
    words = text.split()
    chunks=[]; i=0
    while i < len(words):
        chunks.append(" ".join(words[i:i+chunk_size]))
        i += max(1, chunk_size-overlap)
    return chunks

#semantic chunker------------------------------------------------------------------
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab')

semantic_model = SentenceTransformer("all-MiniLM-L6-v2")

def semantic_chunk_text(text, similarity_threshold=0.8, max_tokens=500):
    """
    Splits text into semantic chunks based on sentence similarity and max token length.
    """
    sentences = nltk.sent_tokenize(text)
    if not sentences:
        return []
    embeddings = semantic_model.encode(sentences)
    chunks = []
    current_chunk = [sentences[0]]
    current_embedding = embeddings[0]
    for i in range(1, len(sentences)):
        sim = cosine_similarity([current_embedding], [embeddings[i]])[0][0]
        chunk_token_count = len(" ".join(current_chunk)) // 4
        if sim >= similarity_threshold and chunk_token_count < max_tokens:
            current_chunk.append(sentences[i])
            current_embedding = (current_embedding + embeddings[i]) / 2
        else:
            chunks.append(" ".join(current_chunk))
            current_chunk = [sentences[i]]
            current_embedding = embeddings[i]
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

#recursive chunker------------------------------------------------------------------

def recursive_chunk_text(text, max_chunk_size=1000):
    """
    Recursively splits a block of text into chunks that fit within size constraints.
    Tries splitting by sections, then newlines, then sentences.
    """
    import nltk
    nltk.download("punkt", quiet=True)

    def split_chunk(chunk):
        if len(chunk) <= max_chunk_size:
            return [chunk]
        # Try splitting by double newlines
        sections = chunk.split("\n\n")
        if len(sections) > 1:
            result = []
            for section in sections:
                if section.strip():
                    result.extend(split_chunk(section.strip()))
            return result
        # Try splitting by single newline
        sections = chunk.split("\n")
        if len(sections) > 1:
            result = []
            for section in sections:
                if section.strip():
                    result.extend(split_chunk(section.strip()))
            return result
        # Fallback: split by sentences
        sentences = nltk.sent_tokenize(chunk)
        chunks, current_chunk, current_size = [], [], 0
        for sentence in sentences:
            if current_size + len(sentence) > max_chunk_size:
                if current_chunk:
                    chunks.append(" ".join(current_chunk))
                current_chunk = [sentence]
                current_size = len(sentence)
            else:
                current_chunk.append(sentence)
                current_size += len(sentence)
        if current_chunk:
            chunks.append(" ".join(current_chunk))
        return chunks

    return split_chunk(text)

#structure based chunker------------------------------------------------------------------

def structure_chunk_text(text):
    """
    Splits text into chunks based on detected headings (e.g., CHAPTER, section numbers).
    """
    lines = text.split("\n")
    chunks = []
    current_chunk = []

    heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
    for line in lines:
        if any(heading in (line.strip()) for heading in heading_tags) and current_chunk:
            chunks.append("\n".join(current_chunk))
            current_chunk = [line]
        else:
            current_chunk.append(line)
    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks

#LLM based chunker------------------------------------------------------------------
from openai import OpenAI
import os
def llm_chunk_text(text, chunk_size=1000, model="gpt-4o-mini", api_key=None):
    """
    Uses an LLM to find semantically coherent chunk boundaries around a target chunk size.
    """
    client = OpenAI(api_key=api_key)
    def get_chunk_boundary(text_segment):
        prompt = f"""
        Analyze the following text and identify the best point to split it
        into two semantically coherent parts. The split should occur near {chunk_size} characters.
        Text:
        \"\"\"{text_segment}\"\"\"
        Return only the integer index (character position) within this text where the split should occur.
        """
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a text analysis expert."},
                {"role": "user", "content": prompt}
            ],
            temperature=0
        )
        split_str = response.choices[0].message.content.strip()
        try:
            split_point = int(split_str)
        except ValueError:
            split_point = chunk_size
        return split_point

    chunks = []
    remaining_text = text
    while len(remaining_text) > chunk_size:
        text_window = remaining_text[:chunk_size * 2]
        split_point = get_chunk_boundary(text_window)
        if split_point < 100 or split_point > len(text_window) - 100:
            split_point = chunk_size
        chunks.append(remaining_text[:split_point].strip())
        remaining_text = remaining_text[split_point:].strip()
    if remaining_text:
        chunks.append(remaining_text)
    return chunks



  heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
  heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
  heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
  heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
  heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
  heading_tags = ('\h1','\h2','\h3','\h4','\h5','\h6')
  from .autonotebook import tqdm as notebook_tqdm


[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/andreas_koutsopoulos/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:


from pathlib import Path
import json

# Paths
INPUT_FILE = "data/processed/pages.json"
OUTPUT_DIR = Path("data/chunks")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def load_pages(path):
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def chunk_page(page, strategy="fixed", **kwargs):
    text = page.get("text", "")
    if not text.strip():
        return []
    if strategy == "fixed":
        chunks = fixed_chunk_text(text, chunk_size=kwargs.get("chunk_size", 512),overlap=kwargs.get("overlap", 64))
    elif strategy == "semantic":
        chunks = semantic_chunk_text(text,
                                     similarity_threshold=kwargs.get("similarity_threshold", 0.8),
                                     max_tokens=kwargs.get("max_tokens", 500))
    elif strategy == "recursive":
        chunks = recursive_chunk_text(text,
                                      max_chunk_size=kwargs.get("max_chunk_size", 1000))
    elif strategy == "structure":
        chunks = structure_chunk_text(text)
    elif strategy == "llm":
        chunks = llm_chunk_text(text,
                                chunk_size=kwargs.get("chunk_size", 1000),
                                model=kwargs.get("model", "gpt-4o-mini"),
                                api_key=kwargs.get("api_key"))
    else:
        raise ValueError(f"Unknown strategy: {strategy}")
    # Wrap chunks with metadata
    return [
        {
            "url": page.get("url"),
            "title": page.get("title"),
            "chunk_index": i,
            "chunk": c,
            "chunk_char_count": len(c),
            "chunk_word_count": len(c.split()),
            "chunk_token_count": round(len(c) / 4, 2),  # approx tokens
            "strategy": strategy
        }
        for i, c in enumerate(chunks)
    ]

def process_pages(strategy="fixed", **kwargs):
    pages = load_pages(INPUT_FILE)
    all_chunks = []
    for page in pages:
        all_chunks.extend(chunk_page(page, strategy=strategy, **kwargs))
    # Save chunks
    out_file = OUTPUT_DIR / f"{strategy}_chunks.json"
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(all_chunks, f, ensure_ascii=False, indent=2)
    print(f"✅ {strategy.capitalize()} chunking complete: {len(all_chunks)} chunks saved to {out_file}")

# Example runs:
process_pages(strategy="fixed", chunk_size=512, overlap=128)
process_pages(strategy="semantic", similarity_threshold=0.75, max_tokens=500)
#process_pages(strategy="recursive", max_chunk_size=800)
#process_pages(strategy="structure")
#process_pages(strategy="llm", chunk_size=800, model="gpt-4o-mini", api_key="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")

#chunk sizes
#Fixed chunking complete: 9725 chunks saved to data\chunks\fixed_chunks.json  2s
#Llm chunking complete: 5996 chunks saved to data\chunks\llm_chunks.json 43m
#semantic 6539  21m
#recursive: 55213 
#structure: 365 
#fixed size: 256,0: 1209, 256,64: 1598, 256,128: 2380,
#  512,0: 631, 512,64: 712, 512,128: 816,

✅ Fixed chunking complete: 802 chunks saved to data/chunks/fixed_chunks.json


✅ Semantic chunking complete: 6318 chunks saved to data/chunks/semantic_chunks.json


In [None]:
#emdedings
from backend.embeddings.e5_embed import embed_texts as e5_embed
from backend.embeddings.bge_embed import embed_texts as bge_embed
from backend.vectorstore.chroma_store import get_chroma, upsert_docs
from backend.embeddings.minilm_embed import embed_texts as minilm_embed
from backend.vectorstore.faiss_store import FaissStore
import faiss

import json
import os
import pandas as pd
import numpy as np

# load chunks
chunk_file="data/chunks/llm_chunks.json"
with open(chunk_file,"r",encoding="utf-8") as f:
    chunks = json.load(f)

#embed

embed_fn = e5_embed
vectors_e5 = embed_fn([c["chunk"] for c in chunks])
# Save embeddings to file
embeddings_df = pd.DataFrame(vectors_e5)
embeddings_df_save_path = "vectors_e5.csv"
embeddings_df.to_csv(embeddings_df_save_path, index=False)
vectors=vectors_e5
indexfile="index_e5.faiss"
docsfile="docs_e5.json"

# embed_fn = bge_embed
# vectors_bge = embed_fn([c["chunk"] for c in chunks])
# # Save embeddings to file
# embeddings_df = pd.DataFrame(vectors_bge)
# embeddings_df_save_path = "vectors_bge.csv"
# embeddings_df.to_csv(embeddings_df_save_path, index=False)
# vectors=vectors_bge
# indexfile="index_bge.faiss"
# docsfile="docs_e5.json"

# embed_fn = minilm_embed
# vectors_minilm = embed_fn([c["chunk"] for c in chunks])
# # Save embeddings to file
# embeddings_df = pd.DataFrame(vectors_minilm)
# embeddings_df_save_path = "vectors_minilm.csv"
# embeddings_df.to_csv(embeddings_df_save_path, index=False)
# vectors=vectors_minilm
# indexfile="index_minilm.faiss"
# docsfile="docs_minilm.json"

#store

index_dir = "data/index/faiss"
os.makedirs(index_dir, exist_ok=True)

dim = len(vectors[0])
index = faiss.IndexFlatIP(dim)  # cosine similarity with normalized vectors
emb = np.array(vectors).astype("float32")
faiss.normalize_L2(emb)
index.add(emb)

# Save index
faiss.write_index(index, os.path.join(index_dir, indexfile))

# Save docs metadata
docs_path = os.path.join(index_dir, docsfile)
with open(docs_path, "w", encoding="utf-8") as f:
    json.dump(chunks, f, ensure_ascii=False, indent=2)

print(f"✅ FAISS index built with {len(chunks)} chunks → {index_dir}")


✅ FAISS index built with 77 chunks → data/index/faiss


In [None]:

from typing import Dict, List, Tuple
from openai import OpenAI


def retrieve(query: str, top_k: int = 6):
    #embed_fn = e5_embed
    qv = embed_fn([query])[0]
    results = []

    # FAISS retrieval
    index_dir = "data/index/faiss"
    index_path = os.path.join(index_dir, indexfile)
    docs_path = os.path.join(index_dir, docsfile)

    if not os.path.exists(index_path) or not os.path.exists(docs_path):
        raise FileNotFoundError("FAISS index or docs.json not found. Run build_index first.")

    # Load FAISS index
    import faiss
    import numpy as np
    with open(docs_path, "r", encoding="utf-8") as f:
        docs = json.load(f)

    index = faiss.read_index(index_path)
    q = np.array([qv]).astype("float32")
    faiss.normalize_L2(q)
    D, I = index.search(q, top_k)

    for rank, idx in enumerate(I[0]):
        doc = docs[idx]
        results.append({
            "text": doc.get("chunk", doc.get("chunk_text", "")),
            "meta": {
                "url": doc.get("url"),
                "title": doc.get("title"),
                "strategy": doc.get("strategy", "unknown"),
                "score": float(D[0][rank])
            }
        })
    return results


def answer(query: str, contexts: List[Dict]) -> Dict:
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    sys = "You are a customer support assistant for Apple Pay. Answer strictly using the provided context and cite sources."
    citations_md = "\n\n".join([f"[{i+1}] {c['meta']['title'] or c['meta']['url']} — {c['meta']['url']}" for i, c in enumerate(contexts)])
    context_text = "\n\n---\n\n".join([c["text"] for c in contexts])

    prompt = f"""Question: {query}

Context (use only this information):
{context_text}

Citations:
{citations_md}

Instructions:
- If an answer is not present in the context, say you don't know and suggest contacting Apple support.
- Include bracketed citation numbers like [1], [2] inline where relevant.
- Keep answers concise and accurate."""

    t0 = time.time()
    resp = client.chat.completions.create(
        model=os.getenv("OPENAI_CHAT_MODEL","gpt-4o-mini"),
        messages=[{"role":"system","content":sys},
                  {"role":"user","content":prompt}],
        temperature=0.0
    )
    dt = int((time.time() - t0) * 1000)
    content = resp.choices[0].message.content
    usage = resp.usage
    return {
        "answer": content,
        "citations": [{"index": i+1, "url": c["meta"]["url"], "title": c["meta"]["title"], "snippet": c["text"][:240]} for i, c in enumerate(contexts)],
        "latency_ms": dt,
        "usage": {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens, "total_tokens": usage.total_tokens}
    }

#e5_embed: 18.9MB, Latency P50: 2315.5 ms; P95: 6044 ms
#bge: 9.4MB
#minilim: 9.4MB


In [None]:

import json, time, statistics

def run_eval(top_k=6):
    with open("backend/evaluation/queries.json","r") as f:
        queries = json.load(f)
    latencies = []
    for q in queries:
        t0 = time.time()
        ctx = retrieve(q, top_k=top_k)
        res = answer(q, ctx)
        latencies.append(res["latency_ms"])
        # TODO: compute P@1, Recall@k, MRR against a labeled mapping of query→relevant URLs
        print(f"Q: {q}\nA: {res['answer'][:220]}...\nLatency: {res['latency_ms']} ms\n---")
    print("Latency P50:", statistics.median(latencies), "ms; P95:", sorted(latencies)[int(0.95*len(latencies))], "ms")

if __name__ == "__main__":
    run_eval()


Q: How do I add a new card to Apple Pay?
A: To add a new card to Apple Pay, follow these steps based on your device:

- **Mac**: On models with Touch ID, go to System Preferences, then Wallet & Apple Pay, and tap Add Card.
  
- **iPhone**: Open the Wallet app and ...
Latency: 6044 ms
---
Q: What fees apply when using Apple Pay?
A: Apple does not charge any fees when you pay with Apple Pay, whether in stores, online, or in apps [1][4]....
Latency: 1074 ms
---
Q: How can I request a refund for an Apple Pay purchase?
A: To request a refund for an Apple Pay purchase, you can handle it in the same way as you would for other card purchases made using Apple Pay. This includes making changes to your order, returns, or refunds as per the merc...
Latency: 2193 ms
---
Q: Is Apple Pay supported on Apple Watch without iPhone?
A: The provided context does not specify whether Apple Pay can be used on the Apple Watch without an iPhone. It mentions that you can complete purchases using Apple Pay on a c

In [None]:

import json
import time
import statistics
import re
import csv
from typing import List, Dict


from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

model = SentenceTransformer("all-MiniLM-L6-v2")
threshold = 0.65

# ---------------- Tokenization ----------------
def tokenize(text: str) -> List[str]:
    return re.findall(r"\w+", text.lower())

def answer_f1(predicted: str, reference: str) -> float:
    """
    Compute F1 using cosine similarity for precision and recall.
    Precision = sim(pred, ref) / sim(pred, pred)
    Recall    = sim(pred, ref) / sim(ref, ref)
    """
    if not predicted.strip() or not reference.strip():
        return 0.0

    # Embed both answers
    embeddings = model.encode([predicted, reference])
    pred_emb, ref_emb = embeddings[0], embeddings[1]

    # Cosine similarities
    sim_pred_ref = cosine_similarity([pred_emb], [ref_emb])[0][0]
    sim_pred_pred = cosine_similarity([pred_emb], [pred_emb])[0][0]  # should be 1.0
    sim_ref_ref = cosine_similarity([ref_emb], [ref_emb])[0][0]      # should be 1.0

    # Compute precision and recall
    precision = sim_pred_ref / sim_pred_pred if sim_pred_pred > 0 else 0
    recall = sim_pred_ref / sim_ref_ref if sim_ref_ref > 0 else 0

    # F1 formula
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)


def precision_at_1(results: List[str], relevant: str) -> float:
    if not results:
        return 0.0
    ref_emb = model.encode([relevant])
    top_emb = model.encode([results[0]])
    
    sims=float(cosine_similarity(ref_emb, top_emb)[0][0])  # similarity score
    if sims > threshold:  # threshold for "relevant enough"
        return 1.0
    return 0.0


def recall_at_k(results: List[str], relevant: str) -> float:
    if not results:
        return 0.0
    ref_emb = model.encode([relevant])
    retrieved_embs = model.encode(results)
    sims = cosine_similarity(ref_emb, retrieved_embs)[0]
    sims=float(max(sims))  # best similarity among top-k
    if sims > threshold:  # threshold for "relevant enough"
        return 1.0
    return 0.0    

def mrr(results: List[str], relevant: str) -> float:
    if not results:
        return 0.0
    ref_emb = model.encode([relevant])
    retrieved_embs = model.encode(results)
    sims = cosine_similarity(ref_emb, retrieved_embs)[0]
    # Rank by similarity
    #sorted_indices = np.argsort(-sims)
    for idx in range(1,len(sims)+1):
        if sims[idx-1] > threshold:  # threshold for "relevant enough"
            return 1.0 / idx
    return 0.0


# ---------------- Ground Truth ----------------
GROUND_TRUTH = {
    "How do I add a new card to Apple Pay?": "Open the Wallet app on your iPhone, tap the Add (+) button, and follow the prompts to add a credit or debit card. You can also add cards through the Apple Watch app for your watch.",
    "What fees apply when using Apple Pay?": "Apple does not charge fees for using Apple Pay. However, your bank or card issuer may apply standard transaction or international fees.",
    "How can I request a refund for an Apple Pay purchase?": "Refunds are handled by the merchant. Contact the store or app where you made the purchase; the refund will return to your original payment card in Wallet.",
    "Is Apple Pay supported on Apple Watch without iPhone?": "Yes, once Apple Pay is set up on your Apple Watch, you can make payments without your iPhone nearby. Initial setup requires pairing with an iPhone.",
    "What are Apple Pay security features like tokenization?": "Apple Pay uses tokenization, device-specific numbers, and dynamic security codes. Your actual card number is never shared with merchants or stored on Apple servers.",
    "How do merchants integrate Apple Pay on the web?": "Merchants can enable Apple Pay by using Apple Pay JS API and configuring their payment processor to support Apple Pay. They must register with Apple and verify their domain.",
    "Where can I find KYC or identity verification requirements?": "KYC requirements apply to Apple Cash and some bank cards. Verification is done through your bank or Apple Cash setup in Wallet, where you may need to provide personal details.",
    "How do I remove a card from Apple Pay?": "Open the Wallet app, select the card, tap More (…), and choose Remove Card. On Apple Watch, use the Watch app on iPhone or remove directly from the watch.",
    "Does Apple Pay work internationally and what are limits?": "Apple Pay works in most countries where contactless payments are accepted. Limits depend on local regulations and your card issuer, not Apple Pay itself.",
    "How do I contact Apple support for Apple Pay disputes?": "Visit support.apple.com/apple-pay or use the Apple Support app. For transaction disputes, contact your bank or card issuer first."
}

# ---------------- Evaluation Loop ----------------
# from rag_pipeline import retrieve, answer  # Uncomment in your environment

def run_eval(top_k=6, output_csv="evaluation_report.csv"):
    with open("backend/evaluation/queries.json", "r") as f:
        queries = json.load(f)

    latencies = []
    rows = []
    f1_scores, p1_scores, recall_scores, mrr_scores = [], [], [], []

    for q in queries:
        t0 = time.time()
        ctx = retrieve(q, top_k=top_k)  # Retrieves top_k chunks
        res = answer(q, ctx)            # Generates answer using RAG
        latency = res["latency_ms"]
        latencies.append(latency)

        predicted = res["answer"]
        reference = GROUND_TRUTH.get(q, "")
        retrieved_texts = [c["text"] for c in ctx]

        # Compute metrics
        f1 = answer_f1(predicted, reference)
        p1 = precision_at_1(retrieved_texts, reference)
        rec = recall_at_k(retrieved_texts, reference)
        rr = mrr(retrieved_texts, reference)

        f1_scores.append(f1)
        p1_scores.append(p1)
        recall_scores.append(rec)
        mrr_scores.append(rr)

        rows.append([q, f1, p1, rec, rr, latency])

        print(f"Q: {q}\nA: {predicted[:220]}...\nF1={f1:.3f}, P@1={p1:.3f}, Recall@k={rec:.3f}, MRR={rr:.3f}, Latency={latency}ms\n---")

    # Write CSV
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Query", "Answer F1", "Precision@1", "Recall@k", "MRR", "Latency(ms)"])
        writer.writerows(rows)
        writer.writerow([])
        writer.writerow(["Average",
                         statistics.mean(f1_scores),
                         statistics.mean(p1_scores),
                         statistics.mean(recall_scores),
                         statistics.mean(mrr_scores),
                         statistics.median(latencies)])
    print(f"\n✅ Evaluation complete → {output_csv}")
    print(f"Average F1={statistics.mean(f1_scores):.4f}, P@1={statistics.mean(p1_scores):.4f}, Recall@k={statistics.mean(recall_scores):.4f}, MRR={statistics.mean(mrr_scores):.4f}")
    print(f"Latency P50={statistics.median(latencies)} ms; P95={sorted(latencies)[int(0.95*len(latencies))]} ms")

if __name__ == "__main__":
    run_eval()

# minilm_embed: Average F1=0.7774, P@1=0.3000, Recall@k=0.6000, MRR=0.4033,Latency P50=1567.0 ms; P95=2454 ms, cost: 0.1$
#e5 embed: Average F1=0.8150, P@1=0.6000, Recall@k=0.8000, MRR=0.6833, Latency P50=1899.5 ms; P95=3402 ms, cost 0.15$
#bge embed: Average F1=0.7674, P@1=0.5000, Recall@k=0.6000, MRR=0.5333, Latency P50=1144.0 ms; P95=1928 ms, cost 0.15$

#e5
#semantic: Average F1=0.8150, P@1=0.6000, Recall@k=0.8000, MRR=0.6833, Latency P50=1899.5 ms; P95=3402 ms,(2.5 sec) cost 0.1$
#fixed 256,0: Average F1=0.8274, P@1=0.5000, Recall@k=0.7000, MRR=0.5833, Latency P50=1995.0 ms; P95=4886 ms, 2.9sec
#fixee 256,64: Average F1=0.8049, P@1=0.5000, Recall@k=0.6000, MRR=0.5333,Latency P50=3080.5 ms; P95=5507 ms 3.6 sec
#fixed 256,128: Average F1=0.8231, P@1=0.5000, Recall@k=0.7000, MRR=0.5833, Latency P50=3752.5 ms; P95=6097 ms 4 sec
#fixed 512,0: Average F1=0.8338, P@1=0.5000, Recall@k=0.6000, MRR=0.5500, Latency P50=3005.5 ms; P95=5658 ms  3.5sec
#fixed 512,64: Average F1=0.8278, P@1=0.4000, Recall@k=0.5000, MRR=0.4500, Latency P50=2938.5 ms; P95=4475 ms 3.5 sec
#fixed 512,128: Average F1=0.7967, P@1=0.4000, Recall@k=0.6000, MRR=0.4833, Latency P50=2801.5 ms; P95=6072 ms 3.5sec
#llm chunks: Average F1=0.8351, P@1=0.6000, Recall@k=0.7000, MRR=0.6250,Latency P50=1900.0 ms; P95=5205 ms 2.6 sec
#recursive: Average F1=0.8039, P@1=0.6000, Recall@k=0.8000, MRR=0.6450, Latency P50=1390.5 ms; P95=2789 ms 2.2 sec
#structure: Average F1=0.8472, P@1=0.5000, Recall@k=0.5000, MRR=0.5000,Latency P50=2845.0 ms; P95=12224 ms 4.3 sec

Q: How do I add a new card to Apple Pay?
A: To add a new card to Apple Pay, follow these steps based on your device:

**On iPhone:**
1. Open the Wallet app and tap the Add button.
2. Tap "Debit or Credit Card" to add a new card.
3. Tap Continue.
4. Tap or hold you...
F1=0.856, P@1=1.000, Recall@k=1.000, MRR=1.000, Latency=5546ms
---
Q: What fees apply when using Apple Pay?
A: The provided context does not specify any fees associated with using Apple Pay. It mentions that Apple Pay is a service provided by Apple Payments Services LLC and that any card used in Apple Pay is offered by the card i...
F1=0.898, P@1=0.000, Recall@k=0.000, MRR=0.000, Latency=1908ms
---
Q: How can I request a refund for an Apple Pay purchase?
A: To request a refund for an Apple Pay purchase, you need to contact the merchant directly. Apple Pay does not handle refunds; the process is managed by the merchant from whom you made the purchase. If you have questions a...
F1=0.765, P@1=0.000, Recall@k=0.000, MRR=0.000