In [None]:
# src/rag_pipeline.py

import faiss
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# --- 1. Load FAISS index + chunks + metadata ---
def load_vector_store(store_path="vector_store/faiss_index/"):
    """Load the FAISS index together with the pickled chunks and metadata.

    Args:
        store_path: Directory containing ``faiss.index``, ``chunks.pkl`` and
            ``metadata.pkl``. A trailing path separator is optional.

    Returns:
        A tuple ``(index, chunks, metadata)``: the object returned by
        ``faiss.read_index`` and the two unpickled objects.
    """
    import os

    def _load_pickle(filename):
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing; only point store_path at files this project wrote.
        with open(os.path.join(store_path, filename), "rb") as f:
            return pickle.load(f)

    # os.path.join (rather than string concatenation) keeps the file names
    # correct whether or not store_path ends with a separator.
    index = faiss.read_index(os.path.join(store_path, "faiss.index"))
    chunks = _load_pickle("chunks.pkl")
    metadata = _load_pickle("metadata.pkl")
    return index, chunks, metadata

# --- 2. Load embedding model + generator ---
# Both models are created once at import time and shared by the functions below.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
generator = pipeline(
    task="text-generation",
    model="tiiuae/falcon-7b-instruct",  # OR mistral-7B, llama2 etc.
    device=0,  # NOTE(review): presumably the first GPU — confirm for the target machine
)

# --- 3. Retrieval Function ---
def retrieve_relevant_chunks(query, index, chunks, metadata, k=5):
    """Return the chunks and metadata nearest to ``query`` in the vector index.

    Args:
        query: Question text; embedded with the module-level ``embedder``.
        index: Vector index exposing ``search(vectors, k)``.
        chunks: Collection of chunk texts, addressed by the ids the index returns.
        metadata: Collection addressed by the same ids as ``chunks``.
        k: Maximum number of neighbours to retrieve.

    Returns:
        A tuple ``(top_chunks, top_meta)`` of two parallel lists. They hold
        fewer than ``k`` items when the index cannot supply ``k`` neighbours.
    """
    # FAISS operates on float32 vectors; make the dtype explicit.
    query_vec = np.asarray(embedder.encode([query]), dtype="float32")
    _, neighbour_ids = index.search(query_vec, k)

    # FAISS pads the id row with -1 when fewer than k neighbours exist.
    # Indexing with -1 would silently return the *last* chunk, so drop them.
    hits = [i for i in neighbour_ids[0] if i >= 0]

    top_chunks = [chunks[i] for i in hits]
    top_meta = [metadata[i] for i in hits]
    return top_chunks, top_meta

# --- 4. Prompt Template ---
def build_prompt(context, question):
    """Assemble the instruction prompt sent to the generator.

    Args:
        context: Iterable of strings; they are joined with a blank line
            between consecutive items.
        question: The user question inserted under the ``Question:`` heading.

    Returns:
        The prompt text: instructions, context, question and a trailing
        ``Answer:`` cue, with the sections separated by blank lines.
    """
    context_block = "\n\n".join(context)
    sections = (
        "You are a financial analyst assistant for CrediTrust. "
        "Your job is to answer questions based on real customer complaints.",
        "Use only the information in the context below. "
        "If the answer is not in the context, say \"I don't have enough information.\"",
        f"Context:\n{context_block}",
        f"Question:\n{question}",
        "Answer:",
    )
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)

# --- 5. Complete RAG Pipeline ---
def answer_query(query, index, chunks, metadata, k=5):
    """Run retrieval and generation for a single question.

    Args:
        query: The user question.
        index: Vector index passed through to ``retrieve_relevant_chunks``.
        chunks: Chunk texts passed through to ``retrieve_relevant_chunks``.
        metadata: Metadata passed through to ``retrieve_relevant_chunks``.
        k: Number of chunks to retrieve as context.

    Returns:
        A tuple ``(answer, top_chunks, top_meta)``: the generated answer text
        and the retrieved chunks and metadata used as context.
    """
    top_chunks, top_meta = retrieve_relevant_chunks(query, index, chunks, metadata, k)
    prompt = build_prompt(top_chunks, query)
    # By default the text-generation pipeline returns prompt + completion in
    # "generated_text"; return_full_text=False keeps only the newly generated
    # answer so the caller does not get the whole prompt echoed back.
    outputs = generator(
        prompt,
        max_new_tokens=300,
        do_sample=True,
        return_full_text=False,
    )
    result = outputs[0]["generated_text"]
    return result, top_chunks, top_meta


In [None]:
import os

import pandas as pd

from src.rag_pipeline import load_vector_store, answer_query

# Where the evaluation table is written.
report_path = "report/task3_evaluation.md"

index, chunks, metadata = load_vector_store()

questions = [
    "Why are users unhappy with Buy Now, Pay Later?",
    "What are the common complaints about credit cards?",
    "Do savings accounts have frequent fraud issues?",
    "Why do customers complain about money transfers?",
    "Are there refund delays reported in personal loans?",
]

results = []

# Run every question through the RAG pipeline and collect one row per question.
for q in questions:
    answer, top_chunks, top_meta = answer_query(q, index, chunks, metadata)
    results.append({
        "question": q,
        "answer": answer.strip(),
        "sources": top_chunks[:2],  # keep only the first two retrieved chunks
        "quality": "",  # Fill in manually (1–5)
        "comments": ""  # Write your judgment here
    })

df_eval = pd.DataFrame(results)
# Create the output directory first; to_markdown fails if it does not exist.
os.makedirs(os.path.dirname(report_path), exist_ok=True)
df_eval.to_markdown(report_path, index=False)
print("✅ Evaluation table saved.")
