In [4]:
# === Task 3: Retrieval-Augmented Generation (RAG) ===

import faiss
import json
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# === Load FAISS index and metadata ===
index = faiss.read_index("vector_store/faiss_index.bin")
with open("vector_store/metadata.json", "r", encoding="utf-8") as f:
    metadata = json.load(f)

# === Load the same embedding model used in Task 2 ===
embedder = SentenceTransformer("paraphrase-MiniLM-L3-v2")

# === Load a small LLM for generation (can be swapped later) ===
generator = pipeline("text-generation", model="gpt2", max_length=512)

# === RAG Retriever Function ===
def retrieve_top_k_chunks(query: str, k: int = 5):
    query_embedding = embedder.encode([query], convert_to_numpy=True).astype("float32")
    distances, indices = index.search(query_embedding, k)
    return [metadata[i]["chunk"] for i in indices[0]]

# === Prompt Template Builder ===
def build_prompt(context_chunks, question):
    context = "\n\n".join(context_chunks)
    prompt = f"""
You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints.

Use the following retrieved complaint excerpts to formulate your answer. If the context doesn't contain the answer, state that you don't have enough information.

Context:
{context}

Question: {question}

Answer:"""
    return prompt.strip()

# === RAG Generator Pipeline ===
def answer_question_rag(query, k=5):
    retrieved_chunks = retrieve_top_k_chunks(query, k)
    prompt = build_prompt(retrieved_chunks, query)
    response = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)[0]["generated_text"]

    # Extract answer after "Answer:" if present
    answer_start = response.find("Answer:")
    answer = response[answer_start + len("Answer:"):].strip() if answer_start != -1 else response.strip()

    return {
        "question": query,
        "answer": answer,
        "retrieved_chunks": retrieved_chunks
    }


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu


In [5]:
import textwrap

# === Evaluation Questions ===
evaluation_questions = [
    "Why are customers unhappy with the Buy Now, Pay Later (BNPL) service?",
    "What are the most common reasons for credit card account closures?",
    "How often do users complain about overdraft or late payment fees on savings accounts?",
    "Are there any fraud-related complaints in personal loan applications?",
    "Can I dispute a late payment fee?"
]

# === Run RAG pipeline and collect answers ===
results = []
for q in evaluation_questions:
    rag_result = answer_question_rag(q)
    results.append({
        "question": q,
        "answer": rag_result["answer"],
        "sources": " | ".join(chunk.replace("\n", " ")[:300] for chunk in rag_result["retrieved_chunks"][:2]),  # Shortened and cleaned
        "quality": "",
        "comments": ""
    })

# === Print nicely ===
for i, r in enumerate(results, 1):
    print(f"\n=== Question {i} ===")
    print(f"Q: {r['question']}")
    print("Answer:")
    print(textwrap.fill(r['answer'], width=100))
    print("\nRetrieved Sources:")
    print(textwrap.fill(r['sources'], width=100))
    print(f"\nQuality Score (1-5): {r['quality']}")
    print(f"Comments: {r['comments']}")
    print("=" * 120)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



=== Question 1 ===
Q: Why are customers unhappy with the Buy Now, Pay Later (BNPL) service?
Answer:
I understand that this service is not a good option for customers and it is not an option for the
merchants. My answer is that the offer to use the service is not a good option for the merchants and
the merchants have to pay for it. The only way they can get it is to buy it from the merchants and
then they can give it to the customers who are paying the bill.  I understand that this service is
not a good option for all customers but for the merchants are the only ones who can afford to pay
for it. When you go into a store and are asked to provide the service and you provide the same
service to all of your customers it is not a good service for them but it is bad for the customers.
I understand that we are in a market where there are a lot of shopping malls and you can't just go
out there and buy the cheapest goods and services and people will not even bother to use it  Why
would these m

In [6]:
def answer_question_rag(query, k=5):
    ...
    return {
        "question": query,
        "answer": answer,
        "retrieved_chunks": retrieved_chunks
    }

def gradio_interface(query):
    result = answer_question_rag(query)
    answer = result["answer"]
    sources = "\n\n---\n\n".join(result["retrieved_chunks"][:2])  # show top 2 sources
    return answer, sources



In [None]:
import gradio as gr

interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here...", label="User Question"),
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Top Retrieved Chunks")
    ],
    title="CrediTrust Complaint Assistant (RAG)",
    description="Ask a question about financial complaints. Answer is generated using retrieved complaint chunks.",
)

interface.launch(share=True)  # `share=True` gives public URL


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://a85b3ca897e83ee921.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "c:\Users\pc\Desktop\KAIM\intelligent-complaint-analysis-w6\.venv\Lib\site-packages\gradio\queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\KAIM\intelligent-complaint-analysis-w6\.venv\Lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\KAIM\intelligent-complaint-analysis-w6\.venv\Lib\site-packages\gradio\blocks.py", line 2220, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\pc\Desktop\KAIM\intelligent-complaint-analysis-w6\.venv\Lib\site-packages\gradio\blocks.py", line 1731, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^