In [1]:
import chromadb
from sentence_transformers import SentenceTransformer

# --- Retrieval resources ---
# Where the persisted Chroma store lives, relative to the notebook's working
# directory. It has to point at the 'vector_store' folder produced in Task 2.
VECTOR_STORE_PATH = "../vector_store"
COLLECTION_NAME = "bank_complaints"
# Embedding model name; per the original note this is the same model that
# was used in Task 2.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Open the on-disk store and fetch the existing collection by name.
client = chromadb.PersistentClient(path=VECTOR_STORE_PATH)
collection = client.get_collection(name=COLLECTION_NAME)

# Load the sentence-embedding model used to encode incoming questions.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

def get_relevant_context(query, k=5):
    """Retrieve the stored complaint chunks most similar to a question.

    The question is embedded with the module-level ``model`` and the resulting
    vector is used to query the module-level ``collection``.

    Args:
        query: The user's question.
        k: Number of results requested from the vector store (default 5).

    Returns:
        A ``(context, retrieved_docs, metadata)`` tuple where ``context`` is
        the retrieved documents joined by blank lines, ``retrieved_docs`` is
        the list of documents returned for the question, and ``metadata`` is
        the list of metadata entries returned alongside them (kept so sources
        can be shown later).
    """
    question_vector = model.encode(query).tolist()

    # Exactly one query vector is submitted, so the hits for it sit at
    # index 0 of each result field.
    hits = collection.query(query_embeddings=[question_vector], n_results=k)
    retrieved_docs, metadata = hits['documents'][0], hits['metadatas'][0]

    return "\n\n".join(retrieved_docs), retrieved_docs, metadata

# --- TEST THE RETRIEVER ---
# Smoke test: run one representative question through the retriever and show
# how many chunks came back plus the start of the top-ranked one.
test_query = "What are common complaints about credit card billing disputes?"
context, docs, meta = get_relevant_context(test_query)

print(f"Retrieved {len(docs)} relevant chunks.")
print("-" * 30)
if docs:
    # Only the first 200 characters are shown to keep the cell output short.
    print(f"Sample of retrieved context:\n{docs[0][:200]}...")
else:
    # An empty result list would make docs[0] raise IndexError, so report it
    # instead of crashing the cell.
    print("Sample of retrieved context:\nN/A (no chunks were retrieved)")

Retrieved 5 relevant chunks.
------------------------------
Sample of retrieved context:
i am filing this complaint to address unresolved billing disputes on my bank of america credit card account ending in despite initiating multiple disputes and following the necessary procedures...


In [16]:
import os

from huggingface_hub import InferenceClient

# 1. Initialize the client with a token read from the environment.
# The token must never be pasted into the notebook: notebook source is shared
# and committed as-is, so a literal token is exposed to every reader. The
# token that used to be hard-coded in this cell should be treated as
# compromised and revoked in the Hugging Face account settings.
HF_TOKEN = os.getenv("HF_TOKEN")

if not HF_TOKEN:
    # Warn but continue; the client is then created without an explicit key.
    print("⚠️ Warning: No HF_TOKEN found in environment variables.")

client = InferenceClient(api_key=HF_TOKEN)

def generate_rag_response(query, context):
    """Ask the chat model to answer a question from retrieved complaint text.

    Args:
        query: The user's question.
        context: Retrieved complaint excerpts, already joined into one string.

    Returns:
        The text of the first completion choice, or a string starting with
        "Error during generation: " when the request or the handling of its
        response raises an exception.
    """
    # The template is kept flush-left on purpose: everything inside the triple
    # quotes, indentation included, becomes part of the prompt.
    prompt_text = f"""You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints. 
Use the following retrieved complaint excerpts to formulate your answer. 
If the context doesn't contain the answer, state that you don't have enough information.

Context:
{context}

Question: 
{query}

Answer:"""

    print("--- GENERATING RESPONSE ---")

    # The whole prompt travels as a single user turn.
    chat_messages = [{"role": "user", "content": prompt_text}]

    try:
        reply = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3-8B-Instruct",
            messages=chat_messages,
            max_tokens=500,
            temperature=0.7,
        ).choices[0].message.content
    except Exception as err:
        # Failures are reported as text so the calling cell keeps running.
        return "Error during generation: " + str(err)

    return reply

# --- TEST THE FULL FLOW ---
# Relies on `test_query` and `context` left behind by the retriever test cell.
test_answer = generate_rag_response(test_query, context)

# Blank line, banner, then the generated answer on its own line.
print("\n--- FINAL AI ANSWER ---", test_answer, sep="\n")

--- GENERATING RESPONSE ---

--- FINAL AI ANSWER ---
Based on the provided context, it's not explicitly stated that the complaints are about credit card billing disputes. However, since the context mentions "as a loyal Bank of America credit card holder" and mentions "credit bureaus reported on," it's possible to infer that the context is related to credit card billing disputes.

Since the complaints are not explicitly stated in the provided context, I will look for clues within the text. The text mentions "according to Chase most recently and many other credit companies" and "as part of the promotion for Chase financial products." This implies that the context is related to the credit card industry and may involve disputes.

However, without more information, I can only make an educated guess that some common complaints about credit card billing disputes may include:

1. Discrepancies in billing or charges
2. Issues with credit limit increases or decreases
3. Problems with promotional

In [None]:
import pandas as pd
import os
from huggingface_hub import InferenceClient

# 1. LOAD TOKEN SECURELY
# The token is looked up in the HF_TOKEN environment variable; it is expected
# to have been set beforehand (for example from a loaded .env file).
HF_TOKEN = os.environ.get("HF_TOKEN")

if not HF_TOKEN:
    # Notebook-friendly alternative when no .env is used (kept disabled):
    # HF_TOKEN = input("Enter your HF Token: ")
    print("⚠️ Warning: No HF_TOKEN found in environment variables.")

# 2. INITIALIZE THE CLIENT
# Created even when the token is missing; only the warning above signals it.
client = InferenceClient(api_key=HF_TOKEN)

def generate_rag_response(query, context):
    """Answer a question from retrieved context via a chat-completion call.

    Args:
        query: The user's question.
        context: Retrieved complaint text to ground the answer in.

    Returns:
        The text of the first completion choice, or a string starting with
        "Final Connection Error: " when the request or the handling of its
        response raises an exception.
    """
    # Flush-left template: leading whitespace inside the triple quotes would
    # otherwise end up in the prompt.
    prompt_text = f"""You are a financial analyst for CrediTrust. 
Answer the question based ONLY on the provided context. 
If the answer is not in the context, say you don't have enough information.

Context: {context}
Question: {query}"""

    # The whole prompt is sent as one user message.
    chat_messages = [{"role": "user", "content": prompt_text}]

    try:
        response = client.chat.completions.create(
            model="HuggingFaceH4/zephyr-7b-beta",
            messages=chat_messages,
            max_tokens=500,
            temperature=0.7,
        )
        answer_text = response.choices[0].message.content
    except Exception as err:
        # Errors come back as text so an evaluation loop is not interrupted.
        return "Final Connection Error: " + str(err)

    return answer_text

# 3. RUN THE FINAL EVALUATION
# Questions used for the qualitative check. The last one is presumably an
# out-of-scope probe (complaint data is unlikely to name a CEO) — TODO confirm.
eval_questions = [
    "What are common complaints about credit card billing disputes?",
    "What difficulties do customers face during the personal loan application process?",
    "Are there reports of unauthorized or hidden fees in savings accounts?",
    "What issues cause delays or problems with international money transfers?",
    "Who is the current CEO of CrediTrust?",
]

all_results = []
for q in eval_questions:
    print(f"Testing: {q}")
    # get_relevant_context has to be defined by an earlier cell.
    context, docs, meta = get_relevant_context(q, k=5)
    answer = generate_rag_response(q, context)

    # Source preview: the first 150 characters of the top-ranked chunk,
    # or a placeholder when nothing was retrieved.
    if docs:
        source_preview = docs[0][:150].strip() + "..."
    else:
        source_preview = "N/A"

    record = {
        "Question": q,
        "Generated Answer": answer,
        "Retrieved Sources": source_preview,
    }
    all_results.append(record)

# 4. OUTPUT AS MARKDOWN TABLE
# One row per question, rendered as markdown text without the index column.
eval_df = pd.DataFrame(all_results)
print("\n--- FINAL QUALITATIVE EVALUATION TABLE ---")
print(eval_df.to_markdown(index=False))

Testing: What are common complaints about credit card billing disputes?
Testing: What difficulties do customers face during the personal loan application process?
Testing: Are there reports of unauthorized or hidden fees in savings accounts?
Testing: What issues cause delays or problems with international money transfers?
Testing: Who is the current CEO of CrediTrust?

--- FINAL QUALITATIVE EVALUATION TABLE ---
| Question                                                                          | Generated Answer                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

In [21]:
import sys
import os

# Resolve the directory one level above the current working directory;
# the notebook treats it as the project root.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Register the root on the import path only when it is missing, so re-running
# this cell does not append duplicate entries.
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

# With the root on sys.path the project module can be imported.
import rag_logic
print("Success! The RAG module has been found in the root directory.")

Success! The RAG module has been found in the root directory.


In [22]:
%pip install gradio


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting gradio
  Downloading gradio-6.3.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting audioop-lts<1.0 (from gradio)
  Downloading audioop_lts-0.2.2-cp313-abi3-win_amd64.whl.metadata (2.0 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading brotli-1.2.0-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.128.0-py3-none-any.whl.metadata (30 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-1.0.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==2.0.3 (from gradio)
  Downloading gradio_client-2.0.3-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_