In [1]:
import sys
from pathlib import Path

# Project root is taken to be the parent of the current working directory.
PROJECT_ROOT = Path.cwd().resolve().parent

# Put the project root on the import path so the `src.*` imports below resolve.
# The membership check keeps this cell idempotent: re-running it does not pile
# up duplicate sys.path entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))


from src.rag.vector_store import ComplaintVectorStore
from src.rag.retriever import ComplaintRetriever
from src.rag.generator import ComplaintGenerator
from src.rag.pipeline import ComplaintRAGPipeline
from src.rag.evaluate import evaluate


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create the vector store on top of the complaint-embeddings parquet file
vector_store = ComplaintVectorStore(parquet_path="../data/complaint_embeddings.parquet")

# Load the FAISS index (the method name suggests it is built when none exists yet)
vector_store.load_or_build()



Loading existing FAISS index...


<faiss.swigfaiss_avx2.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x0000011EFA9B1E60> >

In [3]:
# Sanity check: number of vectors the FAISS index reports holding
vector_store.index.ntotal

1375327

In [4]:
# Retrieval side: wraps the vector store created earlier
retriever = ComplaintRetriever(vector_store=vector_store)

# Generation side: constructed with its default arguments
generator = ComplaintGenerator()


Device set to use cpu


✓ ComplaintGenerator loaded: google/flan-t5-base


In [16]:
results = retriever.retrieve("What are the common complaints about credit cards?")

# Walk the hits in the order they were returned, numbering them from 1.
for rank, res in enumerate(results, start=1):
    print(f"--- Result {rank} ---")
    # Only the first 200 characters of each chunk are shown, for brevity
    print(f"TEXT: {res['text'][:200]}...")
    print(f"METADATA: {res['metadata']}")
    print(f"SCORE: {res['score']}")
    print("\n")

--- Result 1 ---
TEXT: ompetence of the bank of america credit card team. i have used many credit cards, and i have never encountered such issues, and have to talk to clueless representatives over and over again....
METADATA: {'chunk_index': 5, 'company': 'BANK OF AMERICA, NATIONAL ASSOCIATION', 'complaint_id': '10964872', 'date_received': '2024-11-28', 'issue': 'Trouble using your card', 'product': 'Credit card', 'product_category': 'Credit Card', 'state': 'WA', 'sub_issue': "Can't use card to make purchases", 'total_chunks': 6}
SCORE: 0.5193817019462585


--- Result 2 ---
TEXT: nd i have learned a lot over this complaint cycle. maybe at the least it will help others that have a similar experience with and their credit card company. in my opinion, and for various facts discov...
METADATA: {'chunk_index': 1, 'company': 'BANK OF AMERICA, NATIONAL ASSOCIATION', 'complaint_id': '2859173', 'date_received': '2018-03-29', 'issue': 'Problem with a purchase shown on your statement', 'product':

In [5]:
# Wire the retriever and the generator together into one RAG pipeline
rag_pipeline = ComplaintRAGPipeline(retriever=retriever, generator=generator)


In [6]:
question = "What problems do customers have with credit cards?"
result = rag_pipeline.answer(question)

# Show the echoed question, the generated answer, and the metadata of the
# first returned source.
print(f"Question: {result['question']}")
print(f"Answer:\n {result['answer']}")
print(f"\nSample Source Metadata: {result['sources'][0]['metadata']}")


Question: What problems do customers have with credit cards?
Answer:
 lack of service and clarity

Sample Source Metadata: {'chunk_index': 2, 'company': 'CITIBANK, N.A.', 'complaint_id': '6063390', 'date_received': '2022-10-07', 'issue': 'Other features, terms, or problems', 'product': 'Credit card or prepaid card', 'product_category': 'Credit Card', 'state': 'AZ', 'sub_issue': 'Other problem', 'total_chunks': 3}


In [7]:
question = "Are there delays with money transfers?"
result = rag_pipeline.answer(question)

# Show the echoed question, the generated answer, and the metadata of the
# first returned source.
print(f"Question: {result['question']}")
print(f"Answer:\n {result['answer']}")
print(f"\nSample Source Metadata: {result['sources'][0]['metadata']}")


Question: Are there delays with money transfers?
Answer:
 there can be delays but holding onto an account holders money for 3 weeks seems excessive - especially given that the dollar amount is so low. it also seems that there should be a requirement that the bank provides notification of this delay but there has been zero communication co delays but it is not in their account to this day. i called them and wanted to talk to a supervisor and they advised me to reply to the email instead of calling here. i did respond to the email again on that day and havent heard anything again. the resolution they had was since it was transferred with delays they will reimburse me the cent but i ha and they went smoothly within the promised 5 business days. in fact, i initiated another transfer after this delayed transaction, and it has already been successfully paid out to the recipient. this further adds to my confusion and concern regarding the current delayed transfer. the funds from the delayed t

In [8]:
question = "What issues do customers report about personal loans?"
result = rag_pipeline.answer(question)

# Show the echoed question, the generated answer, and the metadata of the
# first returned source.
print(f"Question: {result['question']}")
print(f"Answer:\n {result['answer']}")
print(f"\nSample Source Metadata: {result['sources'][0]['metadata']}")


Question: What issues do customers report about personal loans?
Answer:
 early loan repayments, interest reimbursements, and effects of consumer grievances on subsequent loan applications

Sample Source Metadata: {'chunk_index': 0, 'company': 'Self Financial Inc.', 'complaint_id': '10792484', 'date_received': '2024-11-13', 'issue': "Received a loan you didn't apply for", 'product': 'Payday loan, title loan, personal loan, or advance loan', 'product_category': 'Personal Loan', 'state': 'PA', 'sub_issue': 'nan', 'total_chunks': 1}


In [9]:
question = "Do customers complain about unauthorized transactions?"
result = rag_pipeline.answer(question)

# Show the echoed question, the generated answer, and the metadata of the
# first returned source.
print(f"Question: {result['question']}")
print(f"Answer:\n {result['answer']}")
print(f"\nSample Source Metadata: {result['sources'][0]['metadata']}")


Question: Do customers complain about unauthorized transactions?
Answer:
 no

Sample Source Metadata: {'chunk_index': 0, 'company': 'Block, Inc.', 'complaint_id': '11631957', 'date_received': '2025-01-19', 'issue': 'Unauthorized transactions or other transaction problem', 'product': 'Money transfer, virtual currency, or money service', 'product_category': 'Money Transfer', 'state': 'TX', 'sub_issue': 'nan', 'total_chunks': 1}


In [10]:
question = "Are customers unhappy with savings account fees?"
result = rag_pipeline.answer(question)

# Show the echoed question, the generated answer, and the metadata of the
# first returned source.
print(f"Question: {result['question']}")
print(f"Answer:\n {result['answer']}")
print(f"\nSample Source Metadata: {result['sources'][0]['metadata']}")


Question: Are customers unhappy with savings account fees?
Answer:
 yes

Sample Source Metadata: {'chunk_index': 1, 'company': 'WELLS FARGO & COMPANY', 'complaint_id': '2778339', 'date_received': '2018-01-10', 'issue': 'Managing an account', 'product': 'Checking or savings account', 'product_category': 'Savings Account', 'state': 'FL', 'sub_issue': 'Fee problem', 'total_chunks': 2}


In [11]:
# Run all predefined evaluation questions through the RAG pipeline.
# NOTE(review): the cell's log output indicates evaluate() also saves the
# results to 'rag_evaluation_table.csv' — confirm in src/rag/evaluate.py.
eval_df = evaluate(rag_pipeline)
# Bare last expression so the notebook renders the results table.
eval_df


Starting evaluation on 15 questions...
✓ Evaluation complete. Results saved to 'rag_evaluation_table.csv'.


Unnamed: 0,Question,Generated Answer,Retrieved Sources,Quality Score (1-5),Comments/Analysis
0,What problems do customers have with credit ca...,lack of service and clarity,Source 1: me i've run into problems with their...,,
1,Do customers complain about credit card billing?,no,Source 1: nd i have learned a lot over this co...,,
2,Are there issues with credit card fees?,yes,Source 1: when signing up for this credit card...,,
3,Are there delays with money transfers?,there can be delays but holding onto an accoun...,Source 1: o people like myself can make a deci...,,
4,Do customers report failed money transfers?,yes,Source 1: should be expected with any money tr...,,
5,Are international transfers a problem?,"I rarely use international transfers, i used a...","Source 1: n research on my end, i confirmed th...",,
6,What issues do customers report about personal...,"early loan repayments, interest reimbursements...",Source 1: of its reporting of a balance of for...,,
7,Do customers complain about loan payments?,no,Source 1: to them on a monthly basis in the fo...,,
8,Are loan interest rates a common complaint?,no,Source 1: i dont know what the interest rate i...,,
9,Do customers complain about unauthorized trans...,no,Source 1: unauthorized transactions and no cus...,,


In [12]:
def df_to_markdown(df):
    """Convert a DataFrame to a Markdown table string without external libraries.

    Args:
        df: The pandas DataFrame to render. Column labels and cell values of
            any type are accepted; each one is converted with ``str()``.

    Returns:
        A string holding a header row, a ``---`` separator row, and one row per
        DataFrame row (the index is not included), each terminated by a newline.
    """

    def _cell(value):
        # A literal "|" would be read as a column boundary and a line break
        # would end the table row early, so escape the former and turn the
        # latter into <br> to keep every value inside its own cell.
        text = str(value).replace("|", "\\|")
        return text.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")

    def _line(values):
        # One table row: cells separated by " | " and wrapped in outer pipes.
        return "| " + " | ".join(_cell(v) for v in values) + " |"

    lines = [
        _line(df.columns),  # header; str() lets non-string labels through
        _line(["---"] * len(df.columns)),  # header/body separator
    ]
    lines.extend(_line(row.values) for _, row in df.iterrows())

    # Join once instead of growing a string with repeated "+=".
    return "\n".join(lines) + "\n"

# Usage: write the evaluation results to stdout as a Markdown table
print(df_to_markdown(eval_df))


| Question | Generated Answer | Retrieved Sources | Quality Score (1-5) | Comments/Analysis |
| --- | --- | --- | --- | --- |
| What problems do customers have with credit cards? | lack of service and clarity | Source 1: me i've run into problems with their credit cards. this is just the first time it's impacted my ability to make critical purchases.... | Source 2: the lack of service and clarity for its customers. the process i am going through often happens in times of stress and sadness. and while families are... |  |  |
| Do customers complain about credit card billing? | no | Source 1: nd i have learned a lot over this complaint cycle. maybe at the least it will help others that have a similar experience with and their credit card co... | Source 2: as both a business owner and a head of household, i have maintained dozens of credit cards during the last 30 years, and i have never had to complain ... |  |  |
| Are there issues with credit card fees? | yes | Source 1: when signing u