In [1]:
import pandas as pd
import os
import sys
# Put the parent of the current working directory on the import path so the
# project-local `src` package imported just below can be resolved.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    # Only add it once, so re-running this cell does not grow sys.path.
    sys.path.append(module_path)
from src.rag import (retrieve_similar_complaints, load_faiss_index, load_metadata, load_embedding_model,
                     prepare_chunks_and_metadata, generate_llm_answer, generate_llama_llm_answer)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the complaints CSV into a DataFrame; `df` is the input to
# prepare_chunks_and_metadata() in a later cell.
# NOTE(review): the path is relative to the notebook's working directory;
# presumably each row is one pre-chunked complaint excerpt - confirm schema.
file_path = '../data/chunked_complaints.csv'
df = pd.read_csv(file_path)

In [3]:
# Load the embedding model by name. The same object is passed to
# retrieve_similar_complaints() for every query below.
# NOTE(review): presumably this must be the model the FAISS index was built
# with - confirm against the indexing code in src.rag.
embedding_model = load_embedding_model('all-MiniLM-L6-v2')

In [4]:
# Load the persisted vector index and its side-car metadata from disk.
# NOTE(review): these paths are relative to the notebook directory
# ('./vector_store'), whereas the CSV above is read from '../data' - confirm
# the vector store really lives next to the notebook.
index_path = './vector_store/complaint_chunks.index'
index = load_faiss_index(index_path)

metadata_path = './vector_store/complaint_chunks_metadata.pkl'
metadata_list = load_metadata(metadata_path)

In [5]:
# Prepare chunks and metadata from the complaints DataFrame.
# `all_chunks` is passed to retrieve_similar_complaints() in the cells below.
# NOTE(review): `metadata` returned here is not used by any cell visible in
# this notebook (retrieval uses `metadata_list` loaded from disk) - confirm
# whether it is still needed.
all_chunks, metadata = prepare_chunks_and_metadata(df)

In [20]:
# Smoke-test retrieval on a single question: fetch the 5 nearest chunks and
# print each one with its distance, complaint id and product.
question = "What are the common reasons for failed transactions in Money Transfer?"
results = retrieve_similar_complaints(
    question, embedding_model, index, metadata_list, all_chunks, k=5
)
for i, (chunk, meta, dist) in enumerate(results, start=1):
    # One print with a newline separator emits the same three lines (plus
    # the trailing blank line) as three separate print calls would.
    print(
        f"Result {i} (Distance: {dist:.4f})",
        f"Complaint ID: {meta['complaint_id']}, Product: {meta['product']}",
        f"Text: {chunk}\n",
        sep="\n",
    )

Result 1 (Distance: 0.6193)
Complaint ID: 3554689, Product: Money transfer, virtual currency, or money service
Text: the attempted transactions other than the 3 error messages immediately following the failed transfer attempts

Result 2 (Distance: 0.6341)
Complaint ID: 4207248, Product: Money transfer, virtual currency, or money service
Text: affects the availability of your money transfer and some transactions

Result 3 (Distance: 0.6344)
Complaint ID: 3563183, Product: Money transfer, virtual currency, or money service
Text: i find this a huge failure in communication and completely unacceptable when it comes to someones finances what if the situation was reversed and i was in fact calling to cancel a transfer if there was a similar failure in communication or someone failed

Result 4 (Distance: 0.6555)
Complaint ID: 10272723, Product: Money transfer, virtual currency, or money service
Text: recipient and secondly processed payments that appeared as failed on their banking app

Resul

In [6]:
# Prompt used for retrieval-augmented generation. It carries two named
# placeholders, {context} and {question}, and is handed to the answer
# generator together with the retrieved chunks.
# NOTE(review): presumably src.rag fills the placeholders via str.format -
# confirm, since any literal braces added here would then need escaping.
PROMPT_TEMPLATE = "".join([
    # Role and grounding instructions.
    "You are a financial analyst assistant for CrediTrust. ",
    "Your task is to answer questions about customer complaints. ",
    "Use only the following retrieved complaint excerpts to formulate your answer. ",
    "If the context does not contain the answer, state that you don't have enough information.\n\n",
    # Retrieved excerpts, then the user question, then the answer cue.
    "Context:\n{context}\n\n",
    "Question: {question}\n",
    "Answer:",
])

In [None]:
# Evaluation questions to run through the full RAG pipeline
# (retrieve -> generate). The strings are sent to the retriever verbatim.
questions = [
    "What are the reasons for complaints about saving accounts?",
    "What are the common issues with credit cards?",
    "Summarize fraud-related complaints for Money Transfer",
    "Are there any complaints about loan approval delays?",
    "What are the common reasons for failed transactions in Money Transfer?",
    "Why are customers complaining about hidden fees in Credit Cards?",
    "What evidence suggest fraud in saving accounts complaints?",
    "List complaints related to unauthorized transactions?",
    "What are the common issues with personal loans?",
    "what are refund requests in Money Transfer",
    ]

# Collect one record per question and build the DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 (and was quadratic when called
# in a loop), so rows are accumulated in a plain list instead.
answer_records = []

for question in questions:
    # Retrieve the 15 most similar complaint chunks for this question.
    print(f"Processing question: {question}")
    results = retrieve_similar_complaints(question, embedding_model, index, metadata_list, all_chunks, k=15)
    # Only the chunk text is passed to the LLM; metadata and distance are dropped.
    retrieved_chunks = [chunk for chunk, _meta, _dist in results]
    answer = generate_llama_llm_answer(
        question,
        retrieved_chunks,
        PROMPT_TEMPLATE,
        model_name="meta-llama/Meta-Llama-3.1-8B-Instruct"
    )

    # Keep the exact context alongside the answer so it can be inspected later.
    answer_records.append({
        'question': question,
        'answer': answer,
        'context': "\n".join(retrieved_chunks)
    })

# Explicit columns keep the schema stable even if `questions` is empty.
answer_df = pd.DataFrame(answer_records, columns=['question', 'answer', 'context'])

# Save the answers to a CSV file (written to the current working directory).
answer_df.to_csv('answers.csv', index=False)

In [None]:
# Preview the first few rows of the answer DataFrame built in the previous
# cell; as the last expression of the cell it is rendered as the cell output.
answer_df.head()  # Display the first few rows of the answer DataFrame