# Retrieval-Augmented Generation (RAG)

## Import Required Libraries

In [1]:
import pandas as pd
import os
import sys
# Put the parent of the current working directory on the module search path
# before importing from `src` (presumably the project root that contains the
# `src` package — confirm against the repository layout).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:  # skip the append if the entry is already present (e.g. cell re-run)
    sys.path.append(module_path)
# Project helpers used by the cells below.
# NOTE(review): `generate_llm_answer` is imported but not called in the visible cells.
from src.rag import (retrieve_similar_complaints, load_faiss_index, load_metadata, load_embedding_model,
                     prepare_chunks_and_metadata, generate_llm_answer, generate_llama_llm_answer)

  from .autonotebook import tqdm as notebook_tqdm


## Load Chunked Complaints Data

In [2]:
# Path to the chunked-complaints CSV, relative to the notebook's working directory.
file_path = '../data/chunked_complaints.csv'
# Read the CSV into the DataFrame that is later passed to prepare_chunks_and_metadata.
df = pd.read_csv(filepath_or_buffer=file_path)

## Load Embedding Model

In [3]:
# Load the embedding model identified by name.
# NOTE(review): presumably this must match the model used to build the FAISS index — confirm.
embedding_model = load_embedding_model("all-MiniLM-L6-v2")

## Load FAISS Index and Metadata

In [4]:
# NOTE(review): these paths are resolved from the current directory (./), whereas the
# complaints CSV is read from ../data — confirm the vector store really lives next to
# the notebook.
index_path = './vector_store/complaint_chunks.index'
index = load_faiss_index(index_path)

# NOTE(review): the metadata file has a .pkl extension; if load_metadata unpickles it,
# only load files from a trusted source.
metadata_path = './vector_store/complaint_chunks_metadata.pkl'
metadata_list = load_metadata(metadata_path)

## Prepare Chunks and Metadata

In [5]:
# Unpack the two values returned by prepare_chunks_and_metadata for the loaded DataFrame.
# NOTE(review): the retrieval calls in this notebook are given `metadata_list` (loaded
# from disk), not this `metadata` — confirm which one is intended.
(all_chunks, metadata) = prepare_chunks_and_metadata(df)

## Example Retrieval for a Single Question

In [6]:
question = "What are the common reasons for failed transactions in Money Transfer?"

# Ask the retriever for k=5 results for this single question.
results = retrieve_similar_complaints(
    question, embedding_model, index, metadata_list, all_chunks, k=5
)

# Each result is a (chunk text, metadata dict, distance) triple; number them from 1.
for rank, hit in enumerate(results, start=1):
    chunk, meta, dist = hit
    print(f"Result {rank} (Distance: {dist:.4f})")
    print(f"Complaint ID: {meta['complaint_id']}, Product: {meta['product']}")
    print(f"Text: {chunk}\n")

Result 1 (Distance: 0.6193)
Complaint ID: 3554689, Product: Money transfer, virtual currency, or money service
Text: the attempted transactions other than the 3 error messages immediately following the failed transfer attempts

Result 2 (Distance: 0.6341)
Complaint ID: 4207248, Product: Money transfer, virtual currency, or money service
Text: affects the availability of your money transfer and some transactions

Result 3 (Distance: 0.6344)
Complaint ID: 3563183, Product: Money transfer, virtual currency, or money service
Text: i find this a huge failure in communication and completely unacceptable when it comes to someones finances what if the situation was reversed and i was in fact calling to cancel a transfer if there was a similar failure in communication or someone failed

Result 4 (Distance: 0.6555)
Complaint ID: 10272723, Product: Money transfer, virtual currency, or money service
Text: recipient and secondly processed payments that appeared as failed on their banking app

Resul

## Define Prompt Template

In [7]:
# Prompt used for retrieval-augmented generation.
# Layout: one paragraph of instructions, a blank line, the retrieved context,
# a blank line, the question, and a trailing "Answer:" cue.
# Placeholders filled in by the caller: {context} and {question}.
PROMPT_TEMPLATE = "\n".join([
    # Instruction sentences are joined with single spaces into one paragraph.
    " ".join([
        "You are a financial analyst assistant for CrediTrust.",
        "Your task is to answer questions about customer complaints.",
        "Use only the following retrieved complaint excerpts to formulate your answer.",
        "If the context does not contain the answer, state that you don't have enough information.",
    ]),
    "",
    "Context:",
    "{context}",
    "",
    "Question: {question}",
    "Answer:",
])

## Define Evaluation Questions and Generate Answers

In [12]:
# Evaluation questions, answered one at a time by the loop below.
questions = [
    "What are the reasons for complaints about saving accounts?",
    "What are the common issues with credit cards?",
    "Summarize fraud-related complaints for Money Transfer",
    "Are there any complaints about loan approval delays?",
    "What are the common reasons for failed transactions in Money Transfer?",
    "Why are customers complaining about hidden fees in Credit Cards?",
    "What evidence suggest fraud in saving accounts complaints?",
    "List complaints related to unauthorized transactions?",
    "What are the common issues with personal loans?",
    "what are refund requests in Money Transfer",
]

# One record per question: {'question', 'answer', 'context'}.
answers = []

for question in questions:
    print(f"Processing question: {question}")

    # Retrieve k=15 results and keep only the chunk text of each (chunk, meta, dist) triple.
    results = retrieve_similar_complaints(
        question, embedding_model, index, metadata_list, all_chunks, k=15
    )
    retrieved_chunks = [text for text, _meta, _dist in results]

    # Generate the answer from the question, all retrieved chunks and the prompt template.
    answer = generate_llama_llm_answer(
        question,
        retrieved_chunks,
        PROMPT_TEMPLATE,
        model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
    )

    # NOTE(review): only the first 3 retrieved chunks are stored as 'context', i.e. a
    # subset of the chunks passed to generate_llama_llm_answer above.
    answers.append(
        dict(
            question=question,
            answer=answer,
            context="\n".join(retrieved_chunks[:3]),
        )
    )

# Build the results table from the collected records and save it without the index column.
answer_df = pd.DataFrame(answers)
answer_df.to_csv('../data/answers.csv', index=False)

Processing question: What are the reasons for complaints about saving accounts?
Processing question: What are the common issues with credit cards?
Processing question: Summarize fraud-related complaints for Money Transfer
Processing question: Are there any complaints about loan approval delays?
Processing question: What are the common reasons for failed transactions in Money Transfer?
Processing question: Why are customers complaining about hidden fees in Credit Cards?
Processing question: What evidence suggest fraud in saving accounts complaints?
Processing question: List complaints related to unauthorized transactions?
Processing question: What are the common issues with personal loans?
Processing question: what are refund requests in Money Transfer


## Display Generated Answers

In [13]:
# Show the first five rows of the answers table as the cell output.
answer_df.head(n=5)

Unnamed: 0,question,answer,context
0,What are the reasons for complaints about savi...,"Based on the provided complaint excerpts, the ...",for seeking redress of our claim more importan...
1,What are the common issues with credit cards?,"Based on the provided complaint excerpts, the ...",i typically have no problem with credit cards ...
2,Summarize fraud-related complaints for Money T...,"Based on the provided complaint excerpts, here...",xxxx and other money transfers that were fraud...
3,Are there any complaints about loan approval d...,"Yes, there are several complaints about loan a...",that they didnt have to approve us for a loan ...
4,What are the common reasons for failed transac...,"Based on the provided complaint excerpts, the ...",the attempted transactions other than the 3 er...
