# Task 3: Building the RAG Core Logic and Evaluation
## Intelligent Complaint Analysis for Financial Services

**Objective:** Build the retrieval and generation pipeline using the pre-built full-scale vector store, and evaluate its effectiveness.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
from pathlib import Path

# Import custom modules
from src.rag_pipeline import RAGPipeline, EvaluationFramework

import warnings
warnings.filterwarnings('ignore')

## Step 1: Load Pre-built Embeddings and Create Vector Store

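First, we check whether the ChromaDB vector store already exists on disk. If it is missing or empty, we build it from the pre-built embeddings (stored in a parquet file) by running the loading script; otherwise we reuse the existing store.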

In [None]:
# Check if vector store already exists
vector_store_path = Path('../vector_store')

if not vector_store_path.exists() or not list(vector_store_path.glob('*')):
    print("Vector store not found. Creating from pre-built embeddings...")
    print("This may take several minutes...\n")
    
    # Run the loading script
    %run ../src/load_prebuilt_embeddings.py
else:
    print("✅ Vector store already exists!")
    print(f"Location: {vector_store_path}")

## Step 2: Initialize RAG Pipeline

In [None]:
# Settings for the pipeline: where the persisted store lives, which
# collection to query, and which embedding model to use.
pipeline_settings = {
    'vector_store_path': '../vector_store',
    'collection_name': 'complaint_embeddings_full',
    'embedding_model': 'sentence-transformers/all-MiniLM-L6-v2',
}

print("Initializing RAG Pipeline...\n")
rag = RAGPipeline(**pipeline_settings)
print("\n✅ RAG Pipeline initialized successfully!")

## Step 3: Test Retrieval

Let's test the retrieval component with some sample queries.

In [None]:
# Test query 1: retrieval without any product filter
test_query = "Why are people unhappy with Credit Cards?"

print(f"Query: {test_query}")
print("\nRetrieving relevant complaints...\n")

results = rag.retrieve(query=test_query, n_results=5)

# Each field is read at index 0 — presumably one entry per submitted query
# (ChromaDB-style result layout); confirm against RAGPipeline.retrieve.
retrieved_docs = results['documents'][0]
print(f"Retrieved {len(retrieved_docs)} chunks\n")
print("=" * 80)

result_rows = zip(retrieved_docs, results['metadatas'][0], results['distances'][0])
for rank, (chunk, meta, dist) in enumerate(result_rows, start=1):
    print(f"\nResult {rank}:")
    print(f"  Product: {meta.get('product', 'Unknown')}")
    print(f"  Issue: {meta.get('issue', 'Unknown')}")
    print(f"  Distance: {dist:.4f}")
    # Only the first 200 characters are shown to keep the output readable.
    print(f"  Text: {chunk[:200]}...")
    print("-" * 80)

In [None]:
# Test query 2: retrieval restricted by a product filter
test_query_2 = "What problems do customers have with loan payments?"

# Keep the filter in one place so the label printed below always reflects the
# value that is actually sent to the retriever (previously the label read
# "Personal Loan" while the filter passed was "loan").
product_filter_2 = "loan"

print(f"Query: {test_query_2}")
print(f"Filter: {product_filter_2}\n")

results_2 = rag.retrieve(
    query=test_query_2,
    n_results=5,
    product_filter=product_filter_2
)

# Each field is read at index 0 — presumably one entry per submitted query
# (ChromaDB-style result layout); confirm against RAGPipeline.retrieve.
print(f"Retrieved {len(results_2['documents'][0])} chunks\n")
print("=" * 80)

for i, (doc, metadata, distance) in enumerate(zip(
    results_2['documents'][0],
    results_2['metadatas'][0],
    results_2['distances'][0]
), 1):
    print(f"\nResult {i}:")
    print(f"  Product: {metadata.get('product', 'Unknown')}")
    print(f"  Issue: {metadata.get('issue', 'Unknown')}")
    print(f"  Distance: {distance:.4f}")
    # Only the first 200 characters are shown to keep the output readable.
    print(f"  Text: {doc[:200]}...")
    print("-" * 80)

## Step 4: Test Complete RAG Pipeline

Now let's test the complete pipeline including answer generation.

In [None]:
# Ask a single question through the full pipeline (retrieval + answer)
query = "What are the main issues customers face with credit card billing?"

print(f"Question: {query}\n")
print("Generating answer...\n")

# use_llm is False here; set it to True if you have an LLM configured.
response = rag.generate_answer(query=query, n_results=5, use_llm=False)

rule = "=" * 80
print(rule)
print("ANSWER:")
print(rule)
print(response['answer'])
print("\n" + rule)
print(f"Based on {response['n_sources']} source complaints")
print(rule)

In [None]:
# Display the first three source complaints returned with the answer
print("\nSOURCE COMPLAINTS:\n")

separator = "\n" + "-" * 80 + "\n"
for rank, src in enumerate(response['sources'][:3], start=1):
    print(f"Source {rank}:")
    meta = src['metadata']
    print(f"  Product: {meta.get('product', 'Unknown')}")
    print(f"  Issue: {meta.get('issue', 'Unknown')}")
    print(f"  Company: {meta.get('company', 'Unknown')}")
    # The relevance score is reported as 1 - distance.
    print(f"  Relevance Score: {1 - src['distance']:.4f}")
    # Only the first 250 characters of the complaint text are shown.
    print(f"  Text: {src['text'][:250]}...")
    print(separator)

## Step 5: Qualitative Evaluation

Run a qualitative evaluation over a set of test questions, each paired with the themes we expect to see in the answer and an optional product filter.

In [None]:
# Evaluation harness wrapping the pipeline built above
evaluator = EvaluationFramework(rag)

# One row per test question: (question, expected themes, product filter).
# A filter of None presumably means "do not filter by product" — confirm
# against EvaluationFramework.add_test_question.
question_specs = [
    (
        "Why are people unhappy with Credit Cards?",
        ['billing', 'fees', 'interest rates', 'unauthorized charges'],
        None,
    ),
    (
        "What are the most common complaints about personal loans?",
        ['payment issues', 'interest rates', 'customer service'],
        'loan',
    ),
    (
        "What problems do customers report with savings accounts?",
        ['access issues', 'fees', 'account closure'],
        'savings',
    ),
    (
        "What issues arise with money transfers?",
        ['delays', 'fees', 'failed transfers', 'fraud'],
        'transfer',
    ),
    (
        "How do customers describe problems with unauthorized credit card charges?",
        ['fraud', 'dispute resolution', 'refunds'],
        'credit',
    ),
    (
        "What are the main customer service complaints across all products?",
        ['response time', 'unhelpful staff', 'communication'],
        None,
    ),
    (
        "What billing disputes do customers mention most frequently?",
        ['incorrect charges', 'double billing', 'fees'],
        None,
    ),
    (
        "What are customers saying about loan application rejections?",
        ['credit score', 'documentation', 'explanation'],
        'loan',
    ),
]

# Expand the rows into dicts whose keys match add_test_question's keywords.
field_names = ('question', 'expected_themes', 'product_filter')
test_questions = [dict(zip(field_names, spec)) for spec in question_specs]

# Register every question with the evaluator
for entry in test_questions:
    evaluator.add_test_question(**entry)

print(f"Added {len(test_questions)} test questions for evaluation")

In [None]:
# Execute every registered test question, retrieving 5 results per question
print("Running evaluation...\n")
results_df = evaluator.run_evaluation(n_results=5)

print("\n✅ Evaluation complete!")
print("\nResults summary:")

# Only the headline columns are shown in the summary.
summary_columns = ['question', 'n_sources', 'top_product', 'avg_distance']
print(results_df[summary_columns].to_string())

In [None]:
# Print detailed evaluation report.
# The output format is defined by EvaluationFramework (src.rag_pipeline).
# NOTE(review): presumably requires run_evaluation() to have been executed
# first so that results are available — confirm.
evaluator.print_evaluation_report()

## Step 6: Create Evaluation Table for Report

In [None]:
# Create evaluation table for the final report


def _truncate(text, limit):
    """Return text cut to limit characters with '...' appended if it was longer."""
    return text[:limit] + '...' if len(text) > limit else text


def _quality_score(avg_distance):
    """Map an average distance to an integer score clamped to the range 1-5.

    Smaller distances give higher scores; int() truncates toward zero before
    the result is clamped.
    """
    return max(1, min(5, int((1 - avg_distance) * 5) + 1))


def _relevance_comment(avg_distance):
    """Return the relevance remark for a given average distance (thresholds 0.5 and 0.7)."""
    if avg_distance < 0.5:
        return "High relevance - sources closely match query."
    if avg_distance < 0.7:
        return "Good relevance - sources provide useful context."
    return "Moderate relevance - may need query refinement."


eval_table = []

for result in evaluator.results:
    avg_distance = result['avg_distance']

    # Summarise the first previewed source as "product - issue"; the summary
    # stays empty when no sources were returned.
    sources = result['sources_preview']
    source_summary = ""
    if sources:
        first_meta = sources[0]['metadata']
        source_summary = f"{first_meta.get('product', 'Unknown')} - {first_meta.get('issue', 'Unknown')}"

    # Source count followed by a relevance remark
    analysis = f"Retrieved {result['n_sources']} relevant sources. "
    analysis += _relevance_comment(avg_distance)

    eval_table.append({
        # Long questions and answers are shortened so the table stays readable.
        'Question': _truncate(result['question'], 60),
        'Generated Answer': _truncate(result['answer'], 150),
        'Retrieved Sources': source_summary,
        'Quality Score (1-5)': _quality_score(avg_distance),
        'Comments/Analysis': analysis
    })

eval_df = pd.DataFrame(eval_table)
print("\nEVALUATION TABLE FOR REPORT:")
print("=" * 120)
print(eval_df.to_markdown(index=False))

In [None]:
# Save evaluation results
output_path = Path('../data/processed/rag_evaluation_results.csv')

# to_csv does not create missing folders, so make sure the target directory
# exists first; this is a no-op when it is already there.
output_path.parent.mkdir(parents=True, exist_ok=True)

eval_df.to_csv(output_path, index=False)
print(f"\n✅ Evaluation results saved to: {output_path}")

## Step 7: Analysis and Insights

In [None]:
# Analyze retrieval performance
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')

# Average retrieval distance of each evaluated question
distances = [entry['avg_distance'] for entry in evaluator.results]

# Computed once; used for both the marker line and its legend label.
mean_distance = np.mean(distances)

# Histogram of the per-question average distances with the mean marked
plt.figure(figsize=(10, 6))
plt.hist(distances, bins=10, color='skyblue', edgecolor='black')
plt.xlabel('Average Distance', fontsize=12)
plt.ylabel('Frequency', fontsize=12)
plt.title('Distribution of Retrieval Distances', fontsize=14, fontweight='bold')
plt.axvline(mean_distance, color='red', linestyle='--', label=f'Mean: {mean_distance:.3f}')
plt.legend()
plt.tight_layout()
plt.show()

# Summary statistics, each evaluated right before it is printed
print("\nRetrieval Statistics:")
summary_stats = (
    ("Mean", np.mean),
    ("Median", np.median),
    ("Min", np.min),
    ("Max", np.max),
)
for label, stat_fn in summary_stats:
    print(f"  {label} Distance: {stat_fn(distances):.4f}")

## Summary

### Task 3 Completed! ✅

**What We Built:**
1. **Retriever**: Semantic search using ChromaDB and sentence transformers
2. **Prompt Template**: Structured prompt for financial complaint analysis
3. **Generator**: Answer synthesis (with LLM integration support)
4. **Evaluation Framework**: Systematic testing with multiple queries

**Key Findings:**
- The retrieval system successfully finds relevant complaints
- Average retrieval distance indicates good semantic matching
- Product filtering works effectively
- Sources are traceable and verifiable

**What Works Well:**
- Semantic search captures intent beyond keyword matching
- Metadata filtering enables product-specific queries
- Retrieved sources are contextually relevant

**Areas for Improvement:**
- LLM integration would provide more natural answers
- Could experiment with different chunk sizes
- May benefit from query expansion techniques

**Next Steps:**
- Proceed to Task 4: Build interactive UI with Gradio/Streamlit
- Integrate this RAG pipeline into the chat interface
- Add source citation display and streaming