# Experiment 3: RAG Impact Analysis

This notebook compares Retrieval-Augmented Generation (RAG) with a full-context approach in terms of accuracy, latency, and token usage.

In [None]:
# Import required libraries
import json
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import Image, display

# Add src to path
sys.path.append(str(Path.cwd().parent / 'src'))

# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('Set2')
%matplotlib inline

## Load and Analyze Results

Compare RAG and full-context approaches across multiple metrics.

In [None]:
# Locate and parse the Experiment 3 results file
exp3_path = Path.cwd().parent / 'results' / 'exp3' / 'results.json'
exp3_results = json.loads(exp3_path.read_text(encoding='utf-8'))

# Per-method metrics are stored under the 'comparison' key
comparison = exp3_results['comparison']

# Build one row per method, spreading that method's metrics into columns
method_keys = [('Full Context', 'full_context'), ('RAG', 'rag')]
exp3_df = pd.DataFrame(
    [{'Method': label, **comparison[key]} for label, key in method_keys]
)

print("Experiment 3 Summary:")
print(exp3_df.to_string(index=False))

## Performance Comparison

Quantify how RAG changes accuracy, latency, and token usage relative to the full-context baseline.

In [None]:
def _pct_of_baseline(delta, baseline):
    """Express ``delta`` as a percentage of ``baseline``.

    Returns NaN when ``baseline`` is zero, since the relative change is
    undefined there (the plain division would raise ZeroDivisionError).
    """
    if not baseline:
        return np.nan
    return (delta / baseline) * 100


def _speedup_label(ratio):
    """Word a full-context/RAG latency ratio without calling a slowdown 'faster'.

    ``ratio >= 1`` means RAG is faster; ``0 < ratio < 1`` means RAG is slower
    and is reported as the inverse ratio. NaN or non-positive ratios (missing
    or zero latency) are reported as "n/a".
    """
    if np.isnan(ratio) or ratio <= 0:
        return "n/a"
    if ratio >= 1:
        return f"{ratio:.2f}x faster"
    return f"{1 / ratio:.2f}x slower"


# Calculate improvements of RAG relative to the full-context baseline
full_metrics = comparison['full_context']
rag_metrics = comparison['rag']

acc_improvement = _pct_of_baseline(
    rag_metrics['accuracy'] - full_metrics['accuracy'], full_metrics['accuracy']
)
# Ratio of latencies; NaN when the RAG latency is zero (ratio undefined)
speedup = (
    full_metrics['latency'] / rag_metrics['latency']
    if rag_metrics['latency']
    else np.nan
)
token_reduction = _pct_of_baseline(
    full_metrics['tokens_used'] - rag_metrics['tokens_used'], full_metrics['tokens_used']
)

print("\nRAG Improvements:")
print(f"  Accuracy: {acc_improvement:+.1f}%")
print(f"  Speedup: {_speedup_label(speedup)}")
print(f"  Token reduction: {token_reduction:.1f}%")

# Cost efficiency (skipped when RAG does not reduce tokens; NaN compares False)
if token_reduction > 0:
    print("\nCost Efficiency:")
    print(f"  RAG uses {token_reduction:.1f}% fewer tokens")
    print(f"  At scale, this translates to {token_reduction:.1f}% cost reduction")

## Visualization

Display the RAG vs Full Context comparison plot.

In [None]:
# Show the pre-generated comparison figure if the experiment run saved one.
# Image and display come from the notebook's import cell, so later cells that
# use them do not depend on this cell having been executed.
plot_path = Path.cwd().parent / 'results' / 'exp3' / 'rag_comparison.png'
if plot_path.exists():
    print("RAG vs Full Context Comparison:")
    display(Image(filename=str(plot_path)))
else:
    print(f"Plot not found at {plot_path}")

## Sensitivity Analysis (Optional)

If sensitivity analysis results exist, load and display them.

In [None]:
# Sensitivity-analysis results are optional; report and move on when absent
sensitivity_path = Path.cwd().parent / 'results' / 'exp3' / 'sensitivity_analysis.json'
if not sensitivity_path.exists():
    print("\nSensitivity analysis not available.")
else:
    with open(sensitivity_path, 'r', encoding='utf-8') as sensitivity_file:
        sensitivity = json.load(sensitivity_file)

    print("\nSensitivity Analysis Summary:")
    print(f"Configurations tested: {len(sensitivity['results'])}")

    # Summarise the configuration recorded under 'best_config'
    best_config = sensitivity['best_config']
    print(f"Best configuration: chunk_size={best_config['chunk_size']}, top_k={best_config['top_k']}")
    print(f"Best accuracy: {best_config['mean_accuracy']:.3f}")

    # The heatmap image, when present, sits next to the JSON results
    heatmap_path = sensitivity_path.with_name('sensitivity_analysis_heatmap.png')
    if heatmap_path.exists():
        print("\nParameter Sensitivity Heatmap:")
        display(Image(filename=str(heatmap_path)))

## Conclusions

**Key Findings:**
1. RAG achieves comparable or better accuracy with significantly fewer tokens
2. RAG is faster due to reduced context processing overhead
3. Token reduction translates directly to cost savings at scale

**When to Use RAG:**
- Large document collections (50+ documents)
- Cost-sensitive applications
- Low-latency requirements
- When retrieval quality is good (relevant chunks are retrievable)

**When to Use Full Context:**
- Small document sets (< 10 documents)
- When all context is equally important
- When retrieval might miss critical information

**Optimal Configuration:**
- chunk_size: 500 tokens (balance between context and granularity)
- top_k: 3 chunks (sufficient coverage without noise)