# RAG Implementation Tuning Template

This notebook provides a template for tuning a RAG implementation's parameters and configurations. It can be used to optimize:
- Chunking parameters
- Retrieval settings
- Model parameters
- Embedding configurations
- Any other implementation-specific parameters

In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
from typing import Dict, List, Any
from itertools import product

# Import utilities
import utils_setup
from utils import RAGMetricsEvaluator, BenchmarkVisualizer, notebook_to_module

## Configuration

Define the implementation to tune and the parameter space to explore.

In [None]:
# Tuning configuration: one dict that drives every cell in this notebook.
config = {
    # Which implementation notebook to load and tune
    # (swap in another one, e.g. "graph_rag", by editing both entries).
    "implementation": {
        "name": "baseline_rag",
        "notebook_path": "../../rag_implementations/baseline_rag/implementation.ipynb",
    },
    # Candidate values per parameter; every combination of them is evaluated.
    "parameter_grid": {
        "chunk_size": [256, 512, 1024],
        "chunk_overlap": [0, 50, 100],
        "retrieval_k": [3, 5, 7],
        "similarity_threshold": [0.7, 0.8, 0.9],
    },
    # The first "labeled" entry is used for tuning, the first "validation"
    # entry for checking the winning combination afterwards.
    "datasets": {
        "labeled": ["tuning_dataset"],
        "validation": ["validation_dataset"],
    },
    # Metric names of interest.
    "metrics": [
        "faithfulness",
        "context_precision",
        "response_relevancy",
        "context_recall",
        "context_entities_recall",
    ],
    # Per-metric weights used to fold the metric results into one score.
    "optimization": {
        "metric_weights": {
            "faithfulness": 0.3,
            "context_precision": 0.2,
            "response_relevancy": 0.3,
            "context_recall": 0.1,
            "context_entities_recall": 0.1,
        },
    },
    # Forwarded to RAGMetricsEvaluator as batch_size / sleep_time.
    "batch_size": 20,
    "sleep_time": 1,
}

## Load Implementation

In [None]:
# Load the implementation to tune.
# notebook_to_module is given the configured notebook path; the object it
# returns must expose a RAGImplementation attribute, which is bound here for
# use by the tuning and validation cells.
try:
    module = notebook_to_module(config["implementation"]["notebook_path"])
    RAGImplementation = module.RAGImplementation
except Exception as e:
    print(f"Error loading implementation: {str(e)}")
    # Re-raise instead of continuing: the later cells all need
    # RAGImplementation, and swallowing the error here would only resurface
    # as a NameError far away from the real cause.
    raise

## Parameter Tuning Functions

In [None]:
def generate_parameter_combinations(parameter_grid: Dict[str, List]) -> List[Dict]:
    """Expand a parameter grid into the list of all value combinations.

    Args:
        parameter_grid: Maps each parameter name to its candidate values.

    Returns:
        One dict per element of the Cartesian product of the candidate
        lists, keyed by parameter name. The last parameter in the grid
        varies fastest. An empty grid yields ``[{}]``; a grid containing an
        empty candidate list yields ``[]``.
    """
    names = list(parameter_grid)
    candidate_lists = [parameter_grid[name] for name in names]

    combos = []
    for chosen in product(*candidate_lists):
        combos.append(dict(zip(names, chosen)))
    return combos

def calculate_aggregate_score(results: Dict[str, float], weights: Dict[str, float]) -> float:
    """Collapse per-metric results into a single weighted score.

    Every metric in ``results`` that also has an entry in ``weights``
    contributes ``value * weight``; metrics without a weight are ignored.
    The total is not divided by the sum of the weights, so it equals a
    weighted average only when the weights of the metrics present add up
    to 1.

    Args:
        results: Metric name -> measured value.
        weights: Metric name -> weight.

    Returns:
        The weighted sum, or 0 when no metric in ``results`` has a weight.
    """
    return sum(
        value * weights[metric]
        for metric, value in results.items()
        if metric in weights
    )

async def evaluate_parameters(
    params: Dict,
    dataset_name: str,
    evaluator: RAGMetricsEvaluator
) -> Dict[str, float]:
    """Score one parameter combination on a dataset.

    Builds a ``RAGImplementation`` from ``params``, loads the queries,
    contexts and reference answers for ``dataset_name``, answers every query
    one at a time through ``implementation.query``, and hands everything to
    ``evaluator.evaluate_labeled``.

    Args:
        params: Keyword arguments forwarded to the ``RAGImplementation``
            constructor.
        dataset_name: Name passed to ``load_llama_dataset``.
        evaluator: Evaluator whose ``evaluate_labeled`` coroutine is awaited.

    Returns:
        Whatever ``evaluate_labeled`` returns (annotated as metric name ->
        score).
    """
    # NOTE(review): load_llama_dataset is not among the names imported in the
    # visible import cell -- confirm where it is supposed to come from.
    rag = RAGImplementation(**params)

    queries, contexts, reference_answers = load_llama_dataset(dataset_name)

    # Answers are produced sequentially, in query order.
    generated_answers = []
    for question in queries:
        generated_answers.append(rag.query(question))

    return await evaluator.evaluate_labeled(
        queries=queries,
        contexts=contexts,
        generated_answers=generated_answers,
        reference_answers=reference_answers
    )

def visualize_tuning_results(
    results: List[Dict],
    output_dir: str
):
    """Plot how each tuned parameter relates to the aggregate score.

    For every parameter named in the module-level ``config["parameter_grid"]``
    a line plot of the mean ``aggregate_score`` per parameter value is
    requested, followed by one heatmap of the correlation matrix between the
    parameters and ``aggregate_score``. Each plot is requested from
    ``BenchmarkVisualizer.plot_comparison`` with a ``save_path`` inside
    ``output_dir``.

    Args:
        results: One dict per evaluated combination; each must contain the
            grid parameters and an ``"aggregate_score"`` entry.
        output_dir: Directory for the plot files; created if missing.
    """
    visualizer = BenchmarkVisualizer()

    # Make sure the target directory is there before any plot is requested
    os.makedirs(output_dir, exist_ok=True)

    frame = pd.DataFrame(results)
    tuned_params = list(config["parameter_grid"].keys())

    # One impact plot per tuned parameter
    for param in tuned_params:
        mean_score_by_value = frame.groupby(param)["aggregate_score"].mean()
        visualizer.plot_comparison(
            mean_score_by_value,
            comparison_type=f"Parameter Impact: {param}",
            plot_type="line",
            save_path=f"{output_dir}/{param}_impact.png"
        )

    # Correlation heatmap across the parameters and the aggregate score
    correlation_columns = tuned_params + ["aggregate_score"]
    correlations = frame[correlation_columns].corr()
    visualizer.plot_comparison(
        correlations,
        comparison_type="Parameter Correlations",
        plot_type="heatmap",
        save_path=f"{output_dir}/parameter_correlations.png"
    )

## Run Parameter Tuning

In [None]:
# Initialize evaluator
evaluator = RAGMetricsEvaluator(
    batch_size=config["batch_size"],
    sleep_time=config["sleep_time"]
)

# Generate parameter combinations
parameter_combinations = generate_parameter_combinations(config["parameter_grid"])

# Evaluate each combination
results = []
for params in parameter_combinations:
    # Evaluate on tuning dataset
    metrics = await evaluate_parameters(
        params,
        config["datasets"]["labeled"][0],
        evaluator
    )
    
    # Calculate aggregate score
    aggregate_score = calculate_aggregate_score(
        metrics,
        config["optimization"]["metric_weights"]
    )
    
    # Store results
    results.append({
        **params,
        **metrics,
        "aggregate_score": aggregate_score
    })

## Analyze Results

In [None]:
# Tabulate every evaluated combination
results_df = pd.DataFrame(results)

# Pick the row with the highest aggregate score
best_index = results_df["aggregate_score"].idxmax()
best_params = results_df.loc[best_index]

print("Best Parameters:")
for param in config["parameter_grid"]:
    print(f"{param}: {best_params[param]}")
print(f"Aggregate Score: {best_params['aggregate_score']}")

# Produce the per-parameter impact plots and the correlation heatmap
visualize_tuning_results(results, "results/parameter_tuning")

## Validate Best Parameters

In [None]:
# Initialize implementation with best parameters
best_implementation = RAGImplementation(**{
    param: best_params[param]
    for param in config["parameter_grid"].keys()
})

# Evaluate on validation dataset
validation_results = await evaluate_parameters(
    best_params,
    config["datasets"]["validation"][0],
    evaluator
)

print("\nValidation Results:")
for metric, value in validation_results.items():
    print(f"{metric}: {value:.4f}")

## Analysis and Insights

Use this section to analyze the parameter tuning results:

1. Parameter Impact Analysis
   - How each parameter affects performance
   - Parameter interactions and dependencies
   - Sensitivity to parameter changes

2. Validation Performance
   - Generalization to validation dataset
   - Stability of performance
   - Potential overfitting concerns

3. Trade-offs
   - Performance vs. resource usage
   - Quality vs. speed
   - Complexity vs. maintainability

4. Recommendations
   - Optimal parameter settings
   - Parameter ranges to avoid
   - Future tuning suggestions