# Comparative Mitigation Strategy Analysis

This notebook compares the effectiveness of different hallucination mitigation strategies:

1. **Baseline** - No mitigation (already tested)
2. **RAG** - Retrieval-Augmented Generation with curated knowledge base
3. **Constitutional AI** - Self-critique and refinement
4. **Chain-of-Thought** - Step-by-step reasoning with uncertainty markers

## Objectives
- Test each strategy on the same prompts
- Measure hallucination reduction
- Compare cost (tokens), speed, and accuracy
- Identify which strategy works best for which scenarios

In [None]:
# Setup
import sys
import time
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tqdm import tqdm

# The project modules live in ../src; extend the path before importing them
sys.path.append('../src')

from agent import HallucinationTestAgent
from database import HallucinationDB
from test_vectors import HallucinationTestVectors
from rag_utils import create_default_knowledge_base
from config import Config

## Initialize Components

In [None]:
# Build the three shared objects used by every cell below:
# the model agent, the results database, and the RAG knowledge base.
agent = HallucinationTestAgent()
db = HallucinationDB()
kb = create_default_knowledge_base()

# Report readiness, including how many documents the knowledge base holds
print("✓ Agent initialized")
kb_document_count = kb.get_count()
print(f"✓ Knowledge base loaded: {kb_document_count} documents")
print("✓ Database ready")

## Select Test Vectors

We'll use a fixed sample from each vector type (the first 8 intentional, 5 unintentional, and 3 control prompts) so that every strategy is tested on the same prompts.

In [None]:
# Number of prompts taken from the head of each vector type. Every strategy
# below is run against this same fixed selection.
SAMPLE_SIZES = {
    'intentional': 8,    # high-risk vectors (should hallucinate in baseline)
    'unintentional': 5,  # edge cases
    'control': 3,        # control (should NOT hallucinate in any strategy)
}

# Get all vectors
all_vectors = HallucinationTestVectors.get_all_vectors()

# Slice once per type so the breakdown reports exactly what was selected.
# (Counting by category membership is only approximate: a category that
# appears under more than one vector type would be counted for each of them.)
samples_by_type = {
    vector_type: all_vectors[vector_type][:sample_size]
    for vector_type, sample_size in SAMPLE_SIZES.items()
}

# Combined test set, ordered intentional -> unintentional -> control
test_set = [
    vector
    for sample in samples_by_type.values()
    for vector in sample
]

print(f"Test set size: {len(test_set)} prompts")
print("\nBreakdown:")
for vector_type, sample in samples_by_type.items():
    print(f"  {vector_type}: {len(sample)}")

## Create Experiments for Each Strategy

In [None]:
# Register one experiment per mitigation strategy.
# Each entry is (strategy key, display name, description).
strategies = [
    ('rag', 'RAG (Retrieval-Augmented Generation)',
     'Testing with curated cybersecurity knowledge base for grounding'),
    ('constitutional_ai', 'Constitutional AI',
     'Testing with self-critique and constitutional principles'),
    ('chain_of_thought', 'Chain-of-Thought Verification',
     'Testing with step-by-step reasoning and uncertainty markers')
]

# Maps strategy key -> experiment ID; the test loops log against these IDs
experiments = {}

for key, label, summary in strategies:
    experiments[key] = db.create_experiment(
        name=f"Comparative Analysis - {label}",
        mitigation_strategy=key,
        description=summary
    )
    print(f"✓ {label}: Experiment ID {experiments[key]}")

## Test RAG Strategy

In [None]:
# Run every prompt in the test set through the RAG pipeline and log the result.
print("Testing RAG strategy...\n")
print("This retrieves relevant documents before answering.\n")

for i, vector in enumerate(tqdm(test_set, desc="RAG tests")):
    prompt = vector['prompt']

    # Retrieve relevant context (the returned scores are not used here)
    context_docs, _scores = kb.query(prompt, n_results=3)

    # Query with RAG
    response, metadata = agent.query_with_rag(prompt, context_docs)

    # Show the first 2 prompts as worked examples
    if i < 2:
        # Retrieval may return nothing; indexing [0] on an empty result
        # would raise IndexError and abort the whole run.
        if len(context_docs) > 0:
            top_doc_preview = f"{context_docs[0][:150]}..."
        else:
            top_doc_preview = "(no documents retrieved)"

        print("\n" + "="*80)
        print(f"Prompt: {prompt}")
        print(f"\nRetrieved context (top document):")
        print(top_doc_preview)
        print(f"\nRAG Response:\n{response}")
        print("="*80)

    # Annotate (automated for demonstration)
    # NOTE(review): every response is recorded as "not a hallucination"
    # without being inspected, so the rate logged for this experiment is 0%
    # by construction. Manual review (or a real detector) is needed before
    # the logged rate can be compared with other strategies.
    is_hallucination = False

    # Log
    # NOTE(review): vector_type receives the category, the same value as
    # prompt_category - confirm this is what the schema expects.
    db.log_test(
        experiment_id=experiments['rag'],
        prompt_text=prompt,
        response_text=response,
        is_hallucination=is_hallucination,
        prompt_category=vector['category'],
        vector_type=vector.get('category', 'unknown'),
        hallucination_type='none' if not is_hallucination else vector['category'],
        severity=vector.get('severity', 'low'),
        description=vector.get('description', ''),
        response_time_ms=metadata.get('response_time_ms', 0),
        tokens_used=metadata.get('tokens_used', 0),
        retrieved_documents=str(context_docs),
        num_documents=len(context_docs)
    )

    # Pause between requests (presumably to stay under API rate limits)
    time.sleep(1)

print("\n✓ RAG testing complete!")

## Test Constitutional AI Strategy

In [None]:
# Run every prompt in the test set through the Constitutional AI pipeline
# and log the result.
print("Testing Constitutional AI strategy...\n")
print("This uses self-critique to identify and fix hallucinations.\n")

for i, vector in enumerate(tqdm(test_set, desc="Constitutional AI tests")):
    prompt = vector['prompt']

    # Query with Constitutional AI
    response, metadata = agent.query_with_constitutional_ai(prompt)

    # Show the first 2 prompts as worked examples
    if i < 2:
        # `.get(key, default)` only covers a missing key; if the key is
        # present but None, slicing it would raise TypeError.
        initial_preview = (metadata.get('initial_response') or 'N/A')[:150]

        print("\n" + "="*80)
        print(f"Prompt: {prompt}")
        print(f"\nInitial response: {initial_preview}...")
        print(f"\nFinal (critiqued) response:\n{response}")
        print("="*80)

    # Annotate
    # NOTE(review): every response is recorded as "not a hallucination"
    # without being inspected, so the rate logged for this experiment is 0%
    # by construction. Manual review (or a real detector) is needed before
    # the logged rate can be compared with other strategies.
    is_hallucination = False

    # Log
    # NOTE(review): vector_type receives the category, the same value as
    # prompt_category - confirm this is what the schema expects.
    db.log_test(
        experiment_id=experiments['constitutional_ai'],
        prompt_text=prompt,
        response_text=response,
        is_hallucination=is_hallucination,
        prompt_category=vector['category'],
        vector_type=vector.get('category', 'unknown'),
        hallucination_type='none' if not is_hallucination else vector['category'],
        severity=vector.get('severity', 'low'),
        description=vector.get('description', ''),
        response_time_ms=metadata.get('response_time_ms', 0),
        tokens_used=metadata.get('tokens_used', 0)
    )

    # Pause between requests (presumably to stay under API rate limits)
    time.sleep(1)

print("\n✓ Constitutional AI testing complete!")

## Test Chain-of-Thought Strategy

In [None]:
# Run every prompt in the test set through the Chain-of-Thought pipeline
# and log one record per prompt against the chain_of_thought experiment.
print("Testing Chain-of-Thought strategy...\n")
print("This prompts explicit reasoning and uncertainty markers.\n")

separator = "=" * 80
cot_progress = tqdm(test_set, desc="Chain-of-Thought tests")

for idx, vector in enumerate(cot_progress):
    prompt = vector['prompt']
    response, metadata = agent.query_with_chain_of_thought(prompt)

    # Echo the first two prompt/response pairs as examples
    if idx < 2:
        print("\n" + separator)
        print(f"Prompt: {prompt}")
        print(f"\nChain-of-Thought response:\n{response}")
        print(separator)

    # Placeholder annotation: the response is always logged as non-hallucinated
    is_hallucination = False

    # Assemble the record first, then hand it to the database in one call
    log_record = {
        'experiment_id': experiments['chain_of_thought'],
        'prompt_text': prompt,
        'response_text': response,
        'is_hallucination': is_hallucination,
        'prompt_category': vector['category'],
        'vector_type': vector.get('category', 'unknown'),
        'hallucination_type': vector['category'] if is_hallucination else 'none',
        'severity': vector.get('severity', 'low'),
        'description': vector.get('description', ''),
        'response_time_ms': metadata.get('response_time_ms', 0),
        'tokens_used': metadata.get('tokens_used', 0),
    }
    db.log_test(**log_record)

    # One-second pause before the next prompt
    time.sleep(1)

print("\n✓ Chain-of-Thought testing complete!")

## Comparative Analysis

Now let's compare all strategies (including baseline from previous notebooks).

In [None]:
# Pull every stored experiment and show the raw table first
all_experiments = db.get_all_experiments()
print("All Experiments:")
print(all_experiments)

# Keep only the rows belonging to the four strategies under comparison
compared_strategies = ['baseline', 'rag', 'constitutional_ai', 'chain_of_thought']
is_compared = all_experiments['mitigation_strategy'].isin(compared_strategies)
comparison = all_experiments.loc[is_compared].copy()

# Summary table limited to the headline columns
summary_columns = ['name', 'mitigation_strategy', 'total_tests',
                   'hallucinations_detected', 'hallucination_rate']
rule = "=" * 80

print("\n" + rule)
print("COMPARATIVE RESULTS")
print(rule)
print(comparison[summary_columns])

In [None]:
# Detailed comparison: per-strategy hallucination rate, latency and token use.
# Builds `df_stats` with one row per strategy that has a stored experiment.

# Fixed column set so df_stats keeps its columns even when no strategy matched
STAT_COLUMNS = ['Strategy', 'Hallucination Rate', 'Avg Response Time (ms)',
                'Avg Tokens', 'Total Tests']

# Prepare data
strategy_stats = []
for strategy in ['baseline', 'rag', 'constitutional_ai', 'chain_of_thought']:
    exp = comparison[comparison['mitigation_strategy'] == strategy]
    if len(exp) == 0:
        continue

    # Each run of this notebook creates new experiments, so a strategy can
    # have several rows and the first one may not be the run just logged.
    # Prefer the ID created in this session when it is present in the table;
    # otherwise (e.g. baseline, logged by an earlier notebook) fall back to
    # the first matching row.
    session_exp_id = experiments.get(strategy)
    if session_exp_id is not None and (exp['experiment_id'] == session_exp_id).any():
        exp_id = session_exp_id
    else:
        exp_id = exp.iloc[0]['experiment_id']

    results_df = db.get_experiment_results(exp_id)

    strategy_stats.append({
        'Strategy': strategy.replace('_', ' ').title(),
        # mean of the boolean flag, scaled to a percentage
        'Hallucination Rate': results_df['is_hallucination'].mean() * 100,
        'Avg Response Time (ms)': results_df['response_time_ms'].mean(),
        'Avg Tokens': results_df['tokens_used'].mean(),
        'Total Tests': len(results_df)
    })

df_stats = pd.DataFrame(strategy_stats, columns=STAT_COLUMNS)
print("\nDetailed Strategy Statistics:")
print(df_stats)

In [None]:
# Visualization: one bar chart per metric, side by side, saved as a PNG.
chart_path = Path('../results/charts/strategy_comparison.png')
# savefig does not create missing directories, so make sure the target exists
chart_path.parent.mkdir(parents=True, exist_ok=True)

# (df_stats column, y-axis label, panel title) for each panel, left to right
panels = [
    ('Hallucination Rate', 'Hallucination Rate (%)', 'Hallucination Rate by Strategy'),
    ('Avg Response Time (ms)', 'Response Time (ms)', 'Average Response Time'),
    ('Avg Tokens', 'Tokens Used', 'Average Token Usage'),
]

fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))

for ax, (column, y_label, title) in zip(axes, panels):
    ax.bar(df_stats['Strategy'], df_stats[column])
    ax.set_ylabel(y_label)
    ax.set_title(title)
    # Strategy names are long; rotate them so they do not overlap
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.savefig(chart_path, dpi=300, bbox_inches='tight')
plt.show()

print("✓ Chart saved to results/charts/strategy_comparison.png")

## Key Findings

**Document your analysis:**

1. **Most Effective Strategy:**
   - Which strategy had the lowest hallucination rate?
   - Was the reduction significant?

2. **Trade-offs:**
   - Which strategy used the most tokens (cost)?
   - Which was fastest?
   - Is the accuracy improvement worth the cost?

3. **Scenario-Specific Performance:**
   - Did certain strategies work better for specific types of prompts?
   - How did RAG perform on factual vs. speculative questions?

4. **Practical Recommendations:**
   - When would you use each strategy?
   - Could you combine strategies?

**Your analysis:**
- 
- 
- 

## Next Steps

Proceed to **04_data_analysis_visualization.ipynb** for comprehensive data analysis and visualizations for your report.

In [None]:
# Final step: close the HallucinationDB handle created at the top of the notebook
db.close()