# Data Analysis & Visualization

This notebook provides comprehensive analysis and professional visualizations for your research report.

## Objectives
1. Aggregate all experimental data
2. Generate statistical insights
3. Create publication-quality visualizations
4. Provide data for report writing

In [None]:
# Setup
import sys
sys.path.append('../src')

from database import HallucinationDB
from config import Config
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Plot defaults: seaborn's white-grid theme first, then the matplotlib
# default figure size and base font size.
sns.set_style('whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

print("✓ Libraries loaded")

## Load All Data

In [None]:
# Open the project database; `db` stays open until the cleanup cell closes it.
db = HallucinationDB()

# Table of experiments, printed in full under a heading.
df_experiments = db.get_all_experiments()
print("Experiments Summary:", df_experiments, sep="\n")

# Aggregate statistics as reported by the database layer.
stats = db.get_statistics()
print(f"\nTotal experiments: {stats['total_experiments']}")
print(f"Total tests conducted: {stats['total_tests']}")

print()
print("Hallucination rates by mitigation strategy:")
print(stats['hallucination_by_strategy'])

In [None]:
# Load every experiment's results and stack them into one dataframe (df_all).
#
# The ids are read straight from the 'experiment_id' column rather than via
# iterrows(): iterrows() packs each row into a single Series, which can upcast
# the id when the row mixes dtypes. An empty experiments table simply yields
# no ids, as before.
all_results = []

experiment_ids = [] if df_experiments.empty else df_experiments['experiment_id'].tolist()

for exp_id in experiment_ids:
    df_exp = db.get_experiment_results(exp_id)
    # Keep only experiments that actually recorded rows. Otherwise a set of
    # empty frames would be reported as loaded data ("Total data points: 0")
    # instead of triggering the no-data warning below.
    if df_exp is not None and not df_exp.empty:
        all_results.append(df_exp)

if all_results:
    df_all = pd.concat(all_results, ignore_index=True)
    print(f"\nTotal data points: {len(df_all)}")
    print(f"Date range: {df_all['created_at'].min()} to {df_all['created_at'].max()}")
else:
    # Downstream cells all guard on df_all.empty, so an empty frame is the
    # agreed "nothing to analyse" signal.
    print("\n⚠️  No experimental data found. Please run the testing notebooks first.")
    df_all = pd.DataFrame()

## 1. Overall Hallucination Rates

### 1.1 By Mitigation Strategy

In [None]:
if not df_all.empty:
    # Per-strategy summary: number of tests, number and share of
    # hallucinations, plus mean response time and mean token usage.
    strategy_performance = df_all.groupby('mitigation_strategy').agg({
        'is_hallucination': ['count', 'sum', 'mean'],
        'response_time_ms': 'mean',
        'tokens_used': 'mean'
    })

    # agg() returns two-level column labels; replace them with flat names in
    # the same order as the aggregations above.
    strategy_performance.columns = ['Total Tests', 'Hallucinations', 'Hallucination Rate',
                                    'Avg Response Time (ms)', 'Avg Tokens']

    # Derive the percentage from the unrounded rate and round afterwards.
    # Multiplying an already-rounded rate by 100 produces float noise such as
    # 33.300000000000004, which then leaks into the table, CSV and report.
    strategy_performance['Hallucination Rate (%)'] = strategy_performance['Hallucination Rate'] * 100
    strategy_performance = strategy_performance.round({
        'Hallucination Rate': 3,
        'Avg Response Time (ms)': 3,
        'Avg Tokens': 3,
        'Hallucination Rate (%)': 1,
    })

    print("Strategy Performance Metrics:")
    print(strategy_performance)

    # Save to CSV (the strategy name is kept as the index column)
    strategy_performance.to_csv('../results/reports/strategy_performance.csv')
    print("\n✓ Saved to results/reports/strategy_performance.csv")

In [None]:
# Bar chart: hallucination rate (%) for each mitigation strategy.
# Reads strategy_performance from the strategy metrics cell.
if not df_all.empty:
    fig, ax = plt.subplots(figsize=(10, 6))

    # One bar per strategy, in the order of the summary table's index.
    strategies = strategy_performance.index
    rates = strategy_performance['Hallucination Rate (%)'].values
    bar_positions = range(len(strategies))

    # Kept under the name `colors`: the cost-benefit cell reuses this palette.
    colors = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12']
    bars = ax.bar(bar_positions, rates, color=colors[:len(strategies)])

    axis_label_style = dict(fontsize=12, fontweight='bold')
    ax.set_xlabel('Mitigation Strategy', **axis_label_style)
    ax.set_ylabel('Hallucination Rate (%)', **axis_label_style)
    ax.set_title('Hallucination Rate by Mitigation Strategy', fontsize=14, fontweight='bold')

    # Human-readable tick labels: "some_strategy" -> "Some Strategy".
    tick_labels = [name.replace('_', ' ').title() for name in strategies]
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(tick_labels, rotation=45, ha='right')

    # Print each bar's value just above its top edge.
    for bar in bars:
        bar_top = bar.get_height()
        bar_center = bar.get_x() + bar.get_width()/2.
        ax.text(bar_center, bar_top, f'{bar_top:.1f}%',
                ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    plt.savefig('../results/charts/hallucination_rate_by_strategy.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("✓ Chart saved to results/charts/hallucination_rate_by_strategy.png")

### 1.2 By Prompt Category

In [None]:
if not df_all.empty:
    # Per-category summary: number of tests, number of hallucinations and
    # the hallucination rate.
    category_performance = df_all.groupby('prompt_category').agg({
        'is_hallucination': ['count', 'sum', 'mean']
    })

    # Flatten the two-level labels produced by agg().
    category_performance.columns = ['Total', 'Hallucinations', 'Rate']

    # Compute the percentage from the unrounded rate, then round. Scaling an
    # already-rounded rate by 100 yields float noise (e.g. 33.300000000000004)
    # that shows up wherever the value is printed without a format spec.
    category_performance['Rate (%)'] = category_performance['Rate'] * 100
    category_performance = category_performance.round({'Rate': 3, 'Rate (%)': 1})

    # Most hallucination-prone categories first.
    category_performance = category_performance.sort_values('Rate (%)', ascending=False)

    print("Hallucination Rate by Prompt Category:")
    print(category_performance)

In [None]:
# Heatmap: hallucination rate (%) for every prompt category / strategy pair.
if not df_all.empty:
    # Mean of the hallucination flag per (category, strategy) cell, as a percent.
    pivot = df_all.pivot_table(
        index='prompt_category',
        columns='mitigation_strategy',
        values='is_hallucination',
        aggfunc='mean'
    ).mul(100)

    fig, ax = plt.subplots(figsize=(12, 8))

    # Reversed red-yellow-green colormap; each cell is annotated with one decimal.
    heatmap_options = dict(
        annot=True,
        fmt='.1f',
        cmap='RdYlGn_r',
        cbar_kws={'label': 'Hallucination Rate (%)'},
    )
    sns.heatmap(pivot, ax=ax, **heatmap_options)

    axis_label_style = dict(fontsize=12, fontweight='bold')
    ax.set_xlabel('Mitigation Strategy', **axis_label_style)
    ax.set_ylabel('Prompt Category', **axis_label_style)
    ax.set_title('Hallucination Rate: Category vs Strategy Heatmap',
                 fontsize=14, fontweight='bold')

    plt.tight_layout()
    plt.savefig('../results/charts/category_strategy_heatmap.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("✓ Chart saved")

## 2. Cost-Benefit Analysis

### 2.1 Token Usage vs Accuracy

In [None]:
if not df_all.empty:
    # Mean cost metrics (tokens, latency) and mean hallucination flag per strategy.
    strategy_costs = df_all.groupby('mitigation_strategy').agg({
        'tokens_used': 'mean',
        'is_hallucination': 'mean',
        'response_time_ms': 'mean'
    })

    # Accuracy is the complement of the hallucination rate, in percent.
    strategy_costs['accuracy'] = (1 - strategy_costs['is_hallucination']) * 100

    strategies = strategy_costs.index

    # Build exactly one colour per strategy inside this cell. scatter() needs
    # `c` to match the number of points, so the four-colour palette is cycled
    # when there are more than four strategies, and the cell no longer relies
    # on a `colors` list created by a different cell.
    base_palette = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12']
    point_colors = [base_palette[i % len(base_palette)] for i in range(len(strategies))]

    # Visualization: two panels sharing the same y metric (accuracy).
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    # (axes, cost column on x, x-axis label, panel title)
    panels = [
        (ax1, 'tokens_used', 'Average Tokens Used', 'Token Cost vs Accuracy'),
        (ax2, 'response_time_ms', 'Average Response Time (ms)', 'Speed vs Accuracy'),
    ]

    for panel_ax, cost_column, x_label, panel_title in panels:
        panel_ax.scatter(strategy_costs[cost_column], strategy_costs['accuracy'],
                         s=200, alpha=0.6, c=point_colors)

        # Label each point with its strategy name, one word per line.
        for strategy in strategies:
            panel_ax.annotate(strategy.replace('_', '\n'),
                              (strategy_costs.loc[strategy, cost_column],
                               strategy_costs.loc[strategy, 'accuracy']),
                              fontsize=9, ha='center')

        panel_ax.set_xlabel(x_label, fontsize=12, fontweight='bold')
        panel_ax.set_ylabel('Accuracy (%)', fontsize=12, fontweight='bold')
        panel_ax.set_title(panel_title, fontsize=13, fontweight='bold')
        panel_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('../results/charts/cost_benefit_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("✓ Chart saved")

## 3. Severity Analysis

In [None]:
# Stacked bar chart of hallucination counts per strategy, split by severity.
# Runs only when the dataset is non-empty and has a 'severity' column.
if not df_all.empty and 'severity' in df_all.columns:
    # Keep only the rows flagged as hallucinations.
    df_hallucinations = df_all[df_all['is_hallucination'] == True]

    if df_hallucinations.empty:
        print("No hallucinations detected to analyze severity")
    else:
        # Count rows per (strategy, severity), then spread severities into
        # columns; combinations that never occur become 0.
        severity_counts = df_hallucinations.groupby(['mitigation_strategy', 'severity']).size()
        severity_dist = severity_counts.unstack(fill_value=0)

        # NOTE(review): colours are assigned to severity columns in column
        # order; confirm that order matches the intended green-to-red scale.
        severity_palette = ['#27ae60', '#f39c12', '#e67e22', '#c0392b']
        ax = severity_dist.plot(kind='bar', stacked=True, figsize=(10, 6),
                                color=severity_palette)

        axis_label_style = dict(fontsize=12, fontweight='bold')
        ax.set_xlabel('Mitigation Strategy', **axis_label_style)
        ax.set_ylabel('Number of Hallucinations', **axis_label_style)
        ax.set_title('Hallucination Severity Distribution by Strategy', fontsize=14, fontweight='bold')
        ax.legend(title='Severity', title_fontsize=11)

        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig('../results/charts/severity_distribution.png', dpi=300, bbox_inches='tight')
        plt.show()

        print("✓ Chart saved")

## 4. Interactive Visualizations (Plotly)

In [None]:
if not df_all.empty:
    # Hover fields are limited to columns that actually exist. The severity
    # analysis in this notebook treats 'severity' as optional, and plotly
    # raises if hover_data names a column the dataframe does not have.
    hover_columns = [column for column in ('prompt_category', 'severity')
                     if column in df_all.columns]

    # Interactive scatter: tokens vs response time, coloured by whether the
    # response hallucinated, with one facet per mitigation strategy.
    fig = px.scatter(df_all,
                    x='tokens_used',
                    y='response_time_ms',
                    color='is_hallucination',
                    facet_col='mitigation_strategy',
                    hover_data=hover_columns,
                    title='Response Characteristics by Strategy',
                    labels={'tokens_used': 'Tokens Used',
                           'response_time_ms': 'Response Time (ms)',
                           'is_hallucination': 'Hallucinated'})

    # Standalone HTML copy of the figure, then inline display.
    fig.write_html('../results/charts/interactive_scatter.html')
    fig.show()

    print("✓ Interactive chart saved to results/charts/interactive_scatter.html")

## 5. Statistical Summary for Report

In [None]:
if not df_all.empty:
    # Build the plain-text statistical summary line by line, then print it and
    # write it to disk. Reads stats, strategy_performance and
    # category_performance from earlier cells.
    rule = "=" * 80

    report = [
        rule,
        "ML HALLUCINATION RESEARCH - STATISTICAL SUMMARY",
        rule,
        f"\nTotal Experiments Conducted: {stats['total_experiments']}",
        f"Total Tests Performed: {stats['total_tests']}",
        f"Data Collection Period: {df_all['created_at'].min()} to {df_all['created_at'].max()}",
    ]

    report.extend(["\n" + rule, "MITIGATION STRATEGY EFFECTIVENESS", rule])

    for strategy in strategy_performance.index:
        row = strategy_performance.loc[strategy]
        report.append(f"\n{strategy.upper().replace('_', ' ')}:")
        report.append(f"  • Total Tests: {int(row['Total Tests'])}")
        report.append(f"  • Hallucinations Detected: {int(row['Hallucinations'])}")
        # Fixed one-decimal formatting: the raw float can carry binary noise
        # (e.g. 33.300000000000004) that must not reach the written report.
        report.append(f"  • Hallucination Rate: {row['Hallucination Rate (%)']:.1f}%")
        report.append(f"  • Avg Response Time: {row['Avg Response Time (ms)']:.0f}ms")
        report.append(f"  • Avg Token Usage: {row['Avg Tokens']:.0f} tokens")

    # Improvement over baseline, only when a strategy named 'baseline' exists.
    if 'baseline' in strategy_performance.index:
        baseline_rate = strategy_performance.loc['baseline', 'Hallucination Rate (%)']
        report.extend(["\n" + rule, "IMPROVEMENT OVER BASELINE", rule])

        for strategy in strategy_performance.index:
            if strategy != 'baseline':
                rate = strategy_performance.loc[strategy, 'Hallucination Rate (%)']
                # Absolute change in percentage points, and the same change
                # relative to the baseline rate (0 when the baseline is 0, to
                # avoid dividing by zero).
                improvement = baseline_rate - rate
                pct_improvement = (improvement / baseline_rate * 100) if baseline_rate > 0 else 0
                report.append(f"\n{strategy.upper().replace('_', ' ')}:")
                report.append(f"  • Absolute Reduction: {improvement:.1f} percentage points")
                report.append(f"  • Relative Improvement: {pct_improvement:.1f}%")

    report.extend(["\n" + rule, "TOP HALLUCINATION-PRONE CATEGORIES", rule])

    # Up to five categories with the highest hallucination rate.
    top_categories = category_performance.nlargest(5, 'Rate (%)')
    for idx, (cat, row) in enumerate(top_categories.iterrows(), 1):
        report.append(f"\n{idx}. {cat.replace('_', ' ').title()}")
        report.append(f"   Rate: {row['Rate (%)']:.1f}% ({int(row['Hallucinations'])}/{int(row['Total'])} tests)")

    report.append("\n" + rule)

    # Print and save
    report_text = "\n".join(report)
    print(report_text)

    # The text contains non-ASCII bullets, so the encoding is pinned instead
    # of depending on the platform default.
    with open('../results/reports/statistical_summary.txt', 'w', encoding='utf-8') as f:
        f.write(report_text)

    print("\n✓ Report saved to results/reports/statistical_summary.txt")

## 6. Export All Data

In [None]:
if not df_all.empty:
    # Full per-test dataset, written without the positional index.
    dataset_path = '../data/exports/complete_dataset.csv'
    df_all.to_csv(dataset_path, index=False)
    print("✓ Complete dataset exported to data/exports/complete_dataset.csv")

    # Summary tables keep their index (strategy / category) as the first column.
    strategy_path = '../data/exports/strategy_summary.csv'
    category_path = '../data/exports/category_summary.csv'
    strategy_performance.to_csv(strategy_path)
    category_performance.to_csv(category_path)

    print("✓ Summary tables exported")
    print("\nAll data ready for your report!")

## Key Insights for Your Report

Use this section to document your findings:

### 1. Primary Research Question
**Do mitigation strategies effectively reduce LLM hallucinations in cybersecurity contexts?**

Your finding:
- 

### 2. Most Effective Strategy
- Which strategy performed best?
- By how much did it reduce hallucinations?
- What were the trade-offs?

Your analysis:
- 

### 3. Vulnerability Categories
- Which prompt types were most prone to hallucination?
- Did this vary by mitigation strategy?

Your notes:
- 

### 4. Practical Recommendations
- For cybersecurity applications, which strategy would you recommend?
- When is each strategy most appropriate?

Your recommendations:
- 

### 5. Limitations & Future Work
- What are the limitations of this study?
- What would you do differently with more resources?

Your thoughts:
- 

In [None]:
# Release the database handle opened at the start of the notebook, then list
# the output locations.
db.close()
print("\n✓ Analysis complete!")
print("\nAll visualizations and reports are in:")
print("  - results/charts/",
      "  - results/reports/",
      "  - data/exports/",
      sep="\n")