# Data Analysis & Visualization

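This notebook provides comprehensive analysis and professional visualizations for your research report. It reads the results stored in the experiment database, so run the testing notebooks first; charts, reports, and CSV exports are written to `results/charts/`, `results/reports/`, and `data/exports/`.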

## Objectives
1. Aggregate all experimental data
2. Generate statistical insights
3. Create publication-quality visualizations
4. Provide data for report writing

In [None]:
# Setup
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

# Project modules live in ../src; extend sys.path before importing them.
sys.path.append('../src')

from database import HallucinationDB
from config import Config

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

# Later cells write charts, reports and CSV exports into these folders.
# savefig / to_csv / open do not create missing parent directories, so make
# sure they exist up front (a no-op when they are already there).
for output_dir in ('../results/charts', '../results/reports', '../data/exports'):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

print("✓ Libraries loaded")

## Load All Data

In [None]:
# Open the results database (closed again in the final cleanup cell)
db = HallucinationDB()

# Experiment-level overview
df_experiments = db.get_all_experiments()
print("Experiments Summary:", df_experiments, sep="\n")

# Aggregate statistics as reported by the database layer
stats = db.get_statistics()
print(
    f"\nTotal experiments: {stats['total_experiments']}",
    f"Total tests conducted: {stats['total_tests']}",
    "\nHallucination rates by mitigation strategy:",
    stats['hallucination_by_strategy'],
    sep="\n",
)

In [None]:
# Load all experimental results into one dataframe.
# Experiments without any recorded results are skipped: an empty frame adds
# nothing to the concat, and if *every* experiment is empty we want to land in
# the "no data" branch below instead of reporting a NaN date range (or failing
# on a missing 'created_at' column).
all_results = []

for _, exp in df_experiments.iterrows():
    df_exp = db.get_experiment_results(exp['experiment_id'])
    if not df_exp.empty:
        all_results.append(df_exp)

if all_results:
    df_all = pd.concat(all_results, ignore_index=True)
    print(f"\nTotal data points: {len(df_all)}")
    print(f"Date range: {df_all['created_at'].min()} to {df_all['created_at'].max()}")
else:
    print("\n⚠️  No experimental data found. Please run the testing notebooks first.")
    # Downstream cells are all guarded by `df_all.empty`, so an empty frame
    # cleanly disables the rest of the notebook.
    df_all = pd.DataFrame()

## 1. Overall Hallucination Rates

### 1.1 By Mitigation Strategy

In [None]:
if not df_all.empty:
    # Per-strategy aggregates, kept at full precision for now.
    strategy_performance = df_all.groupby('mitigation_strategy').agg({
        'is_hallucination': ['count', 'sum', 'mean'],
        'response_time_ms': 'mean',
        'tokens_used': 'mean'
    })

    # Flatten the (column, aggregation) header; the order follows the agg spec above.
    strategy_performance.columns = ['Total Tests', 'Hallucinations', 'Hallucination Rate',
                                    'Avg Response Time (ms)', 'Avg Tokens']

    # Derive the percentage from the *unrounded* rate. Rounding the rate to
    # 3 decimals first (as was done before) leaves only one decimal in the
    # percentage, making the round(2) below ineffective (e.g. 33.3 instead of 33.33).
    strategy_performance['Hallucination Rate (%)'] = (
        strategy_performance['Hallucination Rate'] * 100
    ).astype(float).round(2)

    # Round for display/export only after the derived column has been computed.
    strategy_performance = strategy_performance.round(3)

    print("Strategy Performance Metrics:")
    print(strategy_performance)

    # Save to CSV
    strategy_performance.to_csv('../results/reports/strategy_performance.csv')
    print("\n✓ Saved to results/reports/strategy_performance.csv")

In [None]:
# Professional Visualization: Hallucination Rate Comparison
if not df_all.empty:
    # Pick the first whitegrid style this matplotlib installation knows about
    # (newer releases ship the seaborn styles under a 'seaborn-v0_8-' prefix),
    # falling back to 'ggplot'. Looking the name up in `plt.style.available`
    # replaces the former nested bare `except:` blocks, which would also have
    # hidden unrelated errors such as KeyboardInterrupt.
    preferred_styles = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
    style_name = next((name for name in preferred_styles if name in plt.style.available),
                      'ggplot')
    plt.style.use(style_name)

    fig, ax = plt.subplots(figsize=(12, 7))

    strategies = strategy_performance.index
    rates = strategy_performance['Hallucination Rate (%)'].values

    # Modern, professional color palette
    colors = ['#264653', '#2A9D8F', '#E9C46A', '#F4A261', '#E76F51']

    bars = ax.barh(range(len(strategies)), rates, color=colors[:len(strategies)],
                   alpha=0.85, edgecolor='black', linewidth=1.5)

    ax.set_yticks(range(len(strategies)))
    ax.set_yticklabels([s.replace('_', ' ').title() for s in strategies], fontsize=12, fontweight='600')
    ax.set_xlabel('Hallucination Rate (%)', fontsize=13, fontweight='bold')
    ax.set_title('Hallucination Rate by Mitigation Strategy',
                 fontsize=16, fontweight='bold', pad=20)
    ax.grid(axis='x', alpha=0.3, linestyle='--', linewidth=0.8)
    ax.set_axisbelow(True)

    # Add value labels just past the end of each bar; the gap is 1% of the
    # longest bar so it scales with the data.
    label_gap = max(rates) * 0.01
    for bar, val in zip(bars, rates):
        ax.text(bar.get_width() + label_gap, bar.get_y() + bar.get_height() / 2.,
                f'{val:.1f}%', ha='left', va='center', fontweight='bold', fontsize=11)

    # Remove top and right spines for cleaner look
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    plt.tight_layout()
    plt.savefig('../results/charts/hallucination_rate_by_strategy.png',
                dpi=300, bbox_inches='tight', facecolor='white')
    plt.show()

    print("✓ Chart saved to results/charts/hallucination_rate_by_strategy.png")

### 1.2 By Prompt Category

In [None]:
if not df_all.empty:
    # Per-category test count, hallucination count and hallucination rate,
    # kept at full precision until the percentage has been derived.
    category_performance = df_all.groupby('prompt_category').agg({
        'is_hallucination': ['count', 'sum', 'mean']
    })

    category_performance.columns = ['Total', 'Hallucinations', 'Rate']

    # Compute the percentage from the unrounded rate; rounding the rate to
    # 3 decimals first would cap the percentage at one decimal and could
    # turn near-equal categories into artificial ties when sorting.
    category_performance['Rate (%)'] = (category_performance['Rate'] * 100).astype(float).round(2)

    # Round for display, then list the most hallucination-prone categories first.
    category_performance = category_performance.round(3).sort_values('Rate (%)', ascending=False)

    print("Hallucination Rate by Prompt Category:")
    print(category_performance)

In [None]:
# Professional Heatmap: Category vs Strategy
if not df_all.empty:
    # Mean hallucination flag per (category, strategy) cell, scaled to percent
    pivot = df_all.pivot_table(
        index='prompt_category',
        columns='mitigation_strategy',
        values='is_hallucination',
        aggfunc='mean',
    ) * 100

    fig, ax = plt.subplots(figsize=(14, 10))

    # Diverging red/yellow/green scale pinned to the full 0-100 % range
    heatmap_options = dict(
        annot=True, fmt='.1f',
        cmap='RdYlGn_r', center=50, vmin=0, vmax=100,
        linewidths=1.5, linecolor='white',
        cbar_kws={'label': 'Hallucination Rate (%)', 'shrink': 0.8},
        annot_kws={'fontsize': 10, 'fontweight': 'bold'},
    )
    sns.heatmap(pivot, ax=ax, **heatmap_options)

    ax.set_title('Hallucination Rate: Category vs Strategy Heatmap',
                 fontsize=16, fontweight='bold', pad=20)
    ax.set_xlabel('Mitigation Strategy', fontsize=13, fontweight='bold', labelpad=10)
    ax.set_ylabel('Prompt Category', fontsize=13, fontweight='bold', labelpad=10)

    # Turn snake_case identifiers into title-cased tick labels
    pretty_x_labels = [tick.get_text().replace('_', ' ').title()
                       for tick in ax.get_xticklabels()]
    ax.set_xticklabels(pretty_x_labels, rotation=30, ha='right', fontsize=11)

    pretty_y_labels = [tick.get_text().replace('_', ' ').title()
                       for tick in ax.get_yticklabels()]
    ax.set_yticklabels(pretty_y_labels, rotation=0, fontsize=11)

    plt.tight_layout()
    plt.savefig('../results/charts/category_strategy_heatmap.png',
                dpi=300, bbox_inches='tight', facecolor='white')
    plt.show()

    print("✓ Chart saved")

## 2. Cost-Benefit Analysis

### 2.1 Token Usage vs Accuracy

In [None]:
if not df_all.empty:
    # Mean cost metrics per strategy; accuracy is the complement of the
    # hallucination rate, expressed in percent.
    strategy_costs = df_all.groupby('mitigation_strategy').agg({
        'tokens_used': 'mean',
        'is_hallucination': 'mean',
        'response_time_ms': 'mean'
    })

    strategy_costs['accuracy'] = (1 - strategy_costs['is_hallucination']) * 100

    # Professional scatter plot visualization
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

    # Modern color palette
    colors_map = ['#003f5c', '#58508d', '#bc5090', '#ff6361', '#ffa600']
    strategies = strategy_costs.index

    # NOTE: marker size only grows with the strategy's position in the index
    # (to keep overlapping markers distinguishable); it does not encode data.
    sizes = [300 + i*100 for i in range(len(strategies))]

    # Both panels share the same layout and differ only in the cost metric on
    # the x-axis, so they are drawn from one description instead of two
    # copy-pasted code paths.
    panels = [
        (ax1, 'tokens_used', 'Average Tokens Used', 'Token Cost vs Accuracy Trade-off'),
        (ax2, 'response_time_ms', 'Average Response Time (ms)', 'Speed vs Accuracy Trade-off'),
    ]

    for panel_ax, cost_column, x_label, panel_title in panels:
        for idx, strategy in enumerate(strategies):
            point = (strategy_costs.loc[strategy, cost_column],
                     strategy_costs.loc[strategy, 'accuracy'])

            # Cycle through the palette so more than len(colors_map) strategies
            # no longer raise IndexError.
            panel_ax.scatter(point[0], point[1],
                             s=sizes[idx], alpha=0.6,
                             c=colors_map[idx % len(colors_map)],
                             edgecolors='black', linewidth=2, zorder=3)

            # Label each marker with the strategy name, offset from the point
            panel_ax.annotate(strategy.replace('_', ' ').title(),
                              point,
                              xytext=(10, 10), textcoords='offset points',
                              fontsize=10, fontweight='600',
                              bbox=dict(boxstyle='round,pad=0.5', facecolor='white',
                                        edgecolor='gray', alpha=0.8))

        panel_ax.set_xlabel(x_label, fontsize=13, fontweight='bold')
        panel_ax.set_ylabel('Accuracy (%)', fontsize=13, fontweight='bold')
        panel_ax.set_title(panel_title, fontsize=14, fontweight='bold', pad=15)
        panel_ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.8)
        panel_ax.set_axisbelow(True)
        panel_ax.spines['top'].set_visible(False)
        panel_ax.spines['right'].set_visible(False)

    plt.suptitle('Cost-Benefit Analysis', fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.savefig('../results/charts/cost_benefit_analysis.png',
                dpi=300, bbox_inches='tight', facecolor='white')
    plt.show()

    print("✓ Chart saved")

## 3. Severity Analysis

In [None]:
if not df_all.empty and 'severity' in df_all.columns:
    # Filter only hallucinations. The explicit `== True` comparison is kept on
    # purpose: it selects the same rows whether the flag is stored as bool or
    # as 0/1 integers.
    df_hallucinations = df_all[df_all['is_hallucination'] == True]

    if len(df_hallucinations) > 0:
        # Color scheme for severity levels, listed from least to most severe
        severity_colors = {
            'low': '#27ae60',
            'medium': '#f39c12',
            'high': '#e67e22',
            'critical': '#c0392b'
        }

        # Severity distribution: one row per strategy, one column per severity
        severity_dist = df_hallucinations.groupby(['mitigation_strategy', 'severity']).size().unstack(fill_value=0)

        # unstack() orders the columns alphabetically (critical, high, low,
        # medium). Re-order them by increasing severity so the stack and the
        # legend read low -> critical; unrecognized labels are kept at the end.
        known_levels = [level for level in severity_colors if level in severity_dist.columns]
        other_levels = [level for level in severity_dist.columns if level not in severity_colors]
        severity_dist = severity_dist[known_levels + other_levels]

        # Professional stacked bar chart
        fig, ax = plt.subplots(figsize=(12, 7))

        # Plot stacked bars; labels without a configured color fall back to gray
        severity_dist.plot(kind='bar', stacked=True, ax=ax,
                          color=[severity_colors.get(col, '#95a5a6') for col in severity_dist.columns],
                          alpha=0.85, edgecolor='black', linewidth=1.2)

        ax.set_xlabel('Mitigation Strategy', fontsize=13, fontweight='bold', labelpad=10)
        ax.set_ylabel('Number of Hallucinations', fontsize=13, fontweight='bold', labelpad=10)
        ax.set_title('Hallucination Severity Distribution by Strategy',
                    fontsize=16, fontweight='bold', pad=20)

        # Improve tick labels
        ax.set_xticklabels([label.get_text().replace('_', ' ').title()
                           for label in ax.get_xticklabels()],
                          rotation=25, ha='right', fontsize=11)

        # Improve legend
        ax.legend(title='Severity', title_fontsize=12, fontsize=10,
                 frameon=True, fancybox=True, shadow=True, loc='upper right')

        ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.8)
        ax.set_axisbelow(True)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        plt.tight_layout()
        plt.savefig('../results/charts/severity_distribution.png',
                   dpi=300, bbox_inches='tight', facecolor='white')
        plt.show()

        print("✓ Chart saved")
    else:
        print("No hallucinations detected to analyze severity")

## 4. Interactive Visualizations (Plotly)

In [None]:
if not df_all.empty:
    # Prepare data with clean labels (work on a copy so df_all stays untouched)
    df_plot = df_all.copy()
    df_plot['Strategy'] = df_plot['mitigation_strategy'].str.replace('_', ' ').str.title()
    df_plot['Category'] = df_plot['prompt_category'].str.replace('_', ' ').str.title()
    df_plot['Hallucinated'] = df_plot['is_hallucination'].map({True: 'Yes', False: 'No'})

    # Hover fields. 'severity' is treated as optional elsewhere in this
    # notebook (the severity analysis checks for the column), so only request
    # it when it exists; plotly raises if hover_data names a missing column.
    hover_fields = {'Category': True}
    if 'severity' in df_plot.columns:
        hover_fields['severity'] = True
    hover_fields.update({
        'tokens_used': ':.0f',
        'response_time_ms': ':.1f',
        # Already shown via the facet title and the legend respectively
        'Strategy': False,
        'Hallucinated': False,
    })

    # Interactive scatter: Response time vs Tokens, colored by hallucination
    fig = px.scatter(df_plot,
                    x='tokens_used',
                    y='response_time_ms',
                    color='Hallucinated',
                    facet_col='Strategy',
                    facet_col_wrap=2,  # Wrap to 2 columns to prevent overcrowding
                    hover_data=hover_fields,
                    title='<b>Response Characteristics by Mitigation Strategy</b>',
                    labels={'tokens_used': 'Tokens Used',
                           'response_time_ms': 'Response Time (ms)',
                           'Hallucinated': 'Hallucination'},
                    color_discrete_map={'Yes': '#E74C3C', 'No': '#2ECC71'},
                    height=600)

    # Update layout for better readability
    fig.update_layout(
        font=dict(size=12, family='Arial, sans-serif'),
        title_font=dict(size=18, family='Arial, sans-serif'),
        plot_bgcolor='rgba(240, 240, 240, 0.5)',
        paper_bgcolor='white',
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial, sans-serif"
        ),
        legend=dict(
            title_font_family="Arial, sans-serif",
            font=dict(size=11),
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='gray',
            borderwidth=1
        )
    )

    # Update traces for better visibility
    fig.update_traces(
        marker=dict(size=10, line=dict(width=1, color='DarkSlateGrey')),
        opacity=0.7
    )

    # Update axes
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(128, 128, 128, 0.2)',
        title_font=dict(size=12, family='Arial, sans-serif')
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(128, 128, 128, 0.2)',
        title_font=dict(size=12, family='Arial, sans-serif')
    )

    # Facet titles arrive as "Strategy=<name>"; keep only the part after '='
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

    fig.write_html('../results/charts/interactive_scatter.html')
    fig.show()

    print("✓ Interactive chart saved to results/charts/interactive_scatter.html")

## 5. Statistical Summary for Report

In [None]:
if not df_all.empty:
    # Build the plain-text summary line by line. Relies on `stats`,
    # `strategy_performance` and `category_performance` from earlier cells.
    report = []
    report.append("="*80)
    report.append("ML HALLUCINATION RESEARCH - STATISTICAL SUMMARY")
    report.append("="*80)
    report.append(f"\nTotal Experiments Conducted: {stats['total_experiments']}")
    report.append(f"Total Tests Performed: {stats['total_tests']}")
    report.append(f"Data Collection Period: {df_all['created_at'].min()} to {df_all['created_at'].max()}")

    report.append("\n" + "="*80)
    report.append("MITIGATION STRATEGY EFFECTIVENESS")
    report.append("="*80)

    for strategy in strategy_performance.index:
        row = strategy_performance.loc[strategy]
        report.append(f"\n{strategy.upper().replace('_', ' ')}:")
        # The row is a mixed-column Series, so counts come back as floats; cast for display
        report.append(f"  • Total Tests: {int(row['Total Tests'])}")
        report.append(f"  • Hallucinations Detected: {int(row['Hallucinations'])}")
        report.append(f"  • Hallucination Rate: {row['Hallucination Rate (%)']}%")
        report.append(f"  • Avg Response Time: {row['Avg Response Time (ms)']:.0f}ms")
        report.append(f"  • Avg Token Usage: {row['Avg Tokens']:.0f} tokens")

    # Calculate improvement over baseline (only when a 'baseline' strategy was tested)
    if 'baseline' in strategy_performance.index:
        baseline_rate = strategy_performance.loc['baseline', 'Hallucination Rate (%)']
        report.append("\n" + "="*80)
        report.append("IMPROVEMENT OVER BASELINE")
        report.append("="*80)

        for strategy in strategy_performance.index:
            if strategy != 'baseline':
                rate = strategy_performance.loc[strategy, 'Hallucination Rate (%)']
                improvement = baseline_rate - rate
                # A baseline that never hallucinated leaves nothing to improve
                # on; report 0 instead of dividing by zero.
                pct_improvement = (improvement / baseline_rate * 100) if baseline_rate > 0 else 0
                report.append(f"\n{strategy.upper().replace('_', ' ')}:")
                report.append(f"  • Absolute Reduction: {improvement:.1f} percentage points")
                report.append(f"  • Relative Improvement: {pct_improvement:.1f}%")

    report.append("\n" + "="*80)
    report.append("TOP HALLUCINATION-PRONE CATEGORIES")
    report.append("="*80)

    top_categories = category_performance.nlargest(5, 'Rate (%)')
    for idx, (cat, row) in enumerate(top_categories.iterrows(), 1):
        report.append(f"\n{idx}. {cat.replace('_', ' ').title()}")
        report.append(f"   Rate: {row['Rate (%)']}% ({int(row['Hallucinations'])}/{int(row['Total'])} tests)")

    report.append("\n" + "="*80)

    # Print and save
    report_text = "\n".join(report)
    print(report_text)

    # The report contains non-ASCII bullets ("•"). Write it as UTF-8 explicitly
    # so the result does not depend on the platform's default encoding, which
    # may be unable to encode them.
    with open('../results/reports/statistical_summary.txt', 'w', encoding='utf-8') as f:
        f.write(report_text)

    print("\n✓ Report saved to results/reports/statistical_summary.txt")

## 6. Export All Data

In [None]:
if not df_all.empty:
    # Raw per-test rows, without the positional index
    df_all.to_csv('../data/exports/complete_dataset.csv', index=False)
    print("✓ Complete dataset exported to data/exports/complete_dataset.csv")

    # Aggregated tables built earlier; their index (strategy / category) is kept
    summary_exports = (
        (strategy_performance, '../data/exports/strategy_summary.csv'),
        (category_performance, '../data/exports/category_summary.csv'),
    )
    for summary_table, csv_path in summary_exports:
        summary_table.to_csv(csv_path)

    print("✓ Summary tables exported", "\nAll data ready for your report!", sep="\n")

## Key Insights for Your Report

Use this section to document your findings:

### 1. Primary Research Question
**Do mitigation strategies effectively reduce LLM hallucinations in cybersecurity contexts?**

Your finding:
- 

### 2. Most Effective Strategy
- Which strategy performed best?
- By how much did it reduce hallucinations?
- What were the trade-offs?

Your analysis:
- 

### 3. Vulnerability Categories
- Which prompt types were most prone to hallucination?
- Did this vary by mitigation strategy?

Your notes:
- 

### 4. Practical Recommendations
- For cybersecurity applications, which strategy would you recommend?
- When is each strategy most appropriate?

Your recommendations:
- 

### 5. Limitations & Future Work
- What are the limitations of this study?
- What would you do differently with more resources?

Your thoughts:
- 

In [None]:
# Cleanup: release the database handle, then point the reader at the outputs
db.close()

closing_lines = (
    "\n✓ Analysis complete!",
    "\nAll visualizations and reports are in:",
    "  - results/charts/",
    "  - results/reports/",
    "  - data/exports/",
)
print(*closing_lines, sep="\n")