# SPON Results Visualization

This notebook helps you analyze and visualize experimental results from SPON allocation sweeps.

**What you'll learn:**
1. How to load and parse experimental results
2. How to plot Pareto frontiers (parameter-performance trade-offs)
3. How to compare configurations statistically
4. How to generate publication-ready figures

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))

import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Style
# Global plot styling: both calls change process-wide defaults, so they affect
# every figure created by the cells below.
# NOTE(review): the 'seaborn-v0_8-*' style name looks tied to newer matplotlib
# releases — confirm against the matplotlib version pinned for this project.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

from src.result_manager import ExperimentManager
from src.evaluation import compute_pareto_frontier

## 1. Load Experimental Results

Specify the path to your experiment results directory.

In [None]:
# Change this to your results directory
RESULTS_DIR = Path("../results/allocation_sweep")

if RESULTS_DIR.exists():
    # Derive the experiment name and base directory from RESULTS_DIR so that
    # editing the path above is the only change needed to analyse another sweep.
    # (Assumes ExperimentManager takes (experiment_name, base_dir), as the
    # original hard-coded call did — TODO confirm.)
    exp_manager = ExperimentManager(RESULTS_DIR.name, str(RESULTS_DIR.parent))
    runs = exp_manager.list_runs()
    print(f"Found {len(runs)} runs:")
    for run in runs:
        print(f"  - {run}")
else:
    print(f"Results directory not found: {RESULTS_DIR}")
    print("Run the experiment first, or we'll use demo data below.")
    # Define both names on this path too, so re-running the cell never leaves a
    # manager from an earlier execution lying around in the kernel.
    exp_manager = None
    runs = []

In [None]:
# Load results (or fall back to demo data)
all_results = []
for run_id in runs:
    run_data = exp_manager.load_run(run_id)
    # Skip runs that could not be loaded or that carry no "results" entry.
    if run_data and "results" in run_data:
        all_results.extend(run_data["results"])

if all_results:
    df = pd.DataFrame(all_results)
    print(f"Loaded {len(df)} result rows")
else:
    if runs:
        # Runs exist but none of them held results. An empty, column-less frame
        # would make every later cell fail with KeyError, so say so and fall back.
        print(f"None of the {len(runs)} runs contained results; falling back to demo data.")

    # Demo data (typical SPON results)
    demo_data = [
        {"config_name": "BASELINE-TEAL", "sparsity": 0.5, "perplexity": 18.5, "relative_params": 0.0},
        {"config_name": "UNIF-ALL", "sparsity": 0.5, "perplexity": 15.2, "relative_params": 1.0},
        {"config_name": "TOP-25", "sparsity": 0.5, "perplexity": 17.1, "relative_params": 0.25},
        {"config_name": "TOP-50", "sparsity": 0.5, "perplexity": 15.8, "relative_params": 0.50},
        {"config_name": "TOP-75", "sparsity": 0.5, "perplexity": 15.4, "relative_params": 0.75},
        {"config_name": "BOTTOM-50", "sparsity": 0.5, "perplexity": 16.9, "relative_params": 0.50},
        {"config_name": "BASELINE-TEAL", "sparsity": 0.6, "perplexity": 22.1, "relative_params": 0.0},
        {"config_name": "UNIF-ALL", "sparsity": 0.6, "perplexity": 17.8, "relative_params": 1.0},
        {"config_name": "TOP-50", "sparsity": 0.6, "perplexity": 18.9, "relative_params": 0.50},
    ]
    df = pd.DataFrame(demo_data)
    print("Using demo data (run experiment to see real results)")

df.head(10)

## 2. Pareto Frontier Analysis

The **Pareto frontier** shows optimal trade-offs between parameter count and performance.

A configuration is Pareto-optimal if no other configuration achieves both:
- Fewer parameters AND
- Lower perplexity

In [None]:
def plot_pareto_frontier(df, sparsity=0.5, ax=None):
    """Scatter every configuration at one sparsity level and mark the Pareto frontier.

    Args:
        df: Results frame with ``config_name``, ``sparsity``, ``perplexity`` and
            ``relative_params`` columns.
        sparsity: Sparsity level to plot (exact match against ``df['sparsity']``).
        ax: Axes to draw on. A new 10x7 inch figure is created when omitted.

    Returns:
        A DataFrame built from the records returned by
        ``compute_pareto_frontier``, or ``None`` when ``df`` has no rows for
        the requested sparsity.
    """
    subset = df[df['sparsity'] == sparsity]

    # Check for data before touching matplotlib so that a missing sparsity
    # level does not leave an empty figure behind.
    if len(subset) == 0:
        print(f"No data for sparsity={sparsity}")
        return None

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 7))

    # Compute Pareto frontier
    pareto = compute_pareto_frontier(
        subset.to_dict('records'),
        x_key='relative_params',
        y_key='perplexity'
    )
    pareto_df = pd.DataFrame(pareto)

    # Identify frontier members by (name, x, y) instead of by name alone, so a
    # dominated row that shares its config_name with a frontier row (e.g. a
    # repeated run) is not starred by mistake.
    # Assumes compute_pareto_frontier returns the selected input records with
    # these three fields unchanged — TODO confirm.
    point_cols = ['config_name', 'relative_params', 'perplexity']
    if len(pareto_df) > 0:
        frontier_points = set(pareto_df[point_cols].itertuples(index=False, name=None))
    else:
        frontier_points = set()

    # Plot all points; frontier members get a larger star marker
    colors = {'BASELINE-TEAL': 'red', 'UNIF-ALL': 'blue'}
    for point in subset[point_cols].itertuples(index=False, name=None):
        name, rel_params, ppl = point
        is_pareto = point in frontier_points
        ax.scatter(
            rel_params, ppl,
            c=colors.get(name, 'gray'),
            s=200 if is_pareto else 100,
            marker='*' if is_pareto else 'o',
            alpha=0.8, edgecolors='black', linewidths=1
        )
        ax.annotate(
            name,
            (rel_params, ppl),
            xytext=(5, 5), textcoords='offset points', fontsize=9
        )

    # Plot Pareto frontier line (needs at least two points to form a segment)
    if len(pareto_df) > 1:
        pareto_sorted = pareto_df.sort_values('relative_params')
        ax.plot(
            pareto_sorted['relative_params'], pareto_sorted['perplexity'],
            'g--', linewidth=2, label='Pareto frontier'
        )

    ax.set_xlabel('Relative Parameters (vs UNIF-ALL)', fontsize=12)
    ax.set_ylabel('Perplexity', fontsize=12)
    ax.set_title(f'SPON Allocation Trade-offs (Sparsity={sparsity:.0%})', fontsize=14)

    # Only build a legend when something is labelled; otherwise matplotlib
    # warns that there are no artists to put in it (single-point frontier).
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend()
    ax.grid(True, alpha=0.3)

    return pareto_df

# Draw the trade-off chart for the 50% sparsity level
pareto = plot_pareto_frontier(df, sparsity=0.5)
plt.tight_layout()
plt.show()

# List the frontier members only when the plot call actually produced some
if pareto is not None and len(pareto):
    print("\nPareto-optimal configurations:", pareto, sep="\n")

## 3. Configuration Comparison

Compare all configurations against the TEAL baseline.

In [None]:
def plot_config_comparison(df, sparsity=0.5):
    """Draw side-by-side horizontal bar charts of perplexity and gain over TEAL.

    Rows at the requested sparsity are ranked by their percentage perplexity
    improvement relative to the ``BASELINE-TEAL`` row. Returns the ranked table
    (``config_name``, ``perplexity``, ``relative_params``, ``improvement``), or
    ``None`` after printing a notice when no baseline row exists.
    """
    at_level = df[df['sparsity'] == sparsity].copy()

    # The first BASELINE-TEAL row at this sparsity is the reference point
    baseline_values = at_level.loc[at_level['config_name'] == 'BASELINE-TEAL', 'perplexity'].values
    if len(baseline_values) == 0:
        print("No TEAL baseline found")
        return
    teal_ppl = baseline_values[0]

    # Percentage gain over the baseline, smallest gain first
    at_level['improvement'] = (teal_ppl - at_level['perplexity']) / teal_ppl * 100
    at_level = at_level.sort_values('improvement', ascending=True)

    # Both panels share one colouring: green beats the baseline, red does not
    bar_colors = ['green' if gain > 0 else 'red' for gain in at_level['improvement']]
    names = at_level['config_name']

    fig, (ppl_ax, gain_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: absolute perplexity with the baseline as a dashed marker
    ppl_ax.barh(names, at_level['perplexity'], color=bar_colors, alpha=0.7)
    ppl_ax.axvline(x=teal_ppl, color='red', linestyle='--', label=f'TEAL baseline ({teal_ppl:.1f})')
    ppl_ax.set_xlabel('Perplexity', fontsize=12)
    ppl_ax.set_title(f'Perplexity by Configuration (Sparsity={sparsity:.0%})', fontsize=14)
    ppl_ax.legend()

    # Right panel: relative improvement around a zero line
    gain_ax.barh(names, at_level['improvement'], color=bar_colors, alpha=0.7)
    gain_ax.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
    gain_ax.set_xlabel('PPL Improvement vs TEAL (%)', fontsize=12)
    gain_ax.set_title('Relative Improvement', fontsize=14)

    plt.tight_layout()
    plt.show()

    return at_level[['config_name', 'perplexity', 'relative_params', 'improvement']]

# Chart the 50%-sparsity comparison; the helper returns None without a baseline
comparison = plot_config_comparison(df, sparsity=0.5)
if comparison is not None:
    print("\nDetailed comparison:", comparison.to_string(index=False), sep="\n")

## 4. Sparsity Sensitivity Analysis

How does performance change across sparsity levels?

In [None]:
def plot_sparsity_sensitivity(df):
    """Draw one perplexity-versus-sparsity line per configuration.

    Configurations measured at fewer than two sparsity levels are left out.
    When the whole frame covers fewer than two levels, a notice is printed and
    nothing is drawn. Always returns ``None``.
    """
    levels = sorted(df['sparsity'].unique())
    if len(levels) < 2:
        print("Need at least 2 sparsity levels for sensitivity analysis")
        return

    fig, ax = plt.subplots(figsize=(10, 6))

    for name in df['config_name'].unique():
        series = df[df['config_name'] == name].sort_values('sparsity')
        # A single point cannot form a line
        if len(series) < 2:
            continue
        # The baseline is dashed so it stands apart from the SPON variants
        dash = '--' if name == 'BASELINE-TEAL' else '-'
        ax.plot(
            series['sparsity'] * 100,
            series['perplexity'],
            marker='o',
            linestyle=dash,
            linewidth=2,
            markersize=8,
            label=name,
        )

    ax.set_xlabel('Sparsity (%)', fontsize=12)
    ax.set_ylabel('Perplexity', fontsize=12)
    ax.set_title('SPON Performance vs Sparsity Level', fontsize=14)
    # Legend sits outside the axes so it never covers the lines
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Draw the sensitivity chart for every configuration in df; the helper prints a
# notice instead of plotting when df covers fewer than two sparsity levels.
plot_sparsity_sensitivity(df)

## 5. Efficiency Analysis

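Which configurations give the best "bang for your buck"? Efficiency is measured as the percentage perplexity improvement over the TEAL baseline divided by the configuration's relative parameter count.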

In [None]:
def compute_efficiency(df, sparsity=0.5):
    """Rank configurations by perplexity improvement per unit of relative parameters.

    Args:
        df: Results frame with ``config_name``, ``sparsity``, ``perplexity`` and
            ``relative_params`` columns.
        sparsity: Sparsity level to analyse (exact match against ``df['sparsity']``).

    Returns:
        ``None`` when there is no ``BASELINE-TEAL`` row at this sparsity.
        Otherwise a DataFrame with ``config_name``, ``improvement`` (percent vs
        the baseline), ``relative_params`` and ``efficiency``
        (``improvement / relative_params``), sorted by descending efficiency.
        The frame is empty, and no chart is drawn, when no row has
        ``relative_params > 0``.
    """
    subset = df[df['sparsity'] == sparsity].copy()

    teal_ppl = subset[subset['config_name'] == 'BASELINE-TEAL']['perplexity'].values
    if len(teal_ppl) == 0:
        return None
    teal_ppl = teal_ppl[0]

    # Drop rows with zero relative parameters (the TEAL baseline); this also
    # keeps the efficiency division below away from a zero denominator.
    subset = subset[subset['relative_params'] > 0].copy()

    # Compute improvement and efficiency
    subset['improvement'] = (teal_ppl - subset['perplexity']) / teal_ppl * 100
    subset['efficiency'] = subset['improvement'] / subset['relative_params']
    subset = subset.sort_values('efficiency', ascending=False)

    result = subset[['config_name', 'improvement', 'relative_params', 'efficiency']]

    # Nothing to rank: return the (empty) table instead of drawing a blank chart.
    if len(subset) == 0:
        print(f"No configurations with relative_params > 0 at sparsity={sparsity}")
        return result

    # Plot
    fig, ax = plt.subplots(figsize=(10, 6))

    colors = plt.cm.viridis(np.linspace(0, 1, len(subset)))
    bars = ax.barh(subset['config_name'], subset['efficiency'], color=colors, alpha=0.8)

    ax.set_xlabel('Efficiency (% improvement / relative params)', fontsize=12)
    ax.set_title(f'Parameter Efficiency (Sparsity={sparsity:.0%})', fontsize=14)
    ax.grid(True, alpha=0.3, axis='x')

    # Add value labels. bar_label puts each label beyond the end of its bar on
    # the correct side, so negative efficiencies no longer overlap the bar the
    # way a fixed positive data-unit offset did.
    ax.bar_label(bars, fmt='%.1f', padding=3, fontsize=10)

    plt.tight_layout()
    plt.show()

    return result

# Rank configurations at 50% sparsity; None means there was no TEAL baseline
eff_df = compute_efficiency(df, sparsity=0.5)
if eff_df is not None:
    print("\nEfficiency ranking:", eff_df.to_string(index=False), sep="\n")

## 6. Export Publication-Ready Figures

Save high-quality figures for papers.

In [None]:
def export_publication_figure(df, output_path, sparsity=0.5):
    """Export a publication-ready Pareto frontier figure.

    Args:
        df: Results frame accepted by ``plot_pareto_frontier``.
        output_path: Destination image path; written at 300 dpi.
        sparsity: Sparsity level to plot.

    Returns:
        None. Nothing is written when ``plot_pareto_frontier`` reports that
        there is no data for the requested sparsity.
    """
    publication_rc = {
        'font.size': 12,
        'axes.labelsize': 14,
        'axes.titlesize': 16,
        'legend.fontsize': 11,
    }

    # rc_context restores the previous rcParams on exit, so exporting does not
    # restyle every later figure in the notebook. It is entered before the
    # figure is created so the settings are active while the axes are built.
    with plt.rc_context(publication_rc):
        fig, ax = plt.subplots(figsize=(8, 6))

        pareto_df = plot_pareto_frontier(df, sparsity=sparsity, ax=ax)
        if pareto_df is None:
            # Nothing was drawn; do not write an empty image to disk.
            plt.close(fig)
            print(f"Nothing to export for sparsity={sparsity}")
            return

        fig.tight_layout()
        # Save through the figure object rather than pyplot's "current figure".
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        print(f"Saved to {output_path}")
        plt.show()

# Make sure the figures folder exists, then write the 50%-sparsity Pareto plot into it
output_dir = Path("../results/figures")
output_dir.mkdir(parents=True, exist_ok=True)
export_publication_figure(
    df,
    output_dir.joinpath("pareto_frontier.png"),
    sparsity=0.5,
)

## 7. Summary Statistics

In [None]:
# Summary table: one short report per sparsity level
print("=" * 60)
print("SPON ALLOCATION EXPERIMENT SUMMARY")
print("=" * 60)

for sparsity in sorted(df['sparsity'].unique()):
    print(f"\n--- Sparsity: {sparsity:.0%} ---")
    subset = df[df['sparsity'] == sparsity]

    # Baseline perplexities (possibly none) and the lowest-perplexity row
    teal = subset[subset['config_name'] == 'BASELINE-TEAL']['perplexity'].values
    best = subset.loc[subset['perplexity'].idxmin()]

    # Without a baseline only the best configuration can be reported
    if len(teal) == 0:
        print(f"  Best config:    {best['config_name']} ({best['perplexity']:.2f} PPL)")
        continue

    print(f"  TEAL baseline:  {teal[0]:.2f} PPL")
    print(f"  Best config:    {best['config_name']} ({best['perplexity']:.2f} PPL)")
    improvement = (teal[0] - best['perplexity']) / teal[0] * 100
    print(f"  Improvement:    {improvement:.1f}%")
    print(f"  Params used:    {best['relative_params']:.0%} of UNIF-ALL")