# Plots and Results

This notebook generates all plots and figures for the academic report.


In [None]:
import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path().absolute().parent))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from src.evaluation.metrics import aggregate_results, compute_theoretical_bound

# Plot styling: use the seaborn dark-grid theme when this matplotlib
# installation provides it, otherwise fall back to the default style.
preferred_style = 'seaborn-v0_8-darkgrid'
if preferred_style not in plt.style.available:
    preferred_style = 'default'
plt.style.use(preferred_style)

# Notebook-wide figure defaults, applied after the style has been selected.
matplotlib.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# Directory that receives every saved figure; created if it does not exist.
output_dir = Path('../results/figures')
output_dir.mkdir(parents=True, exist_ok=True)


## Load results


In [None]:
# CSV holding the experiment results that every plot below reads.
results_path = Path('../data/processed/experiments_full.csv')

if not results_path.exists():
    # No results yet: tell the user how to produce them and fall back to an
    # empty frame so the plotting cells (guarded by `df.empty`) are skipped.
    print(f"Results file not found at {results_path}")
    print("Please run experiments first using: python scripts/run_all_experiments.py")
    df = pd.DataFrame()
else:
    df = pd.read_csv(results_path)
    print(f"Loaded {len(df)} experiment results")


## Plot 1: Spanner size vs k for different n and p


In [None]:
# Mean spanner size against k: one panel per n, one curve per p, with the
# value returned by compute_theoretical_bound(n, k) overlaid as a dashed line.
if not df.empty:
    # Average the spanner size over all rows sharing the same (n, p, k)
    df_agg = (
        df.groupby(['n', 'p', 'k'])['spanner_size']
        .mean()
        .reset_index()
    )

    n_values = sorted(df_agg['n'].unique())
    p_values = sorted(df_agg['p'].unique())

    # squeeze=False always returns a 2-D array of axes, so a single panel
    # needs no special-casing; row 0 holds one Axes per n value.
    n_panels = len(n_values)
    fig, axes_grid = plt.subplots(1, n_panels, figsize=(6 * n_panels, 6),
                                  squeeze=False)

    for ax, n in zip(axes_grid[0], n_values):
        rows_n = df_agg[df_agg['n'] == n]

        # Only the first 5 p values are drawn, for clarity
        for p in p_values[:5]:
            rows_np = rows_n[rows_n['p'] == p]
            if rows_np.empty:
                continue  # no rows for this (n, p) combination
            ax.plot(rows_np['k'], rows_np['spanner_size'], marker='o',
                    label=f'p={p:.3f}', linewidth=2, markersize=8)

        # Theoretical bound, evaluated at every k present for this n
        k_vals = sorted(rows_n['k'].unique())
        theoretical = [compute_theoretical_bound(n, k) for k in k_vals]
        ax.plot(k_vals, theoretical, 'k--', label='Theoretical bound', linewidth=2)

        ax.set_xlabel('k', fontsize=14)
        ax.set_ylabel('Spanner Size |E(H)|', fontsize=14)
        ax.set_title(f'n = {n}', fontsize=16)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Save a raster (300 dpi) and a vector copy before displaying
    fig.tight_layout()
    fig.savefig(output_dir / 'spanner_size_vs_k.png', dpi=300, bbox_inches='tight')
    fig.savefig(output_dir / 'spanner_size_vs_k.pdf', bbox_inches='tight')
    plt.show()


## Plot 2: Stretch vs k


In [None]:
# Stretch against k in two side-by-side panels: max edge stretch (with the
# 2k-1 bound) on the left, average edge stretch on the right.
if not df.empty:
    # Mean of each stretch metric over all rows sharing the same (n, p, k)
    stretch_cols = ['max_stretch_edges', 'avg_stretch_edges',
                    'max_stretch_pairs', 'avg_stretch_pairs']
    df_stretch = df.groupby(['n', 'p', 'k'])[stretch_cols].mean().reset_index()

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # (axes, column, marker, y-label, title, draw the 2k-1 bound?)
    panels = [
        (axes[0], 'max_stretch_edges', 'o', 'Max Stretch', 'Max Stretch vs k', True),
        (axes[1], 'avg_stretch_edges', 's', 'Average Stretch', 'Average Stretch vs k', False),
    ]

    for ax, column, marker, ylabel, title, with_bound in panels:
        # Limit to 3 n values; for each, plot only the first p value found
        for n in sorted(df_stretch['n'].unique())[:3]:
            rows_n = df_stretch[df_stretch['n'] == n]
            p = rows_n['p'].iloc[0]
            rows_np = rows_n[rows_n['p'] == p]
            if rows_np.empty:
                continue
            ax.plot(rows_np['k'], rows_np[column], marker=marker,
                    label=f'n={n}, p={p:.3f}', linewidth=2, markersize=8)

        if with_bound:
            # Theoretical bound line, drawn after the data curves
            k_vals = sorted(df_stretch['k'].unique())
            bound_vals = [2*k - 1 for k in k_vals]
            ax.plot(k_vals, bound_vals, 'k--', label='Theoretical bound (2k-1)', linewidth=2)

        ax.set_xlabel('k', fontsize=14)
        ax.set_ylabel(ylabel, fontsize=14)
        ax.set_title(title, fontsize=16)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Save a raster (300 dpi) and a vector copy before displaying
    fig.tight_layout()
    fig.savefig(output_dir / 'stretch_vs_k.png', dpi=300, bbox_inches='tight')
    fig.savefig(output_dir / 'stretch_vs_k.pdf', bbox_inches='tight')
    plt.show()


## Plot 3: Runtime vs n


In [None]:
# Mean time of each timed column against n on log-log axes, one panel per
# column and one curve per k.
if not df.empty:
    time_cols = ['time_gen', 'time_spanner', 'time_stretch']
    titles = ['Graph Generation', 'Spanner Construction', 'Stretch Evaluation']

    # Average each timing column over all rows sharing the same (n, k)
    df_time = df.groupby(['n', 'k'])[time_cols].mean().reset_index()

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Limit to 3 k values; the same selection is used in every panel
    shown_k = sorted(df_time['k'].unique())[:3]

    for ax, col, title in zip(axes, time_cols, titles):
        for k in shown_k:
            rows_k = df_time[df_time['k'] == k]
            ax.loglog(rows_k['n'], rows_k[col], marker='o', label=f'k={k}',
                      linewidth=2, markersize=8)

        ax.set_xlabel('n (number of vertices)', fontsize=14)
        ax.set_ylabel('Time (seconds)', fontsize=14)
        ax.set_title(title, fontsize=16)
        ax.legend()
        # which='both' also draws the minor grid lines of the log axes
        ax.grid(True, alpha=0.3, which='both')

    # Save a raster (300 dpi) and a vector copy before displaying
    fig.tight_layout()
    fig.savefig(output_dir / 'runtime_vs_n.png', dpi=300, bbox_inches='tight')
    fig.savefig(output_dir / 'runtime_vs_n.pdf', bbox_inches='tight')
    plt.show()


## Plot 4: Spanner size ratio to theoretical bound


In [None]:
# Mean ratio of spanner size to the theoretical bound against n, one curve
# per k, on a logarithmic y-axis.
if not df.empty:
    # Mean of the 'spanner_size_ratio' column over all rows sharing (n, k).
    # NOTE(review): presumably this column holds |E(H)| / (k * n^(1+1/k)),
    # as the y-axis label states — confirm against the code writing the CSV.
    df_ratio = df.groupby(['n', 'k'])['spanner_size_ratio'].mean().reset_index()

    fig, ax = plt.subplots(1, 1, figsize=(10, 6))

    for k in sorted(df_ratio['k'].unique()):
        df_k = df_ratio[df_ratio['k'] == k]
        ax.plot(df_k['n'], df_k['spanner_size_ratio'], marker='o',
                label=f'k={k}', linewidth=2, markersize=8)

    # Reference line at 1.0 (axhline spans the full width of the axes)
    ax.axhline(y=1.0, color='k', linestyle='--', linewidth=2, label='Theoretical bound')

    ax.set_xlabel('n (number of vertices)', fontsize=14)
    ax.set_ylabel('|E(H)| / (k * n^(1+1/k))', fontsize=14)
    ax.set_title('Spanner Size Ratio to Theoretical Bound', fontsize=16)
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_yscale('log')

    # Save a raster (300 dpi) and a vector copy before displaying
    plt.tight_layout()
    plt.savefig(output_dir / 'spanner_size_ratio.png', dpi=300, bbox_inches='tight')
    plt.savefig(output_dir / 'spanner_size_ratio.pdf', bbox_inches='tight')
    plt.show()
