# Experiment Results Analysis

This notebook loads and analyzes the results from spanner experiments.


In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory first on the module search
# path so the `src` package imported below can be resolved.
# NOTE(review): assumes the notebook is run from a folder directly under the
# project root — confirm.
sys.path.insert(0, str(Path.cwd().parent))

import pandas as pd
import numpy as np
from src.evaluation.metrics import aggregate_results

# Set display options: each of these pandas display settings is set to None
for _display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)


## Load experiment results


In [None]:
# Location of the results CSV, relative to the working directory
results_path = Path('../data/processed/experiments_full.csv')

if not results_path.exists():
    # Nothing on disk yet: explain how to generate the file and fall back to an
    # empty frame so the `df.empty` guards in the following cells skip cleanly.
    print(f"Results file not found at {results_path}")
    print("Please run experiments first using: python scripts/run_all_experiments.py")
    df = pd.DataFrame()
else:
    df = pd.read_csv(results_path)
    # Quick sanity report: row count, column names, and a preview of the data
    print(f"Loaded {len(df)} experiment results")
    print(f"\nColumns: {list(df.columns)}")
    print("\nFirst few rows:")
    print(df.head())


## Summary statistics


In [None]:
if not df.empty:
    # Report the smallest and largest observed value of each parameter column
    print("Experiment parameter ranges:")
    _n_col = df['n']
    print(f"  n: {_n_col.min()} to {_n_col.max()}")
    _p_col = df['p']
    print(f"  p: {_p_col.min():.4f} to {_p_col.max():.4f}")
    _k_col = df['k']
    print(f"  k: {_k_col.min()} to {_k_col.max()}")
    # Repetition count is reported as the largest `rep` value plus one;
    # presumably `rep` is a zero-based index — verify against the experiment runner.
    _last_rep = df['rep'].max()
    print(f"  Repetitions: {_last_rep + 1}")
    # One row per experiment run
    _row_count = len(df)
    print(f"\nTotal experiments: {_row_count}")


## Aggregate results by (n, p, k)


In [None]:
if not df.empty:
    # Hand the raw rows to the project's aggregate_results helper
    # (presumably grouping by (n, p, k), per the section heading — confirm in
    # src.evaluation.metrics).
    aggregated = aggregate_results(df)
    # Caption and table go out as two newline-separated items in a single call
    print("Aggregated results (mean, std, min, max):", aggregated, sep="\n")


## Summary tables by parameter


In [None]:
if not df.empty:
    # Spanner size per k: mean, standard deviation, and number of rows
    print("\nMean spanner size by k:")
    _by_k = df.groupby('k')  # grouping reused for both tables in this cell
    summary_k = _by_k['spanner_size'].agg(['mean', 'std', 'count'])
    print(summary_k)

    # Per-k mean of the two edge-stretch columns
    print("\nMean stretch by k:")
    _stretch_cols = ['max_stretch_edges', 'avg_stretch_edges']
    summary_stretch_k = _by_k[_stretch_cols].mean()
    print(summary_stretch_k)


In [None]:
if not df.empty:
    # Spanner size per n: mean, standard deviation, and number of rows
    print("\nMean spanner size by n:")
    _by_n = df.groupby('n')  # grouping reused for both tables in this cell
    summary_n = _by_n['spanner_size'].agg(['mean', 'std', 'count'])
    print(summary_n)

    # Per-n mean of the three timing columns
    print("\nMean runtime by n:")
    _timing_cols = ['time_gen', 'time_spanner', 'time_stretch']
    summary_time_n = _by_n[_timing_cols].mean()
    print(summary_time_n)


## Spanner size ratio to theoretical bound


In [None]:
if not df.empty:
    # Distribution of the size ratio within each k: mean, std, min, max
    print("Mean spanner size ratio (|E(H)| / theoretical_bound) by k:")
    _ratio_per_k = df.groupby('k')['spanner_size_ratio']
    ratio_summary = _ratio_per_k.agg(['mean', 'std', 'min', 'max'])
    print(ratio_summary)

    print("\nMean ratio by (n, k):")
    _mean_ratio = df.groupby(['n', 'k'])['spanner_size_ratio'].mean()
    # unstack() moves the inner index level (k) into columns,
    # giving one row per n and one column per k
    ratio_by_nk = _mean_ratio.unstack()
    print(ratio_by_nk)
