# Hyperparameter Tuning Results Visualization

This notebook visualizes results from distributed Bayesian optimization runs.

In [26]:
import json
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Use a compatible style.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*', so try
# the new name first and fall back to the legacy one on older installs.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    try:
        plt.style.use(_style_name)
    except OSError:
        # plt.style.use raises OSError for a style name it does not know.
        # Catch only that, so real problems (and Ctrl-C) are not swallowed.
        continue
    break
# If neither name is available the default matplotlib style stays in effect.

plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 12

## Load Results

In [27]:
# Find all study directories
TUNE_DIR = 'tune_distributed'

# Keep directories only: stray files under TUNE_DIR (logs, notes, ...) are not
# studies and must not be picked up as the study to analyze.
# Sorting by name makes the listing order deterministic.
studies = sorted(
    path for path in glob(os.path.join(TUNE_DIR, '*'))
    if os.path.isdir(path)
)
print(f"Found {len(studies)} studies:")
for s in studies:
    print(f"  - {os.path.basename(s)}")

Found 4 studies:
  - l1_distributed_cifar100_20260121_124638
  - l1_distributed_cifar100_20260121_124644
  - l1_distributed_cifar100_20260121_124726
  - l1_distributed_cifar100_20260121_124824


In [28]:
# Choose which study to analyze. The default is the last entry of the sorted
# listing; set STUDY_DIR by hand to inspect a different one.
STUDY_DIR = None
if studies:
    STUDY_DIR = studies[-1]
print(f"Analyzing: {STUDY_DIR}")

Analyzing: tune_distributed/l1_distributed_cifar100_20260121_124824


In [None]:
def load_study_results(study_dir):
    """Load all trial results from a study directory.

    Two on-disk layouts are tried, in this order:

    1. ``<study_dir>/metrics/<mode>/*.json`` -- one JSON object per trial.
       Every object becomes one row, extended with ``mode`` (the name of the
       sub-directory it was found in) and ``trial_file`` (the file's base name).
    2. ``<study_dir>/results.json`` -- consulted only when layout 1 produced no
       rows. Trials are taken from its ``trials`` key, or ``all_trials`` when
       ``trials`` is missing or empty. Each trial's ``number`` and ``value``
       are stored as ``trial_id`` and ``accuracy_after_pruning``; the entries
       of its ``params`` and ``user_attrs`` dicts are merged into the row.
       ``mode`` comes from ``<study_dir>/config.json`` and defaults to
       ``'unknown'``.

    Args:
        study_dir: Path of the study directory. ``None`` or an empty string
            (no study available) yields an empty result instead of an error.

    Returns:
        pandas.DataFrame with one row per trial; empty when nothing was found.
        Rows from layout 1 are ordered by mode name, then by file name.
    """
    # Callers pass None when no study exists; os.path.join(None, ...) would
    # otherwise fail with a TypeError.
    if not study_dir:
        return pd.DataFrame()

    results = []

    # Try new structure: metrics/<mode>/*.json
    metrics_dir = os.path.join(study_dir, 'metrics')
    if os.path.isdir(metrics_dir):
        # listdir/glob return entries in arbitrary order. Sort so the row
        # order (used downstream as the trial position) is reproducible.
        for mode in sorted(os.listdir(metrics_dir)):
            mode_dir = os.path.join(metrics_dir, mode)
            if not os.path.isdir(mode_dir):
                continue
            for trial_path in sorted(glob(os.path.join(mode_dir, '*.json'))):
                with open(trial_path, 'r') as fp:
                    data = json.load(fp)
                data['mode'] = mode
                data['trial_file'] = os.path.basename(trial_path)
                results.append(data)

    # Try loading from results.json (contains all trial data)
    if not results:
        results_json = os.path.join(study_dir, 'results.json')
        if os.path.exists(results_json):
            with open(results_json, 'r') as fp:
                data = json.load(fp)

            # Extract mode from config
            mode = 'unknown'
            config_path = os.path.join(study_dir, 'config.json')
            if os.path.exists(config_path):
                with open(config_path, 'r') as fp:
                    mode = json.load(fp).get('mode', 'unknown')

            # Parse trials - check both 'trials' and 'all_trials' keys
            trials_list = data.get('trials') or data.get('all_trials') or []
            for trial in trials_list:
                trial_data = {
                    'mode': mode,
                    'trial_id': trial.get('number'),
                    'accuracy_after_pruning': trial.get('value'),
                }
                # `or {}` tolerates both a missing key and an explicit null.
                trial_data.update(trial.get('params') or {})
                trial_data.update(trial.get('user_attrs') or {})
                results.append(trial_data)

    return pd.DataFrame(results)

# Load the selected study and preview what was parsed.
df = load_study_results(STUDY_DIR)
n_trials = len(df)
print(f"Loaded {n_trials} trials")
if n_trials > 0:
    print(f"Columns: {list(df.columns)}")
    display(df.head())

In [30]:
# Extract hyperparameters from params dict if present
if 'params' in df.columns:
    # Trials whose JSON had no 'params' key show up as NaN in this column.
    # json_normalize only accepts dict records, so substitute an empty dict;
    # those rows simply get NaN in every expanded hyperparameter column.
    params_records = [p if isinstance(p, dict) else {} for p in df['params']]
    params_df = pd.json_normalize(params_records)
    # Reuse df's index so the column-wise concat pairs rows one-to-one even
    # if df does not carry a default 0..n-1 index.
    params_df.index = df.index
    df = pd.concat([df.drop('params', axis=1), params_df], axis=1)
    print("Extracted params")
    display(df.head())

## Optimization History

In [31]:
has_accuracy = 'accuracy_after_pruning' in df.columns

if len(df) > 0 and has_accuracy:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    ax_trials, ax_best = axes

    for mode in df['mode'].unique():
        # Re-index each mode's rows from zero so the x axis is the trial's
        # position within that mode.
        mode_df = df[df['mode'] == mode].copy().reset_index(drop=True)
        positions = range(len(mode_df))
        accuracy = mode_df['accuracy_after_pruning']

        # Left panel: every trial as a point.
        ax_trials.scatter(positions, accuracy, label=f'{mode}', alpha=0.7, s=50)

        # Right panel: running maximum, i.e. the best accuracy seen so far.
        ax_best.plot(positions, accuracy.cummax(), label=f'{mode}', linewidth=2)

    ax_trials.set_xlabel('Trial')
    ax_trials.set_ylabel('Accuracy After Pruning (%)')
    ax_trials.set_title('All Trials')
    ax_trials.legend()

    ax_best.set_xlabel('Trial')
    ax_best.set_ylabel('Best Accuracy So Far (%)')
    ax_best.set_title('Optimization Progress')
    ax_best.legend()

    plt.tight_layout()
    plt.show()
else:
    print("No accuracy data to plot")

No accuracy data to plot


## L1 Mode: Gamma vs Accuracy

In [32]:
# Restrict to L1-mode trials; an empty frame keeps the checks below uniform
# when df has no 'mode' column at all.
l1_df = df[df['mode'] == 'l1'].copy() if 'mode' in df.columns else pd.DataFrame()

# Every plot below reads both 'gamma' and 'accuracy_after_pruning', so require
# both up front instead of failing with a KeyError halfway through the figure.
l1_plottable = (
    len(l1_df) > 0
    and 'gamma' in l1_df.columns
    and 'accuracy_after_pruning' in l1_df.columns
)

if l1_plottable:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Pre-pruning accuracy is optional; it only counts as present when at
    # least one trial actually recorded a value.
    has_before = 'accuracy_before_pruning' in l1_df.columns and l1_df['accuracy_before_pruning'].notna().any()

    # Gamma vs accuracy (log scale)
    if has_before:
        sc = axes[0].scatter(l1_df['gamma'], l1_df['accuracy_after_pruning'],
                             c=l1_df['accuracy_before_pruning'], cmap='viridis', s=80, alpha=0.7)
        cbar = plt.colorbar(sc, ax=axes[0])
        cbar.set_label('Acc Before Pruning')
    else:
        axes[0].scatter(l1_df['gamma'], l1_df['accuracy_after_pruning'], s=80, alpha=0.7)

    axes[0].set_xscale('log')
    axes[0].set_xlabel('Gamma (log scale)')
    axes[0].set_ylabel('Accuracy After Pruning (%)')
    axes[0].set_title('L1: Gamma vs Accuracy')

    # Before vs after pruning
    if has_before:
        sc = axes[1].scatter(l1_df['accuracy_before_pruning'], l1_df['accuracy_after_pruning'],
                             c=l1_df['gamma'], cmap='plasma', s=80, alpha=0.7)
        # Points on the diagonal lost no accuracy to pruning.
        axes[1].plot([0, 100], [0, 100], 'k--', alpha=0.3, label='y=x')
        axes[1].set_xlabel('Accuracy Before Pruning (%)')
        axes[1].set_ylabel('Accuracy After Pruning (%)')
        axes[1].set_title('L1: Before vs After Pruning')
        cbar = plt.colorbar(sc, ax=axes[1])
        cbar.set_label('Gamma')
        # Without a legend the 'y=x' label above is never displayed.
        axes[1].legend()
    else:
        # Histogram of accuracies
        axes[1].hist(l1_df['accuracy_after_pruning'], bins=20, edgecolor='black')
        axes[1].set_xlabel('Accuracy After Pruning (%)')
        axes[1].set_ylabel('Count')
        axes[1].set_title('L1: Accuracy Distribution')

    plt.tight_layout()
    plt.show()

    # Best trial. Drop missing accuracies first: idxmax cannot pick a row
    # when every value is NaN.
    l1_scored = l1_df['accuracy_after_pruning'].dropna()
    if l1_scored.empty:
        print("\nNo L1 trial has a recorded accuracy after pruning")
    else:
        best = l1_df.loc[l1_scored.idxmax()]
        print(f"\nBest L1 trial:")
        # Gamma is shown on a log axis above, so use significant digits;
        # a fixed '.2f' would print small values as 0.00.
        print(f"  Gamma: {best['gamma']:.3g}")
        if has_before:
            print(f"  Accuracy before pruning: {best['accuracy_before_pruning']:.2f}%")
        print(f"  Accuracy after pruning: {best['accuracy_after_pruning']:.2f}%")
        if 'actual_sparsity' in best:
            print(f"  Actual sparsity: {best['actual_sparsity']:.2f}%")
else:
    print("No L1 trials found (or no gamma / accuracy_after_pruning column)")

No L1 trials found (or no gamma column)


## Spatial Mode: Hyperparameter Analysis

In [33]:
# Restrict to spatial-mode trials; an empty frame keeps the checks below
# uniform when df has no 'mode' column at all.
spatial_df = df[df['mode'] == 'spatial'].copy() if 'mode' in df.columns else pd.DataFrame()

# Every panel below reads 'accuracy_after_pruning' and the first one reads
# 'gamma_spatial', so require both up front instead of failing with a KeyError.
spatial_plottable = (
    len(spatial_df) > 0
    and 'gamma_spatial' in spatial_df.columns
    and 'accuracy_after_pruning' in spatial_df.columns
)

if spatial_plottable:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Optional columns: D colors several panels, and pre-pruning accuracy
    # only counts as present when at least one trial recorded a value.
    has_D = 'D' in spatial_df.columns
    has_before = 'accuracy_before_pruning' in spatial_df.columns and spatial_df['accuracy_before_pruning'].notna().any()

    # Gamma_spatial vs accuracy
    if has_D:
        sc = axes[0, 0].scatter(spatial_df['gamma_spatial'], spatial_df['accuracy_after_pruning'],
                                c=spatial_df['D'], cmap='viridis', s=80, alpha=0.7)
        plt.colorbar(sc, ax=axes[0, 0], label='D')
    else:
        axes[0, 0].scatter(spatial_df['gamma_spatial'], spatial_df['accuracy_after_pruning'], s=80, alpha=0.7)
    axes[0, 0].set_xscale('log')
    axes[0, 0].set_xlabel('Gamma Spatial (log scale)')
    axes[0, 0].set_ylabel('Accuracy After Pruning (%)')
    axes[0, 0].set_title('Spatial: Gamma_spatial vs Accuracy')

    # D vs accuracy
    if has_D:
        sc = axes[0, 1].scatter(spatial_df['D'], spatial_df['accuracy_after_pruning'],
                                c=spatial_df['gamma_spatial'], cmap='plasma', s=80, alpha=0.7)
        axes[0, 1].set_xlabel('D')
        axes[0, 1].set_ylabel('Accuracy After Pruning (%)')
        plt.colorbar(sc, ax=axes[0, 1], label='Gamma Spatial')
    else:
        axes[0, 1].text(0.5, 0.5, 'No D parameter', ha='center', va='center', transform=axes[0, 1].transAxes)
    # Title the panel in both branches, like the Gamma_L1 panel below.
    axes[0, 1].set_title('Spatial: D vs Accuracy')

    # Gamma_l1 vs accuracy (if used)
    if 'gamma_l1' in spatial_df.columns and spatial_df['gamma_l1'].max() > 0:
        sc = axes[1, 0].scatter(spatial_df['gamma_l1'], spatial_df['accuracy_after_pruning'],
                                c=spatial_df['gamma_spatial'], cmap='viridis', s=80, alpha=0.7)
        axes[1, 0].set_xscale('log')
        axes[1, 0].set_xlabel('Gamma L1 (log scale)')
        axes[1, 0].set_ylabel('Accuracy After Pruning (%)')
        axes[1, 0].set_title('Spatial: Gamma_L1 vs Accuracy')
        plt.colorbar(sc, ax=axes[1, 0], label='Gamma Spatial')
    else:
        axes[1, 0].text(0.5, 0.5, 'No L1 penalty used', ha='center', va='center', transform=axes[1, 0].transAxes)
        axes[1, 0].set_title('Spatial: Gamma_L1 vs Accuracy')

    # Before vs after pruning
    if has_before:
        if has_D:
            sc = axes[1, 1].scatter(spatial_df['accuracy_before_pruning'], spatial_df['accuracy_after_pruning'],
                                    c=spatial_df['D'], cmap='viridis', s=80, alpha=0.7)
            plt.colorbar(sc, ax=axes[1, 1], label='D')
        else:
            axes[1, 1].scatter(spatial_df['accuracy_before_pruning'], spatial_df['accuracy_after_pruning'], s=80, alpha=0.7)
        # Points on the diagonal lost no accuracy to pruning.
        axes[1, 1].plot([0, 100], [0, 100], 'k--', alpha=0.3)
        axes[1, 1].set_xlabel('Accuracy Before Pruning (%)')
        axes[1, 1].set_ylabel('Accuracy After Pruning (%)')
        axes[1, 1].set_title('Spatial: Before vs After Pruning')
    else:
        axes[1, 1].hist(spatial_df['accuracy_after_pruning'], bins=20, edgecolor='black')
        axes[1, 1].set_xlabel('Accuracy After Pruning (%)')
        axes[1, 1].set_ylabel('Count')
        axes[1, 1].set_title('Spatial: Accuracy Distribution')

    plt.tight_layout()
    plt.show()

    # Best trial. Drop missing accuracies first: idxmax cannot pick a row
    # when every value is NaN.
    spatial_scored = spatial_df['accuracy_after_pruning'].dropna()
    if spatial_scored.empty:
        print("\nNo Spatial trial has a recorded accuracy after pruning")
    else:
        best = spatial_df.loc[spatial_scored.idxmax()]
        print(f"\nBest Spatial trial:")
        # The gammas are shown on log axes above, so use significant digits;
        # a fixed '.2f' would print small values as 0.00.
        print(f"  Gamma Spatial: {best['gamma_spatial']:.3g}")
        if 'gamma_l1' in best:
            print(f"  Gamma L1: {best['gamma_l1']:.3g}")
        if 'D' in best:
            print(f"  D: {best['D']:.4f}")
        if has_before:
            print(f"  Accuracy before pruning: {best['accuracy_before_pruning']:.2f}%")
        print(f"  Accuracy after pruning: {best['accuracy_after_pruning']:.2f}%")
        if 'actual_sparsity' in best:
            print(f"  Actual sparsity: {best['actual_sparsity']:.2f}%")
else:
    print("No Spatial trials found (or no gamma_spatial / accuracy_after_pruning column)")

No Spatial trials found (or no gamma_spatial column)


## Compare L1 vs Spatial

In [34]:
# A comparison only makes sense with at least two modes and an accuracy column.
if 'mode' in df.columns and len(df['mode'].unique()) > 1 and 'accuracy_after_pruning' in df.columns:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Box plot comparison
    modes = df['mode'].unique()
    data_for_box = [df[df['mode'] == m]['accuracy_after_pruning'].dropna() for m in modes]
    # boxplot's `labels` keyword was renamed `tick_labels` in Matplotlib 3.9
    # and the old name is deprecated. Setting the tick labels separately works
    # on both old and new versions. Boxes are drawn at x = 1..len(modes).
    axes[0].boxplot(data_for_box)
    axes[0].set_xticks(np.arange(1, len(modes) + 1))
    axes[0].set_xticklabels(modes)
    axes[0].set_xlabel('Mode')
    axes[0].set_ylabel('Accuracy After Pruning (%)')
    axes[0].set_title('Accuracy Distribution by Mode')

    # Summary stats
    summary = df.groupby('mode')['accuracy_after_pruning'].agg(['mean', 'std', 'max', 'count'])

    # Bar chart: mean (with std error bars) next to the best value per mode.
    # summary is indexed by mode, so .loc[modes, ...] puts the bars in the
    # same order as the box plot.
    x = np.arange(len(modes))
    width = 0.35

    axes[1].bar(x - width/2, summary.loc[modes, 'mean'], width, label='Mean',
                yerr=summary.loc[modes, 'std'], capsize=5)
    axes[1].bar(x + width/2, summary.loc[modes, 'max'], width, label='Best')

    axes[1].set_xlabel('Mode')
    axes[1].set_ylabel('Accuracy After Pruning (%)')
    axes[1].set_title('L1 vs Spatial Comparison')
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(modes)
    axes[1].legend()

    plt.tight_layout()
    plt.show()

    print("\nSummary Statistics:")
    print(summary.to_string())
else:
    print("Only one mode found or no accuracy data - comparison requires both L1 and Spatial results")

Only one mode found or no accuracy data - comparison requires both L1 and Spatial results


## All Trials Table

In [35]:
if len(df) > 0:
    # Candidate columns in display order; the hyperparameter columns are only
    # considered when the corresponding mode's key column exists.
    wanted_cols = ['mode', 'accuracy_before_pruning', 'accuracy_after_pruning', 'actual_sparsity']
    if 'gamma' in df.columns:
        wanted_cols += ['gamma']
    if 'gamma_spatial' in df.columns:
        wanted_cols += ['gamma_spatial', 'gamma_l1', 'D']

    # Keep only the candidates that this frame actually has.
    display_cols = [col for col in wanted_cols if col in df.columns]

    # Show the top 20 rows, best accuracy first when that column is available.
    trials_table = df[display_cols]
    if 'accuracy_after_pruning' in df.columns:
        trials_table = trials_table.sort_values('accuracy_after_pruning', ascending=False)
    display(trials_table.head(20))
else:
    print("No data to display")

No data to display


## Load Multiple Studies for Comparison

In [36]:
def load_all_studies(tune_dir='tune_distributed'):
    """Load results from all studies.

    Every sub-directory of ``tune_dir`` is treated as one study and read with
    ``load_study_results``. Studies that yield no trials are skipped.

    Args:
        tune_dir: Directory whose sub-directories are the individual studies.

    Returns:
        pandas.DataFrame with the trials of all non-empty studies concatenated
        under a fresh 0..n-1 index, plus a ``study`` column holding the study
        directory's base name. Studies appear in sorted name order. Empty when
        no study produced any trial.
    """
    all_results = []

    # glob returns paths in arbitrary order; sort so the concatenated frame is
    # the same on every run.
    for study_path in sorted(glob(os.path.join(tune_dir, '*'))):
        if not os.path.isdir(study_path):
            continue
        study_df = load_study_results(study_path)
        if len(study_df) > 0:
            study_df['study'] = os.path.basename(study_path)
            all_results.append(study_df)

    if not all_results:
        return pd.DataFrame()
    return pd.concat(all_results, ignore_index=True)

# Read every study under the same root directory the listing above used.
all_df = load_all_studies(TUNE_DIR)
if len(all_df) > 0:
    print(f"Loaded {len(all_df)} total trials from {all_df['study'].nunique()} studies")
else:
    # An empty frame also occurs when study directories exist but none of
    # them contains any trial results, so do not claim the studies are missing.
    print("No trial results found in any study")

No studies found


In [37]:
# Best results per study
if len(all_df) > 0 and 'accuracy_after_pruning' in all_df.columns:
    # Only rows with a recorded accuracy can compete for "best".
    has_score = all_df['accuracy_after_pruning'].notna()
    valid_df = all_df[has_score]
    if len(valid_df) > 0:
        # Index label of the highest-accuracy row within each study.
        best_rows = valid_df.groupby('study')['accuracy_after_pruning'].idxmax()
        best_per_study = valid_df.loc[best_rows]

        # Column order: study, mode, [before], after, [sparsity].
        cols = ['study', 'mode']
        if 'accuracy_before_pruning' in best_per_study.columns:
            cols.append('accuracy_before_pruning')
        cols.append('accuracy_after_pruning')
        if 'actual_sparsity' in best_per_study.columns:
            cols.append('actual_sparsity')
        cols = [c for c in cols if c in best_per_study.columns]

        print("Best trial from each study:")
        display(best_per_study[cols])
    else:
        print("No valid accuracy data")
else:
    print("No data to compare")

No data to compare
