# HPO Analysis

In [None]:
import json
import sys
from pathlib import Path
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np

# Resolve the repository root from the working directory: when the notebook is
# started inside a folder named 'notebooks' the root is its parent, otherwise
# the working directory itself is used.
REPO_ROOT = Path.cwd().parent if Path.cwd().name == 'notebooks' else Path.cwd()
# Make the root importable. The membership check keeps sys.path free of
# duplicate entries when this cell is executed more than once.
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))

# Directories (under the repository root) that the analysis reads results from.
EXPERIMENT_DIR = REPO_ROOT / '.cache' / 'experiment'
FINAL_TRAINING_DIR = REPO_ROOT / '.cache' / 'final_training'
# Fixed plot colour per optimizer name.
COLORS = {'RS': '#1f77b4', 'GA-STANDARD': '#ff7f0e', 'GA-MEMETIC': '#d62728', 'PSO': '#2ca02c'}

def parse_experiment_name(exp_name):
    """Split an experiment name of the form '<model>-<optimizer>'.

    The name is divided at the first hyphen only, so any further hyphens
    stay in the optimizer part. Both parts are returned upper-cased.

    Returns:
        A ``(model, optimizer)`` tuple, or ``(None, None)`` when the name
        contains no hyphen.
    """
    model, hyphen, optimizer = exp_name.partition('-')
    if not hyphen:
        return None, None
    return model.upper(), optimizer.upper()

def load_experiment_summaries(exp_dir, filter_fn=None):
    """Load the ``summary.json`` of every ``run_*`` sub-directory of *exp_dir*.

    Args:
        exp_dir: ``Path`` of the experiment directory to scan.
        filter_fn: Optional predicate called with each parsed summary; only
            summaries for which it returns a truthy value are kept. ``None``
            keeps everything.

    Returns:
        A list of ``(run_directory_name, summary)`` tuples in sorted
        directory order. Entries that are not directories, do not start with
        ``run_``, or have no ``summary.json`` are skipped.

    Raises:
        json.JSONDecodeError: If a ``summary.json`` is not valid JSON.
    """
    data = []
    for run_dir in sorted(exp_dir.iterdir()):
        if not run_dir.is_dir() or not run_dir.name.startswith('run_'):
            continue
        summary_file = run_dir / 'summary.json'
        if summary_file.exists():
            # Decode explicitly as UTF-8 instead of relying on the
            # platform's locale-dependent default encoding.
            with open(summary_file, encoding='utf-8') as f:
                summary = json.load(f)
            if filter_fn is None or filter_fn(summary):
                data.append((run_dir.name, summary))
    return data

# Show the resolved repository root (it is derived from the current working directory).
print(f"Repository root: {REPO_ROOT}")


## Box Plots of Final Fitness

In [None]:
# Final fitness of every HPO run, stored as hpo_grouped[MODEL][OPTIMIZER] -> list of scores.
hpo_grouped = defaultdict(lambda: defaultdict(list))

experiment_dirs = sorted(p for p in EXPERIMENT_DIR.iterdir() if p.is_dir())
for exp_dir in experiment_dirs:
    model, optimizer = parse_experiment_name(exp_dir.name)
    if not optimizer:
        continue
    # Only runs whose summary carries a non-null 'final_fitness' are counted.
    run_summaries = load_experiment_summaries(exp_dir, lambda s: s.get('final_fitness') is not None)
    fitness_values = [summary['final_fitness'] for _, summary in run_summaries]
    # Touch the nested dict only when there is data, so experiments without
    # usable runs never show up as empty entries.
    if fitness_values:
        hpo_grouped[model][optimizer].extend(fitness_values)

total_runs = sum(
    len(scores)
    for by_optimizer in hpo_grouped.values()
    for scores in by_optimizer.values()
)
print(f"Loaded {total_runs} HPO runs\n")
for model in sorted(hpo_grouped):
    print(f"{model}:")
    for opt, scores in sorted(hpo_grouped[model].items()):
        print(f"  {opt}: {len(scores)} runs, mean={np.mean(scores):.4f}")


In [None]:
# One box-plot panel per model, comparing the final-fitness distribution of each optimizer.
models = sorted(hpo_grouped)
if not models:
    # plt.subplots(1, 0) raises ValueError, so report the empty state instead.
    print("No HPO runs loaded; nothing to plot.")
else:
    # squeeze=False always returns a 2-D grid of Axes, so a single model
    # needs no special case; row 0 is the only row.
    fig, axes = plt.subplots(1, len(models), figsize=(5 * len(models), 5), squeeze=False)
    axes = axes[0]

    for ax, model in zip(axes, models):
        optimizers = sorted(hpo_grouped[model])
        data_to_plot = [hpo_grouped[model][opt] for opt in optimizers]

        # patch_artist=True is needed so the boxes can be given a fill colour.
        bp = ax.boxplot(data_to_plot, tick_labels=optimizers, patch_artist=True)
        for patch in bp['boxes']:
            patch.set_facecolor('lightblue')

        ax.set_title(f'{model}', fontweight='bold')
        ax.set_xlabel('Optimizer')
        ax.set_ylabel('Composite Fitness')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


## Test Set Results

In [None]:
# Test-set metrics of the final training runs, stored as final_grouped[MODEL] -> list of rows.
final_grouped = defaultdict(list)

for exp_dir in sorted(FINAL_TRAINING_DIR.iterdir()):
    if not exp_dir.is_dir():
        continue
    model, optimizer = parse_experiment_name(exp_dir.name)
    if not optimizer:
        continue

    # Keep only summaries with a usable composite test score. The isinstance
    # check also rejects '"test_metrics": null', which would otherwise raise
    # AttributeError on the chained .get().
    summaries = load_experiment_summaries(
        exp_dir,
        lambda s: isinstance(s.get('test_metrics'), dict)
        and s['test_metrics'].get('composite_fitness') is not None,
    )
    if not summaries:
        continue

    # Summaries come back ordered by run-directory name; the last one is used.
    # NOTE(review): that order is lexicographic ('run_10' sorts before
    # 'run_2') - confirm run names are zero-padded or timestamped if "last"
    # is meant to be the most recent run.
    _, summary = summaries[-1]
    test_metrics = summary['test_metrics']
    final_grouped[model].append({
        'Optimizer': optimizer,
        'Composite': test_metrics['composite_fitness'],
        'Accuracy': test_metrics.get('accuracy'),
        'F1': test_metrics.get('f1_score'),
    })

print(f"Loaded {sum(len(v) for v in final_grouped.values())} final training results\n")
for model in sorted(final_grouped):
    print(f"{model}:")
    for entry in final_grouped[model]:
        print(f"  {entry['Optimizer']}: composite={entry['Composite']:.4f}")


In [None]:
# One bar-chart panel per model showing the test-set composite fitness of each optimizer.
models = sorted(final_grouped)
if not models:
    # plt.subplots(1, 0) raises ValueError, so report the empty state instead.
    print("No final training results loaded; nothing to plot.")
else:
    # squeeze=False always returns a 2-D grid of Axes, so a single model
    # needs no special case; row 0 is the only row.
    fig, axes = plt.subplots(1, len(models), figsize=(5 * len(models), 5), squeeze=False)
    axes = axes[0]

    for ax, model in zip(axes, models):
        entries = final_grouped[model]
        labels = [e['Optimizer'] for e in entries]
        values = [e['Composite'] for e in entries]
        # Optimizers without an entry in COLORS fall back to grey.
        bar_colors = [COLORS.get(opt, '#888888') for opt in labels]

        bars = ax.bar(labels, values, color=bar_colors, alpha=0.8, edgecolor='black')
        # Write each value just above its bar, centred horizontally.
        for bar, val in zip(bars, values):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.01,
                    f'{val:.4f}', ha='center', va='bottom', fontsize=9)

        ax.set_ylim(0, 1)
        ax.set_title(f'{model}', fontweight='bold')
        ax.set_xlabel('Optimizer')
        ax.set_ylabel('Composite Fitness')
        ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()


## Convergence Plots

In [None]:
# Best-fitness traces of every run, stored as convergence_data[MODEL][OPTIMIZER] -> list of traces.
convergence_data = defaultdict(lambda: defaultdict(list))

experiment_dirs = sorted(p for p in EXPERIMENT_DIR.iterdir() if p.is_dir())
for exp_dir in experiment_dirs:
    model, optimizer = parse_experiment_name(exp_dir.name)
    if not optimizer:
        continue

    # Only summaries whose 'convergence_trace' is a dict are considered.
    run_summaries = load_experiment_summaries(
        exp_dir,
        lambda s: 'convergence_trace' in s and isinstance(s.get('convergence_trace'), dict),
    )
    traces = [summary['convergence_trace'].get('best_fitness', []) for _, summary in run_summaries]
    # Drop missing/empty traces, and touch the nested dict only when
    # something is left so no empty entries are created.
    non_empty_traces = [trace for trace in traces if trace]
    if non_empty_traces:
        convergence_data[model][optimizer].extend(non_empty_traces)

print(f"Loaded convergence data for {len(convergence_data)} models")
for model in sorted(convergence_data):
    run_count = sum(len(traces) for traces in convergence_data[model].values())
    print(f"{model}: {run_count} runs")


In [None]:
# One panel per model: mean best-fitness curve per optimizer with a +/- one std band.
models = sorted(convergence_data)
if not models:
    # plt.subplots(1, 0) raises ValueError, so report the empty state instead.
    print("No convergence data loaded; nothing to plot.")
else:
    # squeeze=False always returns a 2-D grid of Axes, so a single model
    # needs no special case; row 0 is the only row.
    fig, axes = plt.subplots(1, len(models), figsize=(6 * len(models), 5), squeeze=False)
    axes = axes[0]

    for ax, model in zip(axes, models):
        for optimizer in sorted(convergence_data[model]):
            runs = convergence_data[model][optimizer]
            if not runs:
                continue

            # Traces can differ in length; extend the shorter ones with their
            # last value so all runs stack into one rectangular array.
            max_len = max(len(r) for r in runs)
            padded = [list(r) + [r[-1]] * (max_len - len(r)) for r in runs]
            runs_array = np.array(padded)

            # Statistics across runs at each trace position.
            mean_curve = runs_array.mean(axis=0)
            std_curve = runs_array.std(axis=0)
            # x positions are the trace indices 0 .. len - 1.
            generations = np.arange(len(mean_curve))
            color = COLORS.get(optimizer, '#888888')

            ax.plot(generations, mean_curve, label=optimizer, color=color, linewidth=2)
            ax.fill_between(generations, mean_curve - std_curve, mean_curve + std_curve,
                            color=color, alpha=0.2)

        ax.set_title(f'{model}', fontweight='bold')
        ax.set_xlabel('Evaluation Count ($n$)')
        ax.set_ylabel('Best Fitness')
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


## Wilcoxon Tests

In [None]:
from itertools import combinations

from scipy.stats import wilcoxon

# Pairwise Wilcoxon signed-rank tests between the optimizers of each model.
# NOTE(review): the signed-rank test is a paired test, so the scores are
# compared position by position - confirm that run i of one optimizer is
# comparable with run i of the other (e.g. same seed) before relying on
# the p-values.
for model in sorted(hpo_grouped):
    print(f"\n--- {model} ---")
    optimizers = hpo_grouped[model]
    optimizer_names = sorted(optimizers)

    for opt1, opt2 in combinations(optimizer_names, 2):
        scores1 = optimizers[opt1]
        scores2 = optimizers[opt2]

        # The test needs two non-empty samples of equal length.
        if len(scores1) != len(scores2) or not scores1:
            print(f"{opt1} vs {opt2}: Sample size mismatch ({len(scores1)} vs {len(scores2)})")
            continue

        try:
            _, p = wilcoxon(scores1, scores2)
        except ValueError as exc:
            # scipy rejects some inputs with ValueError (depending on the
            # scipy version this includes samples whose paired differences
            # are all zero). Report the pair and continue with the rest
            # instead of aborting the whole comparison.
            print(f"{opt1} vs {opt2}: Wilcoxon test not computable ({exc})")
            continue

        sig = ' (Significant)' if p < 0.05 else ''
        print(f"{opt1} vs {opt2}: p-value = {p:.5f}{sig}")
