# Benchmarks and Metrics with StudyRunner

Run multi-problem studies, compare algorithms and backends, and inspect hypervolume metrics.

This notebook covers:
1. **Algorithm comparison** - NSGA-II vs MOEA/D across problems
2. **Backend comparison** - numpy vs numba vs moocore engines
3. **Pareto front visualization** - overlay fronts from different runs
4. **Performance metrics** - HV, runtime, evaluations/sec

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from vamos import ExperimentConfig
from vamos.experiment.study.runner import StudyRunner, StudyTask
from vamos.ux.analysis.tuning_viz import study_results_to_dataframe
from vamos.foundation.core.runner import resolve_kernel

# Apply matplotlib's "ggplot" style sheet to the figures created after this point.
plt.style.use("ggplot")

# Detect available backends
def detect_engines(candidates):
    """Return the candidate engine names that ``resolve_kernel`` accepts.

    Each name in *candidates* is passed to ``resolve_kernel``. Names for which
    that call raises are reported on stdout and left out of the result; the
    remaining names are returned in their original order as a new list.
    """
    usable = []
    for candidate in candidates:
        try:
            resolve_kernel(candidate)
        except Exception as err:
            # Best-effort probe: an unavailable backend is skipped, not fatal.
            print(f"Skipping engine '{candidate}': {err}")
        else:
            usable.append(candidate)
    return usable

# Probe the candidate backends once; later cells read AVAILABLE_ENGINES.
engine_candidates = ["numpy", "numba", "moocore"]
AVAILABLE_ENGINES = detect_engines(engine_candidates)
print(f"Available engines: {', '.join(AVAILABLE_ENGINES)}")

In [None]:
# Study grid: one task for every (problem, algorithm, engine, seed) combination.
problems = [
    {"problem": "zdt1", "n_var": 12},
    {"problem": "zdt2", "n_var": 12},
]
algorithms = ["nsgaii", "moead"]
engines = AVAILABLE_ENGINES[:2]  # keep at most the first two available engines for speed
seeds = [0, 1]

# The overrides dict literal is evaluated per task, so tasks never share one dict.
# Iteration order matches nested loops: problems, then algorithms, engines, seeds.
tasks = [
    StudyTask(
        algorithm=algorithm_name,
        engine=engine_name,
        problem=spec["problem"],
        n_var=spec["n_var"],
        seed=run_seed,
        config_overrides={
            "population_size": 30,
            "offspring_population_size": 30,
            "max_evaluations": 400,
        },
    )
    for spec in problems
    for algorithm_name in algorithms
    for engine_name in engines
    for run_seed in seeds
]

print(f"Total tasks: {len(tasks)}")

# Run every task, then flatten the results into a DataFrame for the cells below.
runner = StudyRunner(verbose=True)
results = runner.run(tasks)
df = study_results_to_dataframe(results)
df.head(10)

In [None]:
# Per problem/algorithm/engine group: mean and std of HV plus mean runtime,
# rounded to 4 decimals. Named aggregation yields the flat column names directly.
summary = (
    df.groupby(["problem", "algorithm", "engine"])
    .agg(
        hv_mean=("hv", "mean"),
        hv_std=("hv", "std"),
        time_ms_mean=("time_ms", "mean"),
    )
    .round(4)
    .reset_index()
)
summary

In [None]:
# Mean hypervolume per problem (rows) and algorithm (columns), averaged over engines.
pivot_algo = (
    summary.groupby(["problem", "algorithm"])["hv_mean"]
    .mean()
    .unstack("algorithm")
)
ax = pivot_algo.plot(kind="bar", figsize=(8, 4))
ax.set_title("HV by Algorithm")
ax.set_ylabel("Hypervolume (mean)")
plt.tight_layout()
plt.show()

## Backend Comparison

Compare hypervolume and runtime across the kernel backends included in the study (the first two available of numpy, numba, moocore).

In [None]:
# Mean hypervolume per problem (rows) and engine (columns), averaged over algorithms.
pivot_engine = (
    summary.groupby(["problem", "engine"])["hv_mean"]
    .mean()
    .unstack("engine")
)
ax = pivot_engine.plot(kind="bar", figsize=(8, 4))
ax.set_title("HV by Backend Engine")
ax.set_ylabel("Hypervolume (mean)")
plt.tight_layout()
plt.show()

In [None]:
# Mean runtime per problem (rows) and engine (columns), averaged over algorithms.
pivot_time = (
    summary.groupby(["problem", "engine"])["time_ms_mean"]
    .mean()
    .unstack("engine")
)
# One fixed colour per engine column; trim the palette to the columns present.
bar_colors = ['tab:blue', 'tab:orange', 'tab:green']
ax = pivot_time.plot(kind="bar", figsize=(8, 4), color=bar_colors[:pivot_time.shape[1]])
ax.set_title("Runtime by Backend Engine")
ax.set_ylabel("Time (ms)")
plt.tight_layout()
plt.show()

## Pareto Front Visualization

Overlay the Pareto fronts of every algorithm/engine run for each problem to verify that the engines produce consistent fronts.

In [None]:
# Plot Pareto fronts, one figure per selection label, overlaying every
# algorithm/engine combination that was run for that label.
if not results:
    print("No results available yet. Run the study cell first.")
else:
    # Bucket results by their selection label
    # (presumably the problem name - confirm against StudyRunner).
    by_problem = {}
    for res in results:
        by_problem.setdefault(res.selection.spec.label, []).append(res)

    for label, entries in by_problem.items():
        fig, ax = plt.subplots(figsize=(7, 5))

        # Several seeds share one algorithm/engine pair. Give each pair a single
        # colour and a single legend entry; otherwise every run gets its own
        # colour and the legend repeats the same label once per seed.
        series_colors = {}
        for res in entries:
            # Assumes F is 2-D with at least two objective columns - TODO confirm.
            F = np.asarray(res.metrics["F"])
            engine = res.metrics["engine"]
            alg = res.metrics["algorithm"]

            series = f"{alg}/{engine}"
            is_new_series = series not in series_colors
            if is_new_series:
                # "C<n>" picks the n-th colour of the active matplotlib colour cycle.
                series_colors[series] = f"C{len(series_colors)}"

            ax.scatter(
                F[:, 0],
                F[:, 1],
                s=25,
                alpha=0.6,
                color=series_colors[series],
                # Labels starting with "_" are left out of the automatic legend.
                label=series if is_new_series else "_nolegend_",
            )

        ax.set_xlabel("Objective 1")
        ax.set_ylabel("Objective 2")
        ax.set_title(f"{label} - Pareto Fronts")
        ax.legend(loc="upper right", fontsize=8)
        ax.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()