# VAMOS Benchmark for SWEVO Paper

This notebook reproduces benchmark experiments and generates LaTeX tables for the paper.

**Features:**
- Family-grouped tables (ZDT, DTLZ, WFG) for main body
- Detailed per-problem tables for appendix with **row-wise best** marking
- Optional `main.tex` update and PDF compilation (disabled by default; enable with `UPDATE_LATEX = True`)

## Configuration

In [1]:
import subprocess
import pandas as pd
import numpy as np
import re
from pathlib import Path

# --- Filesystem layout (all paths are relative to the working directory) ---
DATA_DIR = Path("..", "experiments")
PAPER_DIR = Path("manuscript")
MAIN_TEX = PAPER_DIR.joinpath("main.tex")

# -----------------------------------------------------------------------------
# EXPERIMENT CONFIGURATION - edit the values below to customize a run
# -----------------------------------------------------------------------------

# Benchmark problems, grouped by family.
ZDT_PROBLEMS = [f"zdt{i}" for i in (1, 2, 3, 4, 6)]
DTLZ_PROBLEMS = [f"dtlz{i}" for i in (1, 2, 3, 4, 7)]
WFG_PROBLEMS = [f"wfg{i}" for i in range(1, 10)]

# Per-family switches: a family contributes to PROBLEMS only when its flag is True.
USE_ZDT = True
USE_DTLZ = True
USE_WFG = True  # WFG was noted by the original author as requiring pymoo

# Benchmark settings.
N_EVALS = 100_000  # function evaluations per run
N_SEEDS = 3        # independent runs per (problem, backend) pair
BACKENDS = ["vamos-numpy", "vamos-numba", "vamos-moocore", "pymoo"]

# False -> load previously saved results; True -> run the benchmarks again.
RUN_NEW_BENCHMARK = False

# Destination CSV for freshly generated results.
OUTPUT_CSV = DATA_DIR / "benchmark_paper.csv"

# -----------------------------------------------------------------------------

# Assemble the active problem list, always in ZDT -> DTLZ -> WFG order.
PROBLEMS = [
    name
    for enabled, members in (
        (USE_ZDT, ZDT_PROBLEMS),
        (USE_DTLZ, DTLZ_PROBLEMS),
        (USE_WFG, WFG_PROBLEMS),
    )
    if enabled
    for name in members
]

# Echo the effective configuration so it is recorded in the notebook output.
print(
    f"Configured {len(PROBLEMS)} problems: {PROBLEMS}",
    f"Backends: {BACKENDS}",
    f"Evaluations per run: {N_EVALS:,}",
    f"Seeds: {N_SEEDS}",
    f"Total runs: {len(PROBLEMS) * len(BACKENDS) * N_SEEDS}",
    sep="\n",
)

Configured 19 problems: ['zdt1', 'zdt2', 'zdt3', 'zdt4', 'zdt6', 'dtlz1', 'dtlz2', 'dtlz3', 'dtlz4', 'dtlz7', 'wfg1', 'wfg2', 'wfg3', 'wfg4', 'wfg5', 'wfg6', 'wfg7', 'wfg8', 'wfg9']
Backends: ['vamos-numpy', 'vamos-numba', 'vamos-moocore', 'pymoo']
Evaluations per run: 100,000
Seeds: 3
Total runs: 228


## 1. Run Benchmarks (if enabled)

In [2]:
# Either run every (problem, seed, backend) combination and save the timings to
# OUTPUT_CSV, or load previously saved timings. Both branches leave the results
# in the DataFrame `df`, one row per run.
if RUN_NEW_BENCHMARK:
    import sys
    import time

    # Make the in-repo sources importable (assumes they live in ../src
    # relative to the working directory -- TODO confirm).
    sys.path.insert(0, str(Path("..") / "src"))

    from vamos.foundation.problem.registry import make_problem_selection
    from vamos import run_optimization

    # Create the output directory up front so that a long benchmark run cannot
    # be lost at the very end because the CSV's parent folder does not exist.
    OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)

    results = []

    for problem_name in PROBLEMS:
        for seed in range(N_SEEDS):
            print(f"Running {problem_name} with seed {seed}...")

            # VAMOS backends: only those whose "vamos-<name>" entry is in BACKENDS.
            # NOTE(review): if an engine compiles on first use (e.g. a JIT),
            # that cost is included in its first timed run -- confirm.
            for backend in ["numpy", "numba", "moocore"]:
                if f"vamos-{backend}" not in BACKENDS:
                    continue
                try:
                    # Problem construction is deliberately outside the timed region.
                    problem = make_problem_selection(problem_name).instantiate()
                    start = time.perf_counter()
                    result = run_optimization(
                        problem, "nsgaii",
                        max_evaluations=N_EVALS,
                        pop_size=100,
                        engine=backend,
                        seed=seed
                    )
                    elapsed = time.perf_counter() - start
                    results.append({
                        "framework": f"VAMOS ({backend})",
                        "problem": problem_name,
                        "algorithm": "NSGA-II",
                        "n_evals": N_EVALS,
                        "seed": seed,
                        "runtime_seconds": elapsed,
                        "n_solutions": result.X.shape[0] if result.X is not None else 0,
                    })
                    print(f"  VAMOS ({backend})... {elapsed:.2f}s")
                except Exception as e:
                    # Best effort: a failed run is reported and simply produces
                    # no row, so downstream tables may contain gaps.
                    print(f"  VAMOS ({backend})... FAILED: {e}")

            # pymoo reference implementation
            if "pymoo" in BACKENDS:
                try:
                    from pymoo.algorithms.moo.nsga2 import NSGA2
                    from pymoo.optimize import minimize
                    from pymoo.termination import get_termination
                    from pymoo.problems import get_problem

                    # Setup is excluded from the timing, mirroring the VAMOS branch.
                    pymoo_problem = get_problem(problem_name)
                    algorithm = NSGA2(pop_size=100)
                    termination = get_termination("n_eval", N_EVALS)

                    start = time.perf_counter()
                    res = minimize(pymoo_problem, algorithm, termination, seed=seed, verbose=False)
                    elapsed = time.perf_counter() - start

                    results.append({
                        "framework": "pymoo",
                        "problem": problem_name,
                        "algorithm": "NSGA-II",
                        "n_evals": N_EVALS,
                        "seed": seed,
                        "runtime_seconds": elapsed,
                        "n_solutions": res.X.shape[0] if res.X is not None else 0,
                    })
                    print(f"  pymoo... {elapsed:.2f}s")
                except Exception as e:
                    print(f"  pymoo... FAILED: {e}")

    # Save results
    df = pd.DataFrame(results)
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"\nSaved {len(df)} results to {OUTPUT_CSV}")
else:
    # Load existing results.
    # NOTE: this reads benchmark_extended.csv, which is a different file from
    # OUTPUT_CSV written by the branch above.
    existing_csv = DATA_DIR / "benchmark_extended.csv"
    if existing_csv.exists():
        df = pd.read_csv(existing_csv)
        print(f"Loaded {len(df)} rows from {existing_csv}")
    else:
        # Name the missing file so the failure is actionable.
        raise FileNotFoundError(
            f"No benchmark data found at {existing_csv}. "
            "Set RUN_NEW_BENCHMARK = True to generate."
        )

Loaded 84 rows from ..\experiments\benchmark_extended.csv


## 2. Data Processing

In [3]:
# Map a problem name to its benchmark family
def get_family(problem_name):
    """Return the family label for ``problem_name`` based on its prefix.

    Prefixes are tested in order and matching is case-sensitive:
    ``zdt`` -> ``'ZDT'``, ``dtlz`` -> ``'DTLZ'``, ``wfg`` -> ``'WFG'``,
    ``lz`` -> ``'LZ09'``. Any other name yields ``'Other'``.
    """
    prefix_table = (
        ('zdt', 'ZDT'),
        ('dtlz', 'DTLZ'),
        ('wfg', 'WFG'),
        ('lz', 'LZ09'),
    )
    for prefix, label in prefix_table:
        if problem_name.startswith(prefix):
            return label
    return 'Other'

# Tag every run with its problem family (adds or overwrites the 'family' column).
df['family'] = df['problem'].map(get_family)

# Summary: number of distinct problems per family, then the frameworks present.
print("Problems by family:", df.groupby('family')['problem'].nunique(), sep="\n")
print("\nFrameworks:", df['framework'].unique(), sep="\n")

Problems by family:
family
DTLZ    3
ZDT     4
Name: problem, dtype: int64

Frameworks:
['VAMOS (numpy)' 'VAMOS (numba)' 'VAMOS (moocore)' 'pymoo']


## 3. Generate Family-Grouped Tables (Main Body)

In [4]:
# Family summary: median runtime per (framework, family), frameworks as rows.
family = df.groupby(['framework', 'family'])['runtime_seconds'].median().unstack()
# 'Average' is the unweighted mean of the per-family medians in each row.
family['Average'] = family.mean(axis=1)

# VAMOS backends only (Table 3): strip the "VAMOS (...)" wrapper from the labels.
vamos_family = family.loc[family.index.str.contains('VAMOS')].copy()
# regex=False is explicit because 'VAMOS (' is not a valid regular expression
# (unbalanced parenthesis); pandas versions that default to regex=True would
# raise when compiling it.
vamos_family.index = (
    vamos_family.index
    .str.replace('VAMOS (', '', regex=False)
    .str.replace(')', '', regex=False)
)

print("Table 3 - VAMOS Backend Comparison (by family):")
display(vamos_family.round(2))

Table 3 - VAMOS Backend Comparison (by family):


family,DTLZ,ZDT,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
moocore,0.62,0.44,0.53
numba,0.43,0.44,0.43
numpy,3.13,4.37,3.75


In [5]:
# Table 4: VAMOS (numba) against pymoo, per family.
comparison_family = family.loc[family.index.isin(['VAMOS (numba)', 'pymoo'])].copy()

# The speedup is only meaningful when both frameworks have data.
if 'pymoo' not in comparison_family.index or 'VAMOS (numba)' not in comparison_family.index:
    print("Missing frameworks for comparison")
else:
    pymoo_t = comparison_family.loc['pymoo']
    numba_t = comparison_family.loc['VAMOS (numba)']
    # Ratio of pymoo runtime to VAMOS (numba) runtime, per column.
    speedup = pymoo_t.div(numba_t)

    print("Table 4 - VAMOS (Numba) vs pymoo:")
    display(comparison_family.round(2))
    print("\nSpeedup by family:")
    for fam in speedup.index:
        # The 'Average' column is reported separately as the overall figure.
        if fam == 'Average':
            continue
        print(f"  {fam}: {speedup[fam]:.1f}x")
    print(f"  Overall: {speedup['Average']:.1f}x")

Table 4 - VAMOS (Numba) vs pymoo:


family,DTLZ,ZDT,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VAMOS (numba),0.43,0.44,0.43
pymoo,4.53,7.47,6.0



Speedup by family:
  DTLZ: 10.5x
  ZDT: 17.1x
  Overall: 13.8x


## 4. Generate Detailed Tables (Appendix) - Row-wise Best

In [6]:
# Detailed view: median runtime per (framework, problem), frameworks as rows.
detail = df.groupby(['framework', 'problem'])['runtime_seconds'].median().unstack()

# Table A.1: VAMOS backends only, transposed so problems are rows.
backends_detail = detail.loc[detail.index.str.contains('VAMOS')].T.copy()
# regex=False is explicit because 'VAMOS (' is not a valid regular expression
# (unbalanced parenthesis); pandas versions that default to regex=True would
# raise when compiling it.
backends_detail.columns = (
    backends_detail.columns
    .str.replace('VAMOS (', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Append an 'Average' row: the column-wise mean of the per-problem medians.
avg_row = backends_detail.mean()
avg_row.name = 'Average'
backends_detail = pd.concat([backends_detail, avg_row.to_frame().T])

print("Table A.1 - Backends (problems as rows):")
display(backends_detail.round(2))

Table A.1 - Backends (problems as rows):


framework,moocore,numba,numpy
dtlz1,0.61,0.44,3.11
dtlz2,0.62,0.4,3.01
dtlz3,0.73,0.48,3.14
zdt1,0.7,0.63,4.97
zdt2,0.51,0.46,4.72
zdt3,0.31,0.34,2.71
zdt4,0.31,0.32,2.55
Average,0.54,0.44,3.46


In [7]:
# Table A.2: per-problem VAMOS (numba) vs pymoo runtimes with speedup.
if 'VAMOS (numba)' not in detail.index or 'pymoo' not in detail.index:
    print("Missing frameworks for detailed comparison")
else:
    # Select the two frameworks directly in display order (pymoo first),
    # transpose so problems are rows, and give the columns short labels.
    comparison_detail = detail.loc[['pymoo', 'VAMOS (numba)']].T.copy()
    comparison_detail.columns = ['pymoo', 'VAMOS']
    comparison_detail['Speedup'] = comparison_detail['pymoo'].div(comparison_detail['VAMOS'])

    # 'Average' row: mean runtimes, with the speedup taken as the ratio of
    # those means (not the mean of the per-problem speedups).
    avg_row = pd.Series(
        {col: comparison_detail[col].mean() for col in ('pymoo', 'VAMOS')},
        name='Average',
    )
    avg_row['Speedup'] = avg_row['pymoo'] / avg_row['VAMOS']
    comparison_detail = pd.concat([comparison_detail, avg_row.to_frame().T])

    print("Table A.2 - Comparison with Speedup:")
    display(comparison_detail.round(2))

Table A.2 - Comparison with Speedup:


Unnamed: 0,pymoo,VAMOS,Speedup
dtlz1,4.53,0.44,10.34
dtlz2,4.23,0.4,10.64
dtlz3,4.86,0.48,10.2
zdt1,8.6,0.63,13.72
zdt2,7.55,0.46,16.46
zdt3,4.61,0.34,13.39
zdt4,4.1,0.32,12.78
Average,5.5,0.44,12.56


## 5. LaTeX Generation with Row-wise Best

In [8]:
def make_latex_table_a1(df):
    """Render Table A.1 (VAMOS backend comparison) as a LaTeX ``table``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per problem with the columns ``'numpy'``, ``'moocore'`` and
        ``'numba'``. A row whose index label is ``'Average'`` is preceded by a
        ``\\midrule`` and gets a bold row label.

    Returns
    -------
    str
        The LaTeX source. In each row the smallest available value is bolded
        (ties bold every tied cell). Missing values (NaN) are rendered as
        ``--`` and are ignored when picking the row minimum.
    """
    backend_order = ['numpy', 'moocore', 'numba']
    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        r"\caption{Detailed VAMOS backend comparison: median runtime (seconds) per problem.}",
        r"\label{tab:detailed_backends}",
        r"\begin{tabular}{l|rrr}",
        r"\toprule",
        r"\textbf{Problem} & \textbf{NumPy} & \textbf{moocore} & \textbf{Numba} \\",
        r"\midrule",
    ]

    for idx, row in df.iterrows():
        vals = {col: row[col] for col in backend_order}
        # Exclude NaN before taking the minimum: min() over a sequence that
        # contains NaN gives an order-dependent result.
        present = [v for v in vals.values() if not pd.isna(v)]
        min_val = min(present) if present else None

        row_str = []
        for col in backend_order:
            v = vals[col]
            if pd.isna(v):
                row_str.append("--")
            elif v == min_val:
                row_str.append(f"\\textbf{{{v:.2f}}}")
            else:
                row_str.append(f"{v:.2f}")
        cells = ' & '.join(row_str)

        if idx == 'Average':
            lines.append(r"\midrule")
            lines.append(f"\\textbf{{Average}} & {cells} \\\\")
        else:
            lines.append(f"{idx} & {cells} \\\\")

    lines.extend([r"\bottomrule", r"\end{tabular}", r"\end{table}"])
    return "\n".join(lines)

def make_latex_table_a2(df):
    """Render Table A.2 (VAMOS vs pymoo with speedup) as a LaTeX ``table``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per problem with the columns ``'pymoo'``, ``'VAMOS'`` and
        ``'Speedup'``. A row whose index label is ``'Average'`` is preceded by
        a ``\\midrule`` and gets a bold row label and bold speedup.

    Returns
    -------
    str
        The LaTeX source. Per row, the VAMOS runtime is bolded when it is
        strictly smaller than the pymoo runtime; otherwise the pymoo runtime
        is bolded. Missing values (NaN) are rendered as ``--`` and are never
        bolded; if only one runtime is present, that one is bolded.
    """
    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        r"\caption{Detailed VAMOS vs pymoo comparison: median runtime (seconds) and speedup.}",
        r"\label{tab:detailed_comparison}",
        r"\begin{tabular}{l|rr|r}",
        r"\toprule",
        r"\textbf{Problem} & \textbf{pymoo} & \textbf{VAMOS} & \textbf{Speedup} \\",
        r"\midrule",
    ]

    def _runtime_cell(value, bold):
        # Format one runtime cell; a missing value becomes a dash.
        if pd.isna(value):
            return "--"
        text = f"{value:.2f}"
        return f"\\textbf{{{text}}}" if bold else text

    for idx, row in df.iterrows():
        pymoo_v = row['pymoo']
        vamos_v = row['VAMOS']
        speedup = row['Speedup']
        is_average = idx == 'Average'

        # Decide explicitly which side is the faster one. A NaN comparison is
        # always False, so without these guards a missing VAMOS value would
        # silently mark pymoo as the winner via the fall-through branch.
        vamos_missing = pd.isna(vamos_v)
        pymoo_missing = pd.isna(pymoo_v)
        vamos_bold = (not vamos_missing) and (pymoo_missing or vamos_v < pymoo_v)
        pymoo_bold = (not pymoo_missing) and (not vamos_bold)

        vamos_str = _runtime_cell(vamos_v, vamos_bold)
        pymoo_str = _runtime_cell(pymoo_v, pymoo_bold)

        if pd.isna(speedup):
            speedup_str = "--"
        elif is_average:
            speedup_str = f"\\textbf{{{speedup:.1f}$\\times$}}"
        else:
            speedup_str = f"{speedup:.1f}$\\times$"

        if is_average:
            lines.append(r"\midrule")
            lines.append(f"\\textbf{{Average}} & {pymoo_str} & {vamos_str} & {speedup_str} \\\\")
        else:
            lines.append(f"{idx} & {pymoo_str} & {vamos_str} & {speedup_str} \\\\")

    lines.extend([r"\bottomrule", r"\end{tabular}", r"\end{table}"])
    return "\n".join(lines)

In [9]:
# Build both appendix tables as LaTeX source strings.
table_a1_latex = make_latex_table_a1(backends_detail)
table_a2_latex = make_latex_table_a2(comparison_detail)

# Show each table under a banner, separated by one blank line.
print(
    "=" * 60,
    "TABLE A.1 - Backends (row-wise best)",
    "=" * 60,
    table_a1_latex,
    "",
    "=" * 60,
    "TABLE A.2 - Comparison",
    "=" * 60,
    table_a2_latex,
    sep="\n",
)

TABLE A.1 - Backends (row-wise best)
\begin{table}[htbp]
\centering
\caption{Detailed VAMOS backend comparison: median runtime (seconds) per problem.}
\label{tab:detailed_backends}
\begin{tabular}{l|rrr}
\toprule
\textbf{Problem} & \textbf{NumPy} & \textbf{moocore} & \textbf{Numba} \\
\midrule
dtlz1 & 3.11 & 0.61 & \textbf{0.44} \\
dtlz2 & 3.01 & 0.62 & \textbf{0.40} \\
dtlz3 & 3.14 & 0.73 & \textbf{0.48} \\
zdt1 & 4.97 & 0.70 & \textbf{0.63} \\
zdt2 & 4.72 & 0.51 & \textbf{0.46} \\
zdt3 & 2.71 & \textbf{0.31} & 0.34 \\
zdt4 & 2.55 & \textbf{0.31} & 0.32 \\
\midrule
\textbf{Average} & 3.46 & 0.54 & \textbf{0.44} \\
\bottomrule
\end{tabular}
\end{table}

TABLE A.2 - Comparison
\begin{table}[htbp]
\centering
\caption{Detailed VAMOS vs pymoo comparison: median runtime (seconds) and speedup.}
\label{tab:detailed_comparison}
\begin{tabular}{l|rr|r}
\toprule
\textbf{Problem} & \textbf{pymoo} & \textbf{VAMOS} & \textbf{Speedup} \\
\midrule
dtlz1 & 4.53 & \textbf{0.44} & 10.3$\times$ \\
dtlz2 

## 6. Update main.tex and Compile PDF

In [10]:
def replace_table_in_tex(content: str, label: str, new_table: str) -> str:
    """Replace the ``table`` environment that contains ``\\label{<label>}``.

    Parameters
    ----------
    content : str
        Full LaTeX source to search.
    label : str
        Label of the table to replace (without the ``\\label{}`` wrapper).
    new_table : str
        Text that replaces the whole ``\\begin{table}...\\end{table}`` span.
        It is inserted verbatim, so backslashes need no escaping.

    Returns
    -------
    str
        The updated source. If no table carries the label, a warning is
        printed and ``content`` is returned unchanged.
    """
    # Between \begin{table} and the label, the pattern must not step over an
    # \end{table}. A plain lazy ".*?" would begin at the first table in the
    # document and run across every earlier table until it reached the label,
    # deleting all of them along with the target.
    pattern = (
        r"\\begin\{table\}"
        r"(?:(?!\\end\{table\}).)*?"
        r"\\label\{" + re.escape(label) + r"\}"
        r".*?\\end\{table\}"
    )
    match = re.search(pattern, content, re.DOTALL)
    if match:
        return content[:match.start()] + new_table + content[match.end():]
    print(f"Warning: Table {label} not found")
    return content

def compile_latex(tex_path: Path) -> bool:
    """Run a single ``pdflatex`` pass on ``tex_path`` and tidy up afterwards.

    ``pdflatex`` is invoked in nonstop mode from the file's own directory.

    Parameters
    ----------
    tex_path : Path
        Path to the ``.tex`` file to compile.

    Returns
    -------
    bool
        True when ``pdflatex`` exits with status 0. False when it exits with
        any other status or cannot be started at all.

    Notes
    -----
    The ``.aux`` and ``.out`` files are always removed. The ``.log`` file is
    removed only after a successful run, so a failed compilation can still be
    diagnosed from it.
    """
    try:
        result = subprocess.run(
            ['pdflatex', '-interaction=nonstopmode', tex_path.name],
            cwd=tex_path.parent,
            # errors='replace' keeps undecodable bytes in the tool's output
            # from raising while the captured text is decoded.
            capture_output=True, text=True, errors='replace',
        )
    except FileNotFoundError as exc:
        # Raised when the executable (or the working directory) does not exist.
        print(f"pdflatex could not be run: {exc}")
        return False

    succeeded = result.returncode == 0

    # Clean auxiliary files; keep the log when the run failed.
    removable = ['.aux', '.out']
    if succeeded:
        removable.append('.log')
    for ext in removable:
        aux = tex_path.parent / (tex_path.stem + ext)
        try:
            aux.unlink()
        except OSError:
            # Best-effort cleanup: a missing or locked file is not an error.
            pass
    return succeeded

In [11]:
# Opt-in switch: when True, main.tex is rewritten and the PDF is recompiled.
UPDATE_LATEX = False

if not UPDATE_LATEX:
    print("Set UPDATE_LATEX = True to update main.tex and recompile PDF")
else:
    content = MAIN_TEX.read_text(encoding='utf-8')
    original_len = len(content)

    # Swap in the freshly generated appendix tables, located by their labels.
    content = replace_table_in_tex(content, 'tab:detailed_backends', table_a1_latex)
    content = replace_table_in_tex(content, 'tab:detailed_comparison', table_a2_latex)

    # Safety check: refuse to write if the document shrank below 90% of its
    # original length, which would indicate a replacement removed too much.
    if len(content) < original_len * 0.9:
        print(f"ERROR: Content too short, skipping write")
    else:
        MAIN_TEX.write_text(content, encoding='utf-8')
        print(f"main.tex updated ({len(content)} bytes)")

        if not compile_latex(MAIN_TEX):
            print("PDF compilation failed")
        else:
            print(f"PDF compiled: {MAIN_TEX.parent / 'main.pdf'}")

Set UPDATE_LATEX = True to update main.tex and recompile PDF


## 7. Summary Statistics

In [12]:
print("=" * 50)
print("KEY STATISTICS FOR PAPER")
print("=" * 50)

# Per-backend averages taken from the family table (Table 3).
numba_avg = vamos_family.loc['numba', 'Average']
numpy_avg = vamos_family.loc['numpy', 'Average']
moocore_avg = vamos_family.loc['moocore', 'Average']

# Derive the best backend from the data instead of assuming it is Numba, so
# the printed claim stays correct when the benchmark results change.
display_names = {'numba': 'Numba', 'numpy': 'NumPy', 'moocore': 'moocore'}
best_backend = vamos_family['Average'].idxmin()
best_avg = vamos_family.loc[best_backend, 'Average']

print(f"Best backend: {display_names.get(best_backend, best_backend)} ({best_avg:.2f}s avg)")
print(f"NumPy baseline: {numpy_avg:.2f}s avg")
print(f"moocore: {moocore_avg:.2f}s avg")
print()

# Speedup by family. `speedup` is created by the Table 4 cell and exists only
# when both 'pymoo' and 'VAMOS (numba)' were present in the data.
print("Speedup vs pymoo:")
for fam in speedup.index:
    if fam != 'Average' and pd.notna(speedup[fam]):
        print(f"  {fam} family: {speedup[fam]:.1f}x")
print(f"  Overall: {speedup['Average']:.1f}x")

print()
print(f"Total problems benchmarked: {df['problem'].nunique()}")
print(f"Total runs: {len(df)}")

KEY STATISTICS FOR PAPER
Best backend: Numba (0.43s avg)
NumPy baseline: 3.75s avg
moocore: 0.53s avg

Speedup vs pymoo:
  DTLZ family: 10.5x
  ZDT family: 17.1x
  Overall: 13.8x

Total problems benchmarked: 7
Total runs: 84
