# VAMOS Benchmark for SWEVO Paper

This notebook reproduces the benchmark experiments and generates LaTeX tables for the paper.

**Output:**
- Table 3: VAMOS backend comparison (grouped by problem family)
- Table 4: VAMOS vs pymoo comparison (grouped by problem family)
- Appendix tables: Detailed results per problem

In [1]:
import subprocess
import pandas as pd
import numpy as np
from pathlib import Path

# Paths (notebook is in paper/)
PAPER_DIR = Path("manuscript")
DATA_DIR = Path("..") / "experiments"
MAIN_TEX = PAPER_DIR / "main.tex"

## 1. Run Benchmarks (or load existing results)

In [2]:
# Configuration
PROBLEMS = ["zdt1", "zdt2", "zdt3", "zdt4", "dtlz1", "dtlz2", "dtlz3"]
EVALS = 100000
SEEDS = 3
FRAMEWORKS = ["vamos-numpy", "vamos-numba", "vamos-moocore", "pymoo"]
OUTPUT_CSV = DATA_DIR / "benchmark_paper.csv"

RUN_NEW_BENCHMARK = False  # Set to True to run fresh benchmarks

In [3]:
if RUN_NEW_BENCHMARK:
    cmd = [
        "python", str(DATA_DIR / "benchmark_frameworks.py"),
        "--problems", *PROBLEMS,
        "--evals", str(EVALS),
        "--seeds", str(SEEDS),
        "--frameworks", *FRAMEWORKS,
        "--output", str(OUTPUT_CSV)
    ]
    print(f"Running: {' '.join(cmd)}")
    subprocess.run(cmd, check=True)
else:
    # Load existing results
    OUTPUT_CSV = DATA_DIR / "benchmark_extended.csv"
    print(f"Loading existing results from {OUTPUT_CSV}")

Loading existing results from ..\experiments\benchmark_extended.csv


In [4]:
# Load data
df = pd.read_csv(OUTPUT_CSV)
print(f"Loaded {len(df)} rows")
df.head()

Loaded 84 rows


Unnamed: 0,framework,problem,algorithm,n_evals,seed,runtime_seconds,n_solutions,hv
0,VAMOS (numpy),zdt1,NSGA-II,100000,0,4.96591,100,
1,VAMOS (numba),zdt1,NSGA-II,100000,0,2.055119,100,
2,VAMOS (moocore),zdt1,NSGA-II,100000,0,0.515945,100,
3,pymoo,zdt1,NSGA-II,100000,0,8.601611,100,
4,VAMOS (numpy),zdt1,NSGA-II,100000,1,5.746121,100,


## 2. Data Processing

In [5]:
def classify_problem_family(problem: str) -> str:
    """Classify problem into family."""
    if problem.startswith("zdt"):
        return "ZDT"
    elif problem.startswith("dtlz"):
        return "DTLZ"
    elif problem.startswith("wfg"):
        return "WFG"
    return "Other"

def clean_framework_name(name: str) -> str:
    """Clean framework name for display."""
    return name.replace("VAMOS (", "").replace(")", "").replace("vamos-", "")

# Add problem family
df["family"] = df["problem"].apply(classify_problem_family)
df["backend"] = df["framework"].apply(clean_framework_name)

In [6]:
# Calculate median runtime per framework/problem
summary = df.groupby(["framework", "problem", "family"]).agg({
    "runtime_seconds": ["median", "std", "count"]
}).reset_index()
summary.columns = ["framework", "problem", "family", "median_time", "std_time", "n_runs"]
summary

Unnamed: 0,framework,problem,family,median_time,std_time,n_runs
0,VAMOS (moocore),dtlz1,DTLZ,0.614458,0.007617,3
1,VAMOS (moocore),dtlz2,DTLZ,0.623865,0.057044,3
2,VAMOS (moocore),dtlz3,DTLZ,0.730612,0.023678,3
3,VAMOS (moocore),zdt1,ZDT,0.701659,0.126892,3
4,VAMOS (moocore),zdt2,ZDT,0.509217,0.048144,3
5,VAMOS (moocore),zdt3,ZDT,0.310414,0.068238,3
6,VAMOS (moocore),zdt4,ZDT,0.308579,0.010884,3
7,VAMOS (numba),dtlz1,DTLZ,0.437945,0.017583,3
8,VAMOS (numba),dtlz2,DTLZ,0.39737,0.016814,3
9,VAMOS (numba),dtlz3,DTLZ,0.476351,0.029237,3


## 3. Generate Tables Grouped by Family

In [7]:
def generate_family_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Generate summary grouped by problem family."""
    family_summary = df.groupby(["framework", "family"]).agg({
        "runtime_seconds": "median"
    }).reset_index()
    family_summary.columns = ["framework", "family", "median_time"]
    
    # Pivot to wide format
    pivot = family_summary.pivot(index="framework", columns="family", values="median_time")
    pivot["Average"] = pivot.mean(axis=1)
    return pivot.round(2)

family_table = generate_family_summary(df)
family_table

family,DTLZ,ZDT,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VAMOS (moocore),0.62,0.44,0.53
VAMOS (numba),0.43,0.44,0.43
VAMOS (numpy),3.13,4.37,3.75
pymoo,4.53,7.47,6.0


In [8]:
# VAMOS-only table (Table 3)
vamos_backends = ["VAMOS (numpy)", "VAMOS (numba)", "VAMOS (moocore)"]
table3_df = family_table.loc[family_table.index.isin(vamos_backends)].copy()
table3_df.index = table3_df.index.str.replace("VAMOS (", "").str.replace(")", "")
print("Table 3 - VAMOS Backend Comparison (by family):")
table3_df

Table 3 - VAMOS Backend Comparison (by family):


family,DTLZ,ZDT,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
moocore,0.62,0.44,0.53
numba,0.43,0.44,0.43
numpy,3.13,4.37,3.75


In [9]:
# VAMOS vs pymoo table (Table 4)
comparison = ["VAMOS (numba)", "pymoo"]
table4_df = family_table.loc[family_table.index.isin(comparison)].copy()

# Calculate speedup
pymoo_times = table4_df.loc["pymoo"]
numba_times = table4_df.loc["VAMOS (numba)"]
speedup = pymoo_times / numba_times
speedup.name = "Speedup"

print("Table 4 - VAMOS (Numba) vs pymoo (by family):")
print(table4_df)
print(f"\nSpeedup: ZDT={speedup['ZDT']:.1f}x, DTLZ={speedup['DTLZ']:.1f}x, Avg={speedup['Average']:.1f}x")

Table 4 - VAMOS (Numba) vs pymoo (by family):
family         DTLZ   ZDT  Average
framework                         
VAMOS (numba)  0.43  0.44     0.43
pymoo          4.53  7.47     6.00

Speedup: ZDT=17.0x, DTLZ=10.5x, Avg=14.0x


## 4. Generate Detailed Tables (Appendix)

In [10]:
def generate_detailed_table(df: pd.DataFrame) -> pd.DataFrame:
    """Generate detailed table with all problems."""
    detail = df.groupby(["framework", "problem"]).agg({
        "runtime_seconds": "median"
    }).reset_index()
    pivot = detail.pivot(index="framework", columns="problem", values="runtime_seconds")
    pivot["Average"] = pivot.mean(axis=1)
    return pivot.round(2)

detailed_table = generate_detailed_table(df)
print("Detailed Table (Appendix):")
detailed_table

Detailed Table (Appendix):


problem,dtlz1,dtlz2,dtlz3,zdt1,zdt2,zdt3,zdt4,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
VAMOS (moocore),0.61,0.62,0.73,0.7,0.51,0.31,0.31,0.54
VAMOS (numba),0.44,0.4,0.48,0.63,0.46,0.34,0.32,0.44
VAMOS (numpy),3.11,3.01,3.14,4.97,4.72,2.71,2.55,3.46
pymoo,4.53,4.23,4.86,8.6,7.55,4.61,4.1,5.5


## 5. LaTeX Export Functions

In [11]:
def df_to_latex_table(df: pd.DataFrame, caption: str, label: str, 
                      bold_min: bool = True) -> str:
    """Convert DataFrame to LaTeX table."""
    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        f"\\caption{{{caption}}}",
        f"\\label{{{label}}}",
    ]
    
    # Column spec
    n_cols = len(df.columns) + 1  # +1 for index
    col_spec = "l" + "r" * len(df.columns)
    lines.append(f"\\begin{{tabular}}{{{col_spec}}}")
    lines.append(r"\toprule")
    
    # Header
    header = r"\textbf{Backend}" + " & " + " & ".join([f"\\textbf{{{c}}}" for c in df.columns]) + r" \\"
    lines.append(header)
    lines.append(r"\midrule")
    
    # Find min values per column for bolding
    min_vals = df.min() if bold_min else None
    
    # Data rows
    for idx, row in df.iterrows():
        row_strs = []
        is_best = bold_min and (row == min_vals).all()
        
        for col in df.columns:
            val = row[col]
            if bold_min and val == min_vals[col]:
                row_strs.append(f"\\textbf{{{val:.2f}}}")
            else:
                row_strs.append(f"{val:.2f}")
        
        idx_str = f"\\textbf{{{idx}}}" if is_best else str(idx)
        lines.append(f"{idx_str} & " + " & ".join(row_strs) + r" \\")
    
    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")
    
    return "\n".join(lines)

In [12]:
# Generate LaTeX for Table 3
table3_latex = df_to_latex_table(
    table3_df, 
    caption="VAMOS CPU backend comparison: median runtime (seconds) by problem family.",
    label="tab:backends"
)
print(table3_latex)

\begin{table}[htbp]
\centering
\caption{VAMOS CPU backend comparison: median runtime (seconds) by problem family.}
\label{tab:backends}
\begin{tabular}{lrrr}
\toprule
\textbf{Backend} & \textbf{DTLZ} & \textbf{ZDT} & \textbf{Average} \\
\midrule
moocore & 0.62 & \textbf{0.44} & 0.53 \\
\textbf{numba} & \textbf{0.43} & \textbf{0.44} & \textbf{0.43} \\
numpy & 3.13 & 4.37 & 3.75 \\
\bottomrule
\end{tabular}
\end{table}


In [13]:
# Generate LaTeX for Table 4
table4_latex = df_to_latex_table(
    table4_df.T.rename(columns={"VAMOS (numba)": "VAMOS"}),
    caption="VAMOS (Numba) vs pymoo: median runtime (seconds) by problem family.",
    label="tab:frameworks_perf",
    bold_min=True
)
print(table4_latex)

\begin{table}[htbp]
\centering
\caption{VAMOS (Numba) vs pymoo: median runtime (seconds) by problem family.}
\label{tab:frameworks_perf}
\begin{tabular}{lrr}
\toprule
\textbf{Backend} & \textbf{VAMOS} & \textbf{pymoo} \\
\midrule
\textbf{DTLZ} & \textbf{0.43} & \textbf{4.53} \\
ZDT & 0.44 & 7.47 \\
Average & \textbf{0.43} & 6.00 \\
\bottomrule
\end{tabular}
\end{table}


## 6. Update main.tex and Compile PDF

In [14]:
def update_latex_table(tex_path: Path, label: str, new_table: str) -> bool:
    """Replace a table in a LaTeX file by its label."""
    import re
    
    content = tex_path.read_text(encoding="utf-8")
    
    # Find table with this label
    pattern = rf"(\\begin{{table}}.*?\\label{{{label}}}.*?\\end{{table}})"
    match = re.search(pattern, content, re.DOTALL)
    
    if match:
        old_table = match.group(1)
        new_content = content.replace(old_table, new_table)
        tex_path.write_text(new_content, encoding="utf-8")
        print(f"Updated table {label} in {tex_path}")
        return True
    else:
        print(f"Table with label {label} not found")
        return False

In [15]:
def compile_latex(tex_path: Path) -> bool:
    """Compile LaTeX to PDF."""
    import subprocess
    
    cwd = tex_path.parent
    cmd = ["pdflatex", "-interaction=nonstopmode", tex_path.name]
    
    print(f"Compiling {tex_path}...")
    result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    
    if result.returncode == 0:
        print(f"PDF generated: {cwd / tex_path.stem}.pdf")
        # Clean aux files
        for ext in [".aux", ".log", ".out"]:
            aux = cwd / (tex_path.stem + ext)
            if aux.exists():
                aux.unlink()
        return True
    else:
        print(f"Compilation failed:\n{result.stdout[-500:]}")
        return False

In [16]:
# Update tables in main.tex
UPDATE_LATEX = False  # Set to True to update main.tex

if UPDATE_LATEX:
    update_latex_table(MAIN_TEX, "tab:backends", table3_latex)
    update_latex_table(MAIN_TEX, "tab:frameworks_perf", table4_latex)
    compile_latex(MAIN_TEX)
else:
    print("Set UPDATE_LATEX = True to update main.tex and recompile PDF")

Set UPDATE_LATEX = True to update main.tex and recompile PDF


## 7. Summary Statistics for Paper

In [17]:
# Key statistics for paper text
numba_avg = table3_df.loc["numba", "Average"]
numpy_avg = table3_df.loc["numpy", "Average"]
moocore_avg = table3_df.loc["moocore", "Average"]

pymoo_zdt = family_table.loc["pymoo", "ZDT"]
numba_zdt = family_table.loc["VAMOS (numba)", "ZDT"]
speedup_zdt = pymoo_zdt / numba_zdt

pymoo_dtlz = family_table.loc["pymoo", "DTLZ"]
numba_dtlz = family_table.loc["VAMOS (numba)", "DTLZ"]
speedup_dtlz = pymoo_dtlz / numba_dtlz

print("=" * 50)
print("KEY STATISTICS FOR PAPER")
print("=" * 50)
print(f"Best backend: Numba ({numba_avg:.2f}s avg)")
print(f"NumPy baseline: {numpy_avg:.2f}s avg")
print(f"moocore: {moocore_avg:.2f}s avg")
print()
print(f"Speedup vs pymoo:")
print(f"  ZDT family: {speedup_zdt:.1f}x")
print(f"  DTLZ family: {speedup_dtlz:.1f}x")
print(f"  Overall: {(speedup_zdt + speedup_dtlz) / 2:.1f}x")

KEY STATISTICS FOR PAPER
Best backend: Numba (0.43s avg)
NumPy baseline: 3.75s avg
moocore: 0.53s avg

Speedup vs pymoo:
  ZDT family: 17.0x
  DTLZ family: 10.5x
  Overall: 13.8x
