# VAMOS Benchmark for SWEVO Paper

This notebook reproduces benchmark experiments and generates LaTeX tables for the paper.

**Features:**
- Family-grouped tables (ZDT, DTLZ) for main body
- Detailed per-problem tables for appendix with **row-wise best** marking
- Optional `main.tex` update and PDF compilation (opt-in: set `UPDATE_LATEX = True` in Section 5)

In [1]:
import subprocess
import pandas as pd
import numpy as np
import re
from pathlib import Path

# Paths (notebook is in paper/)
# All three paths are relative, so they resolve against the kernel's current
# working directory. NOTE(review): presumably that is the notebook's own
# directory -- confirm before enabling the main.tex update step.
PAPER_DIR = Path("manuscript")          # directory that holds the LaTeX sources
DATA_DIR = Path("..") / "experiments"   # directory the benchmark CSV is read from
MAIN_TEX = PAPER_DIR / "main.tex"       # LaTeX file that is rewritten and compiled

## 1. Load Benchmark Data

In [2]:
# Read the raw benchmark results into the notebook-wide frame `df`
# and report how many result rows were found.
df = pd.read_csv(DATA_DIR.joinpath("benchmark_extended.csv"))
print(f"Loaded {df.shape[0]} rows")

# Classify problem family
def get_family(p):
    """Map a benchmark problem name to its family label.

    Parameters
    ----------
    p : str
        Problem identifier such as ``'zdt1'`` or ``'dtlz3'``. Matching is
        case-insensitive and ignores surrounding whitespace.

    Returns
    -------
    str
        ``'ZDT'`` or ``'DTLZ'``.

    Raises
    ------
    ValueError
        If the name belongs to neither family. Failing loudly avoids silently
        counting an unrelated problem as DTLZ, which would skew the
        family-level medians reported in the tables.
    """
    name = str(p).strip().lower()
    for prefix, label in (('zdt', 'ZDT'), ('dtlz', 'DTLZ')):
        if name.startswith(prefix):
            return label
    raise ValueError(f"Unknown problem family for problem {p!r}")

# Tag every result row with its problem family, then preview the frame.
df['family'] = df['problem'].map(get_family)
df.head()

Loaded 84 rows


Unnamed: 0,framework,problem,algorithm,n_evals,seed,runtime_seconds,n_solutions,hv,family
0,VAMOS (numpy),zdt1,NSGA-II,100000,0,4.96591,100,,ZDT
1,VAMOS (numba),zdt1,NSGA-II,100000,0,2.055119,100,,ZDT
2,VAMOS (moocore),zdt1,NSGA-II,100000,0,0.515945,100,,ZDT
3,pymoo,zdt1,NSGA-II,100000,0,8.601611,100,,ZDT
4,VAMOS (numpy),zdt1,NSGA-II,100000,1,5.746121,100,,ZDT


## 2. Generate Family-Grouped Tables (Main Body)

In [3]:
# Family summary: median runtime per (framework, family), frameworks as rows.
family = df.groupby(['framework', 'family'])['runtime_seconds'].median().unstack()
# 'Average' is the mean of the per-family medians computed above.
family['Average'] = family.mean(axis=1)

# VAMOS backends only
vamos_family = family.loc[family.index.str.contains('VAMOS')].copy()
# Strip the "VAMOS (" ... ")" wrapper so only the backend name remains.
# regex=False is passed explicitly: 'VAMOS (' is not a valid regular expression
# (unbalanced parenthesis), and the default of `regex` differs between pandas
# versions, so relying on the default makes this cell version-dependent.
vamos_family.index = (
    vamos_family.index
    .str.replace('VAMOS (', '', regex=False)
    .str.replace(')', '', regex=False)
)

print("Table 3 - VAMOS Backend Comparison (by family):")
display(vamos_family.round(2))

Table 3 - VAMOS Backend Comparison (by family):


family,DTLZ,ZDT,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
moocore,0.62,0.44,0.53
numba,0.43,0.44,0.43
numpy,3.13,4.37,3.75


In [4]:
# Restrict the family summary to the two frameworks compared in Table 4.
_table4_frameworks = ['VAMOS (numba)', 'pymoo']
comparison_family = family[family.index.isin(_table4_frameworks)].copy()

# Speedup of VAMOS (numba) over pymoo, one value per column of the summary.
numba_t = comparison_family.loc['VAMOS (numba)']
pymoo_t = comparison_family.loc['pymoo']
speedup = pymoo_t.div(numba_t)

print("Table 4 - VAMOS (Numba) vs pymoo:")
display(comparison_family.round(2))
print(
    "\nSpeedup: "
    f"ZDT={speedup['ZDT']:.1f}x, "
    f"DTLZ={speedup['DTLZ']:.1f}x, "
    f"Avg={speedup['Average']:.1f}x"
)

Table 4 - VAMOS (Numba) vs pymoo:


family,DTLZ,ZDT,Average
framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VAMOS (numba),0.43,0.44,0.43
pymoo,4.53,7.47,6.0



Speedup: ZDT=17.1x, DTLZ=10.5x, Avg=13.8x


## 3. Generate Detailed Tables (Appendix) - Row-wise Best

In [5]:
# Detailed: median runtime per (framework, problem); frameworks as rows here,
# transposed below so that problems become the rows of the appendix tables.
detail = df.groupby(['framework', 'problem'])['runtime_seconds'].median().unstack()

# Table A.1: VAMOS backends (transposed)
backends_detail = detail.loc[detail.index.str.contains('VAMOS')].T.copy()
# Reduce "VAMOS (<backend>)" to "<backend>". regex=False is explicit because
# 'VAMOS (' is not a valid regular expression and the default of `regex`
# differs between pandas versions.
backends_detail.columns = (
    backends_detail.columns
    .str.replace('VAMOS (', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Add average row (mean over the per-problem medians of each backend)
avg_row = backends_detail.mean()
avg_row.name = 'Average'
backends_detail = pd.concat([backends_detail, avg_row.to_frame().T])

print("Table A.1 - Backends (problems as rows):")
display(backends_detail.round(2))

Table A.1 - Backends (problems as rows):


framework,moocore,numba,numpy
dtlz1,0.61,0.44,3.11
dtlz2,0.62,0.4,3.01
dtlz3,0.73,0.48,3.14
zdt1,0.7,0.63,4.97
zdt2,0.51,0.46,4.72
zdt3,0.31,0.34,2.71
zdt4,0.31,0.32,2.55
Average,0.54,0.44,3.46


In [6]:
# Table A.2: VAMOS vs pymoo (transposed with speedup)
# Select and rename the two frameworks BY NAME. Assigning a positional list of
# column labels would silently swap the columns if the framework ordering ever
# changed; label-based selection also raises a KeyError if one is missing.
comparison_detail = (
    detail.loc[['pymoo', 'VAMOS (numba)']]
    .T
    .rename(columns={'VAMOS (numba)': 'VAMOS'})
    .rename_axis(columns=None)  # drop the 'framework' header, as in the displayed table
)
comparison_detail['Speedup'] = comparison_detail['pymoo'] / comparison_detail['VAMOS']

# Add average row. Note the average speedup is the ratio of the mean runtimes,
# not the mean of the per-problem speedups.
avg_row = pd.Series({
    'pymoo': comparison_detail['pymoo'].mean(),
    'VAMOS': comparison_detail['VAMOS'].mean(),
    'Speedup': comparison_detail['pymoo'].mean() / comparison_detail['VAMOS'].mean()
}, name='Average')
comparison_detail = pd.concat([comparison_detail, avg_row.to_frame().T])

print("Table A.2 - Comparison with Speedup:")
display(comparison_detail.round(2))

Table A.2 - Comparison with Speedup:


Unnamed: 0,pymoo,VAMOS,Speedup
dtlz1,4.53,0.44,10.34
dtlz2,4.23,0.4,10.64
dtlz3,4.86,0.48,10.2
zdt1,8.6,0.63,13.72
zdt2,7.55,0.46,16.46
zdt3,4.61,0.34,13.39
zdt4,4.1,0.32,12.78
Average,5.5,0.44,12.56


## 4. LaTeX Generation with Row-wise Best

In [7]:
def make_latex_table_a1(df):
    """Build the LaTeX source of Table A.1 (VAMOS backend runtimes per problem).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per problem (index = row label) with the numeric columns
        ``'numpy'``, ``'moocore'`` and ``'numba'``. A row whose index is
        ``'Average'`` is rendered in bold after an extra ``\\midrule``.

    Returns
    -------
    str
        A complete ``table`` environment. In every row the smallest runtime is
        wrapped in ``\\textbf{}`` (ties are all bolded). Missing values (NaN)
        are rendered as ``--`` and ignored when searching for the minimum.
    """
    columns = ['numpy', 'moocore', 'numba']
    lines = [
        r"\begin{table}[htbp]",
        r"\centering",
        r"\caption{Detailed VAMOS backend comparison: median runtime (seconds) per problem.}",
        r"\label{tab:detailed_backends}",
        r"\begin{tabular}{l|rrr}",
        r"\toprule",
        r"\textbf{Problem} & \textbf{NumPy} & \textbf{moocore} & \textbf{Numba} \\",
        r"\midrule",
    ]

    for idx, row in df.iterrows():
        vals = {col: row[col] for col in columns}
        # Built-in min() is order-dependent when NaN is present (a leading NaN
        # "wins" and then equals nothing), so drop missing values first.
        present = [v for v in vals.values() if not pd.isna(v)]
        min_val = min(present) if present else None

        row_str = []
        for col in columns:
            v = vals[col]
            if pd.isna(v):
                row_str.append("--")
            elif v == min_val:
                row_str.append(f"\\textbf{{{v:.2f}}}")
            else:
                row_str.append(f"{v:.2f}")

        if idx == 'Average':
            # Separate the summary row from the per-problem rows.
            lines.append(r"\midrule")
            lines.append(f"\\textbf{{Average}} & {' & '.join(row_str)} \\\\")
        else:
            lines.append(f"{idx} & {' & '.join(row_str)} \\\\")

    lines.extend([r"\bottomrule", r"\end{tabular}", r"\end{table}"])
    return "\n".join(lines)

def make_latex_table_a2(df):
    """Render Table A.2 (pymoo vs VAMOS runtimes plus speedup) as LaTeX.

    Expects the columns 'pymoo', 'VAMOS' and 'Speedup'. In each row the
    strictly smaller VAMOS runtime is bolded; otherwise the pymoo runtime is.
    The row labelled 'Average' is preceded by a rule and set in bold.
    """
    def _runtime_cell(seconds, emphasise):
        # Two-decimal runtime, optionally wrapped in \textbf{}.
        text = f"{seconds:.2f}"
        return r"\textbf{" + text + "}" if emphasise else text

    header = [
        r"\begin{table}[htbp]",
        r"\centering",
        r"\caption{Detailed VAMOS vs pymoo comparison: median runtime (seconds) and speedup.}",
        r"\label{tab:detailed_comparison}",
        r"\begin{tabular}{l|rr|r}",
        r"\toprule",
        r"\textbf{Problem} & \textbf{pymoo} & \textbf{VAMOS} & \textbf{Speedup} \\",
        r"\midrule",
    ]
    footer = [r"\bottomrule", r"\end{tabular}", r"\end{table}"]

    body = []
    for name, record in df.iterrows():
        t_pymoo = record['pymoo']
        t_vamos = record['VAMOS']
        ratio = f"{record['Speedup']:.1f}" + r"$\times$"

        vamos_faster = t_vamos < t_pymoo
        cells = [
            _runtime_cell(t_pymoo, not vamos_faster),
            _runtime_cell(t_vamos, vamos_faster),
        ]

        if name == 'Average':
            body.append(r"\midrule")
            label = r"\textbf{Average}"
            ratio = r"\textbf{" + ratio + "}"
        else:
            label = f"{name}"

        body.append(" & ".join([label, *cells, ratio]) + r" \\")

    return "\n".join(header + body + footer)

In [8]:
# Build both appendix tables as LaTeX strings, then echo them for inspection.
table_a1_latex = make_latex_table_a1(backends_detail)
table_a2_latex = make_latex_table_a2(comparison_detail)

_rule = "=" * 60
print(
    _rule,
    "TABLE A.1 - Backends (row-wise best)",
    _rule,
    table_a1_latex,
    "",  # blank separator line between the two tables
    _rule,
    "TABLE A.2 - Comparison",
    _rule,
    table_a2_latex,
    sep="\n",
)

TABLE A.1 - Backends (row-wise best)
\begin{table}[htbp]
\centering
\caption{Detailed VAMOS backend comparison: median runtime (seconds) per problem.}
\label{tab:detailed_backends}
\begin{tabular}{l|rrr}
\toprule
\textbf{Problem} & \textbf{NumPy} & \textbf{moocore} & \textbf{Numba} \\
\midrule
dtlz1 & 3.11 & 0.61 & \textbf{0.44} \\
dtlz2 & 3.01 & 0.62 & \textbf{0.40} \\
dtlz3 & 3.14 & 0.73 & \textbf{0.48} \\
zdt1 & 4.97 & 0.70 & \textbf{0.63} \\
zdt2 & 4.72 & 0.51 & \textbf{0.46} \\
zdt3 & 2.71 & \textbf{0.31} & 0.34 \\
zdt4 & 2.55 & \textbf{0.31} & 0.32 \\
\midrule
\textbf{Average} & 3.46 & 0.54 & \textbf{0.44} \\
\bottomrule
\end{tabular}
\end{table}

TABLE A.2 - Comparison
\begin{table}[htbp]
\centering
\caption{Detailed VAMOS vs pymoo comparison: median runtime (seconds) and speedup.}
\label{tab:detailed_comparison}
\begin{tabular}{l|rr|r}
\toprule
\textbf{Problem} & \textbf{pymoo} & \textbf{VAMOS} & \textbf{Speedup} \\
\midrule
dtlz1 & 4.53 & \textbf{0.44} & 10.3$\times$ \\
dtlz2 

## 5. Update main.tex and Compile PDF

In [9]:
def replace_table_in_tex(content: str, label: str, new_table: str) -> str:
    """Replace the ``table`` environment that carries ``label`` with ``new_table``.

    Parameters
    ----------
    content : str
        Full LaTeX source to search.
    label : str
        Label name as written inside ``\\label{...}`` (e.g. ``'tab:foo'``).
    new_table : str
        Replacement text; inserted verbatim in place of the matched span from
        ``\\begin{table}`` through the matching ``\\end{table}``.

    Returns
    -------
    str
        The updated source, or ``content`` unchanged (after printing a
        warning) when no table with that label is found.
    """
    # Between \begin{table} and the label, refuse to step over an \end{table}.
    # A plain lazy ".*?" would anchor at the FIRST table in the document and
    # run across every earlier table to reach the label, so the replacement
    # would delete all tables and text that precede the target.
    pattern = (
        r"\\begin\{table\}(?:(?!\\end\{table\}).)*?\\label\{"
        + re.escape(label)
        + r"\}.*?\\end\{table\}"
    )
    match = re.search(pattern, content, re.DOTALL)
    if match:
        # Splice by slicing (not re.sub) so backslashes in new_table stay literal.
        return content[:match.start()] + new_table + content[match.end():]
    print(f"Warning: Table {label} not found")
    return content

def compile_latex(tex_path: Path) -> bool:
    """Run a single ``pdflatex`` pass on ``tex_path`` and tidy auxiliary files.

    Parameters
    ----------
    tex_path : Path
        The ``.tex`` file to compile. ``pdflatex`` runs with the file's parent
        directory as working directory.

    Returns
    -------
    bool
        ``True`` if ``pdflatex`` exited with status 0, ``False`` otherwise
        (including when ``pdflatex`` could not be started at all).

    Notes
    -----
    ``.aux`` and ``.out`` files are always removed afterwards. The ``.log`` is
    removed only on success; on failure it is kept because it is the only
    remaining record of what went wrong (the captured output is discarded).
    """
    try:
        result = subprocess.run(
            ['pdflatex', '-interaction=nonstopmode', tex_path.name],
            cwd=tex_path.parent,
            # errors='replace': pdflatex output is not guaranteed to be valid
            # in the locale encoding; do not let decoding abort the run.
            capture_output=True, text=True, errors='replace'
        )
    except FileNotFoundError as exc:
        # Raised when the pdflatex executable (or the working directory)
        # does not exist; report it as a failed compilation.
        print(f"Warning: could not run pdflatex: {exc}")
        return False

    succeeded = result.returncode == 0

    # Clean aux files (best effort)
    extensions = ['.aux', '.out']
    if succeeded:
        extensions.append('.log')
    for ext in extensions:
        aux = tex_path.parent / (tex_path.stem + ext)
        try:
            aux.unlink()
        except OSError:
            # Missing or undeletable auxiliary files are not an error.
            pass
    return succeeded

In [10]:
# Opt-in switch: flip to True to rewrite main.tex and rebuild the PDF.
UPDATE_LATEX = False

if not UPDATE_LATEX:
    print("Set UPDATE_LATEX = True to update main.tex and recompile PDF")
else:
    content = MAIN_TEX.read_text(encoding='utf-8')
    original_len = len(content)

    # Swap in the freshly generated appendix tables, one label at a time.
    _replacements = (
        ('tab:detailed_backends', table_a1_latex),
        ('tab:detailed_comparison', table_a2_latex),
    )
    for _label, _latex in _replacements:
        content = replace_table_in_tex(content, _label, _latex)

    # Guard: refuse to write if the document shrank by more than 10%.
    if len(content) < original_len * 0.9:
        print("ERROR: Content too short, skipping write")
    else:
        MAIN_TEX.write_text(content, encoding='utf-8')
        print(f"main.tex updated ({len(content)} bytes)")

        if not compile_latex(MAIN_TEX):
            print("PDF compilation failed")
        else:
            print(f"PDF compiled: {MAIN_TEX.parent / 'main.pdf'}")

Set UPDATE_LATEX = True to update main.tex and recompile PDF


## 6. Summary Statistics

In [11]:
# Headline numbers quoted in the paper, taken from the family-level summary.
_bar = "=" * 50
_avg_runtime = vamos_family['Average']

print(_bar, "KEY STATISTICS FOR PAPER", _bar, sep="\n")
print(f"Best backend: Numba ({_avg_runtime['numba']:.2f}s avg)")
print(f"NumPy baseline: {_avg_runtime['numpy']:.2f}s avg")
print(f"moocore: {_avg_runtime['moocore']:.2f}s avg")
print()
print("Speedup vs pymoo:")
for _title, _key in (("ZDT family", 'ZDT'), ("DTLZ family", 'DTLZ'), ("Overall", 'Average')):
    print(f"  {_title}: {speedup[_key]:.1f}x")

KEY STATISTICS FOR PAPER
Best backend: Numba (0.43s avg)
NumPy baseline: 3.75s avg
moocore: 0.53s avg

Speedup vs pymoo:
  ZDT family: 17.1x
  DTLZ family: 10.5x
  Overall: 13.8x
