# Ablation Study: Deconstructing NSGA-II

An **ablation study** helps understand which components of an algorithm contribute most to its performance.
In this notebook, we compare standard NSGA-II against simplified variants on the **ZDT1** benchmark.

## Variants
1.  **Full**: Standard NSGA-II (SBX Crossover + PM Mutation + Tournament Selection + Rank Survival).
2.  **No Crossover**: Mutation only (crossover probability set to 0, so the population evolves purely through mutation, much like a set of parallel local searches).
3.  **No Mutation**: Crossover only (pure recombination).
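4.  **No Selection**: Random mating selection (tournament size = 1). Parents are drawn without selection pressure, which evaluates the effect of pure drift/exploration in parent choice; rank-based survival is left unchanged.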

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import wilcoxon

# VAMOS imports
from vamos.api import optimize, OptimizeConfig
from vamos.engine.algorithm.config import NSGAIIConfig
from vamos.foundation.problem.registry import make_problem_selection
from vamos.foundation.metrics.hypervolume import compute_hypervolume

## 1. Experiment Runner
We define a helper to run an ablation variant multiple times for statistical significance.

In [None]:
def run_variant(variant_name, config, problem_key="zdt1", n_runs=10, max_evals=2000):
    """Run one ablation variant repeatedly and collect a hypervolume per run.

    Args:
        variant_name: Label of the variant. Accepted for the caller's
            convenience; it is not used inside this function.
        config: Algorithm config builder; ``config.fixed()`` is called for
            every run and passed to ``OptimizeConfig`` as ``algorithm_config``.
        problem_key: Key handed to ``make_problem_selection``.
        n_runs: Number of independent runs; seeds are ``0 .. n_runs - 1``.
        max_evals: Evaluation budget used in the ``("n_eval", max_evals)``
            termination tuple.

    Returns:
        A list with one hypervolume value per run, in seed order. A run whose
        result has no objective values (``F`` is ``None`` or empty) scores 0.0.
    """

    def _hypervolume_for_seed(run_seed):
        # A fresh problem instance is created for every run.
        problem = make_problem_selection(problem_key).instantiate()

        run_config = OptimizeConfig(
            problem=problem,
            algorithm="nsgaii",
            algorithm_config=config.fixed(),
            termination=("n_eval", max_evals),
            seed=run_seed,
        )
        outcome = optimize(run_config)

        # No objective values to measure: score the run as zero hypervolume.
        if outcome.F is None or len(outcome.F) == 0:
            return 0.0

        # Hypervolume is measured against the fixed reference point (1.1, 1.1).
        return compute_hypervolume(outcome.F, ref_point=np.array([1.1, 1.1]))

    return [_hypervolume_for_seed(run_seed) for run_seed in range(n_runs)]


# Define variants
# Every variant shares the same population size and operators; each ablation
# changes exactly one setting relative to the full configuration.
def _build_config(crossover_prob=0.9, mutation_prob=0.1, pressure=2):
    """Build an NSGA-II config (pop size 40, SBX + PM + tournament selection)."""
    return (
        NSGAIIConfig()
        .pop_size(40)
        .crossover("sbx", prob=crossover_prob)
        .mutation("pm", prob=mutation_prob)
        .selection("tournament", pressure=pressure)
    )


# 1. Full: all components at their baseline settings.
cfg_full = _build_config()

# 2. No Crossover: crossover probability forced to 0.0.
cfg_no_cross = _build_config(crossover_prob=0.0)

# 3. No Mutation: mutation probability forced to 0.0.
cfg_no_mut = _build_config(mutation_prob=0.0)

# 4. No Selection: tournament pressure of 1.
# Note: a tournament of size 1 picks 1 candidate at random, effectively random mating.
cfg_no_sel = _build_config(pressure=1)

# Insertion order here determines the order of runs, plots and pairwise tests.
variants = {
    "Full (NSGA-II)": cfg_full,
    "No Crossover": cfg_no_cross,
    "No Mutation": cfg_no_mut,
    "No Selection": cfg_no_sel,
}

## 2. Execute Comparison
Running the benchmark...

In [None]:
# Run every variant and gather one long-format record per (variant, run) pair.
# Records are appended in seed order within each variant.
results_list = []

print("Running Ablation Study...")
for name, cfg in variants.items():
    print(f"  Running variant: {name}")
    results_list.extend(
        {"Variant": name, "Hypervolume": hv}
        for hv in run_variant(name, cfg, n_runs=10)
    )

# Long-format table with columns "Variant" and "Hypervolume".
df_ablation = pd.DataFrame(results_list)

## 3. Visualization
Box plots show the distribution of performance for each variant.

In [None]:
# Draw on an explicit Axes so both seaborn layers and the styling calls
# target the same plot.
fig, ax = plt.subplots(figsize=(10, 6))

# Passing `palette` without `hue` is deprecated in seaborn 0.13. Map the
# palette through `hue` instead; `dodge=False` keeps one centered box per
# variant on seaborn versions that would otherwise offset hue groups.
sns.boxplot(
    data=df_ablation,
    x="Variant",
    y="Hypervolume",
    hue="Variant",
    palette="Set2",
    dodge=False,
    ax=ax,
)
# Overlay the individual runs so the spread within each box is visible.
sns.stripplot(data=df_ablation, x="Variant", y="Hypervolume", color="black", alpha=0.3, ax=ax)

# The hue legend would only repeat the x-axis labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

ax.set_title("Ablation Study: Component Contributions on ZDT1")
ax.set_ylim(0, 1.2)
ax.grid(True, alpha=0.3)
plt.show()

# Per-variant mean and standard deviation of the hypervolume, best first.
summary = df_ablation.groupby("Variant")["Hypervolume"].agg(["mean", "std"])
print("\nSummary Statistics:")
print(summary.sort_values("mean", ascending=False))

## 4. Statistical Significance (Wilcoxon Test)
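We perform pairwise Wilcoxon signed-rank tests to see whether the observed differences are statistically significant. The test is paired: run *k* of one variant is compared with run *k* of the other, since both use the same seed. Note that the reported p-values are not corrected for multiple comparisons.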

In [None]:
from itertools import combinations

variants_keys = list(variants.keys())


def _significance_label(p_value):
    """Map a p-value to star notation; "ns" means not significant at 0.05."""
    for threshold, label in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if p_value < threshold:
            return label
    return "ns"


print("Pairwise Wilcoxon Tests (p-values):")
for name_a, name_b in combinations(variants_keys, 2):
    # Scores are paired by position: rows were appended in seed order for
    # every variant, so element k of each array comes from the same seed.
    data_a = df_ablation.loc[df_ablation["Variant"] == name_a, "Hypervolume"].to_numpy()
    data_b = df_ablation.loc[df_ablation["Variant"] == name_b, "Hypervolume"].to_numpy()

    # The signed-rank test is undefined when every paired difference is zero.
    # Depending on the SciPy version this may raise ValueError or produce a
    # NaN p-value, so handle the case explicitly instead of relying on either.
    if np.array_equal(data_a, data_b):
        print(f"{name_a} vs {name_b}: Identical results (p=1.0)")
        continue

    try:
        _, p_val = wilcoxon(data_a, data_b)
    except ValueError as e:
        # Report the failing pair but keep going with the remaining pairs.
        print(f"{name_a} vs {name_b}: Error ({e})")
        continue

    # A NaN p-value compares False against every threshold and would be
    # mislabeled as "ns"; report it as undefined instead.
    if np.isnan(p_val):
        print(f"{name_a} vs {name_b}: p=nan (test undefined)")
        continue

    significance = _significance_label(p_val)
    print(f"{name_a} vs {name_b}: p={p_val:.5f} ({significance})")