# Operator Efficacy: Understanding Crossover & Mutation

This notebook analyzes how well genetic operators (crossover, mutation) are working for your problem.

## Goals
1.  **Offspring Fitness Distribution**: Compare fitness of parents vs offspring.
2.  **Parent-Offspring Distance**: Measure exploration (how far offspring are from parents).
3.  **Operator Success Rate**: Fraction of offspring that improve over parents.
4.  **Efficacy vs Quality**: Do operators work better on good or bad parents?

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# VAMOS imports
from vamos import ZDT1

## 1. Setup Problem and Operators
We implement Simulated Binary Crossover (SBX) and Polynomial Mutation (PM) by hand so that we can control the operators directly and measure their effects.

In [None]:
# Problem: a 30-variable ZDT1 instance shared by every cell below
problem = ZDT1(n_var=30)
n_var = problem.n_var

# Lower / upper bounds of the decision variables, as exposed by the problem
xl = problem.xl
xu = problem.xu

def evaluate(X):
    """Compute objective values for a batch of solutions.

    A zero-filled ``(len(X), 2)`` array is placed under the key ``"F"`` of an
    output dictionary, which is passed to ``problem.evaluate`` together with
    ``X``. Whatever is stored under ``"F"`` after that call is returned.
    """
    n_solutions = len(X)
    results = dict(F=np.zeros((n_solutions, 2)))
    problem.evaluate(X, results)
    return results["F"]

# -- SBX Crossover --
def sbx_crossover(p1, p2, eta=20.0, prob=0.9):
    """Simulated Binary Crossover (SBX) producing two children from two parents.

    One uniform draw decides whether the pair is recombined at all: when the
    draw exceeds ``prob`` the children are returned as plain copies of the
    parents. Otherwise every variable is recombined independently with
    probability 0.5, using a spread factor ``beta`` whose distribution is
    shaped by ``eta``, and both children are clipped to the module-level
    bounds ``xl`` / ``xu``.

    All randomness comes from the global ``np.random`` state.
    """
    child_a = p1.copy()
    child_b = p2.copy()

    # Pair-level gate: no recombination, hand back untouched copies.
    if np.random.rand() > prob:
        return child_a, child_b

    for gene in range(len(p1)):
        # Variable-level gate: roughly half of the variables are left as-is.
        if np.random.rand() > 0.5:
            continue

        u = np.random.rand()
        exponent = 1 / (eta + 1)
        # Contracting spread for u <= 0.5, expanding spread otherwise.
        beta = (2 * u) ** exponent if u <= 0.5 else (1 / (2 * (1 - u))) ** exponent

        a, b = p1[gene], p2[gene]
        child_a[gene] = 0.5 * ((1 + beta) * a + (1 - beta) * b)
        child_b[gene] = 0.5 * ((1 - beta) * a + (1 + beta) * b)

    # Clip to bounds
    return np.clip(child_a, xl, xu), np.clip(child_b, xl, xu)

# -- Polynomial Mutation --
def pm_mutation(x, eta=20.0, prob=None):
    """Polynomial Mutation of a single solution vector.

    Each variable is mutated independently with probability ``prob``
    (``1 / len(x)`` when ``prob`` is ``None``). A mutated variable is shifted
    by a perturbation in (-1, 1), shaped by ``eta`` and scaled by the width of
    that variable's range in the module-level bounds ``xl`` / ``xu``. The
    result is a new array clipped to those bounds; ``x`` is not modified.

    All randomness comes from the global ``np.random`` state.
    """
    n_genes = len(x)
    rate = 1.0 / n_genes if prob is None else prob
    mutant = x.copy()

    for gene in range(n_genes):
        # Gate draw: skip this variable unless the draw falls within `rate`.
        if np.random.rand() > rate:
            continue

        u = np.random.rand()
        lower, upper = xl[gene], xu[gene]
        exponent = 1 / (eta + 1)

        # Negative step for u < 0.5, positive step otherwise.
        step = (2 * u) ** exponent - 1 if u < 0.5 else 1 - (2 * (1 - u)) ** exponent

        mutant[gene] = x[gene] + step * (upper - lower)

    return np.clip(mutant, xl, xu)

# Report the number of decision variables read from the problem instance
print(f"Problem: ZDT1 with {n_var} variables")

## 2. Generate Parent Population
We sample random parents and apply operators to create offspring.

In [None]:
# Seed the global NumPy RNG so the sampled parents (and every operator draw
# that follows) are reproducible on a fresh run.
np.random.seed(42)
n_parents = 100

# Random parents, drawn uniformly between the lower and upper bounds
parents = np.random.uniform(low=xl, high=xu, size=(n_parents, n_var))
parent_F = evaluate(parents)

parent_f1 = parent_F[:, 0]
print(f"Generated {n_parents} parent solutions.")
print(f"Parent objectives: f1 range [{parent_f1.min():.3f}, {parent_f1.max():.3f}]")

## 3. Apply Operators and Measure Effects

In [None]:
# -- Crossover Only --
# Consecutive parents (0,1), (2,3), ... are mated; each child is paired with
# the parent in the same position (child 1 <-> parent 1, child 2 <-> parent 2)
# when measuring the decision-space distance.
crossover_children = []
crossover_distances = []

for first in range(0, n_parents - 1, 2):
    mates = (parents[first], parents[first + 1])
    children = sbx_crossover(*mates)
    for parent_vec, child_vec in zip(mates, children):
        crossover_children.append(child_vec)
        crossover_distances.append(np.linalg.norm(child_vec - parent_vec))

crossover_offspring = np.array(crossover_children)
crossover_F = evaluate(crossover_offspring)

# -- Mutation Only --
# Every parent yields exactly one mutant; the distance is mutant vs. its parent.
mutants = []
mutation_distances = []

for parent_vec in parents:
    mutant = pm_mutation(parent_vec)
    mutants.append(mutant)
    mutation_distances.append(np.linalg.norm(mutant - parent_vec))

mutation_offspring = np.array(mutants)
mutation_F = evaluate(mutation_offspring)

print(f"Crossover offspring: {len(crossover_offspring)}")
print(f"Mutation offspring: {len(mutation_offspring)}")

## 4. Visualize Fitness Distributions

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (legend label, objective matrix, colour) for each group overlaid in both panels
groups = [
    ('Parents', parent_F, 'blue'),
    ('Crossover', crossover_F, 'green'),
    ('Mutation', mutation_F, 'red'),
]

# One panel per objective: column 0 -> f1, column 1 -> f2
for obj_idx, ax in enumerate(axes):
    # Use the same bin edges for all three groups. Letting each hist() call
    # pick its own 20 bins gives every group a different bin width, so the
    # overlaid bar heights would not be comparable.
    pooled = np.concatenate([F[:, obj_idx] for _, F, _ in groups])
    bin_edges = np.histogram_bin_edges(pooled, bins=20)

    for label, F, color in groups:
        ax.hist(F[:, obj_idx], bins=bin_edges, alpha=0.5, label=label, color=color)

    ax.set_xlabel(f'$f_{obj_idx + 1}$')
    ax.set_ylabel('Count')
    ax.set_title(f'Objective $f_{obj_idx + 1}$ Distribution')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Parent-Offspring Distance (Exploration)

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))

# Long-form table: one row per offspring with its operator and its distance
# to the parent. list(...) guards against the distances being arrays, where
# `+` would add element-wise instead of concatenating.
data = pd.DataFrame({
    'Operator': ['Crossover'] * len(crossover_distances) + ['Mutation'] * len(mutation_distances),
    'Distance': list(crossover_distances) + list(mutation_distances)
})

# Passing `palette` without `hue` is deprecated in recent seaborn releases, so
# the x variable is also mapped to hue. dodge=False keeps each box centred on
# its tick, and any legend seaborn adds for the redundant hue is removed.
sns.boxplot(data=data, x='Operator', y='Distance', hue='Operator',
            palette='Set2', dodge=False, ax=ax)
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()

ax.set_title('Parent-Offspring Distance in Decision Space')
ax.set_ylabel('Euclidean Distance')
ax.grid(True, alpha=0.3, axis='y')
plt.show()

print(f"Mean Crossover Distance: {np.mean(crossover_distances):.4f}")
print(f"Mean Mutation Distance: {np.mean(mutation_distances):.4f}")

## 6. Operator Success Rate
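Each offspring is compared with its corresponding parent (for crossover, child 1 vs. parent 1 and child 2 vs. parent 2). It is "successful" if it dominates that parent or if neither dominates the other; the summary below reports these two cases separately as *Better* and *Neutral*, alongside *Worse* (the parent dominates the offspring).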

In [None]:
def dominates(a, b):
    """Pareto-dominance test for minimization.

    True when ``a`` is no worse than ``b`` in every objective and strictly
    better in at least one.
    """
    no_worse_anywhere = np.all(a <= b)
    return no_worse_anywhere and np.any(a < b)

def is_improving(parent_f, offspring_f):
    """Classify an offspring against its parent by Pareto dominance.

    Returns an integer outcome code:
      2 - the offspring dominates the parent,
      0 - the parent dominates the offspring,
      1 - neither dominates the other (this includes identical objectives).
    """
    if dominates(offspring_f, parent_f):
        return 2  # Offspring dominates

    parent_wins = dominates(parent_f, offspring_f)
    return 0 if parent_wins else 1  # Parent dominates / Non-dominated

# Mutation success: mutant k is judged against parent k
mutation_success = [
    is_improving(parent_F[k], mutation_F[k])
    for k in range(len(parents))
]

# Crossover success: offspring k is judged against parent k, i.e. within each
# mated pair child 1 is compared with parent 1 and child 2 with parent 2
crossover_success = [
    is_improving(parent_F[k], crossover_F[k])
    for first in range(0, n_parents - 1, 2)
    for k in (first, first + 1)
]

def summarize_success(results, name):
    """Print how many offspring were better, neutral, or worse than their parent.

    Parameters
    ----------
    results : iterable of int
        Outcome codes: 2 = offspring dominates its parent, 1 = neither
        dominates the other, 0 = parent dominates the offspring. Lists,
        tuples and NumPy arrays are all accepted.
    name : str
        Heading printed above the three summary lines.

    An empty ``results`` prints zero counts with ``0.0%`` instead of raising
    ``ZeroDivisionError``.
    """
    # Materialize once so that .count() works for any iterable, arrays included.
    outcomes = list(results)
    total = len(outcomes)

    def _line(label, code):
        count = outcomes.count(code)
        share = 100 * count / total if total else 0.0
        return f"  {label}: {count} ({share:.1f}%)"

    print(f"{name}:")
    print(_line("Better (Dominates Parent)", 2))
    print(_line("Neutral (Non-dominated)", 1))
    print(_line("Worse (Dominated by Parent)", 0))

# Print the better / neutral / worse breakdown for each operator,
# separated by a blank line
summarize_success(crossover_success, "Crossover")
print()
summarize_success(mutation_success, "Mutation")

In [None]:
# Visualize as stacked bar
labels = ['Crossover', 'Mutation']
outcome_lists = (crossover_success, mutation_success)

# Per-operator counts for each outcome code (2 = better, 1 = neutral, 0 = worse)
better = [outcomes.count(2) for outcomes in outcome_lists]
neutral = [outcomes.count(1) for outcomes in outcome_lists]
worse = [outcomes.count(0) for outcomes in outcome_lists]

x = np.arange(len(labels))
width = 0.5

fig, ax = plt.subplots(figsize=(8, 5))

# Stack the segments bottom-up, tracking the running height of each bar
stack_height = np.zeros(len(labels), dtype=int)
for counts, legend_label, colour in (
    (better, 'Better', 'green'),
    (neutral, 'Neutral', 'gray'),
    (worse, 'Worse', 'red'),
):
    ax.bar(x, counts, width, bottom=stack_height, label=legend_label, color=colour)
    stack_height = stack_height + np.array(counts)

ax.set(ylabel='Count', title='Operator Success Rate')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')
plt.show()

## 7. Efficacy vs Parent Quality
Do operators work better on "good" parents or "bad" parents?
We plot each parent's $f_1$ value (used here as a rough proxy for quality) against the change in $f_1$ produced by mutation; only the mutation operator is examined in this section.

> **Hypothesis**: Mutation makes small steps, so it should be safer for good parents. Crossover makes big leaps, which might be destructive for highly optimized parents but great for exploring from poor parents.

In [None]:
# Metric: Improvement in f1 (Parent f1 - Offspring f1). Positive = Better.
mut_improvement = parent_F[:, 0] - mutation_F[:, 0]
parent_f1 = parent_F[:, 0]

# Centre the diverging colormap on zero so that green always means "improved"
# and red always means "worsened"; fall back to 1.0 if nothing changed at all.
colour_limit = np.abs(mut_improvement).max() or 1.0
top = mut_improvement.max()

fig, ax = plt.subplots(figsize=(10, 6))
sc = ax.scatter(parent_f1, mut_improvement, c=mut_improvement, cmap='RdYlGn',
                vmin=-colour_limit, vmax=colour_limit, alpha=0.7, edgecolor='gray')
ax.axhline(0, color='black', linestyle='--', alpha=0.5)
# The arrow points left: smaller f1 sits on the left side of the axis,
# matching the "Good Parents" annotation placed there.
ax.set_xlabel("Parent $f_1$ (<- Lower is Better)")
ax.set_ylabel("Improvement ($f_1^{parent} - f_1^{offspring}$)")
ax.set_title("Mutation Efficacy vs Parent Quality")
fig.colorbar(sc, ax=ax, label="Improvement magnitude")
ax.grid(True, alpha=0.3)
ax.text(parent_f1.min(), top, "Good Parents", fontsize=10, verticalalignment='top')
ax.text(parent_f1.max(), top, "Poor Parents", fontsize=10, verticalalignment='top', horizontalalignment='right')
plt.show()