# Primal Optimization Methods for $C_{1a}$

## Summary
Compares multiple optimization strategies for the discretized problem:
$$\min_{x \ge 0,\, \sum_i x_i = 1} \; \max_k \; 2P \sum_{i+j=k} x_i x_j$$

**Methods tested**:
- (A) L-BFGS-B with softmax reparametrization + random restarts
- (B) Polyak subgradient on simplex with target values
- (C) Basin hopping + L-BFGS-B (code not shown in this notebook)
- (D) Peak redistribution + Polyak hybrid

**Best results** (exact peak autoconvolution on uniform grid):

| P   | Strategy B (Polyak) | Strategy D (Hybrid) |
|-----|---------------------|---------------------|
| 50  | 1.520036            | —                   |
| 100 | 1.524759            | 1.516               |
| 200 | 1.520114            | 1.515               |
| 500 | —                   | 1.509               |
| 1000| —                   | 1.508               |

**Conclusion**: Polyak subgradient (B) consistently outperforms L-BFGS-B (A) and basin hopping (C). The peak redistribution hybrid (D) gives a further improvement at larger P. However, all of these methods are dominated by the LogSumExp continuation approach (see `logsumexp_optimizer.ipynb`).

In [None]:
import numpy as np
import numba as nb
from numba import njit, prange
from scipy.optimize import minimize, basinhopping
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import json
import time

# Print the NumPy and Numba versions in use.
print(f"NumPy {np.__version__}, Numba {nb.__version__}")

In [None]:
# === Core JIT-compiled functions ===

@nb.njit(cache=True)
def project_simplex(x):
    """Project ``x`` onto the probability simplex (entries >= 0, sum 1).

    Uses the sort-and-threshold scheme: sort the entries in descending
    order, find the last position whose value still exceeds the running
    threshold, then shift every entry by the resulting offset and clip
    at zero.

    Parameters
    ----------
    x : 1-D float array
        Point to project. It is not modified.

    Returns
    -------
    1-D float array
        A new array of the same length as ``x``.
    """
    size = len(x)
    descending = np.sort(x)[::-1]
    shifted_cumsum = np.cumsum(descending) - 1.0

    # Last index at which the sorted value stays above the threshold.
    pivot = 0
    for k in range(size):
        if descending[k] * (k + 1) > shifted_cumsum[k]:
            pivot = k

    offset = shifted_cumsum[pivot] / (pivot + 1.0)

    projected = np.empty(size)
    for k in range(size):
        projected[k] = max(x[k] - offset, 0.0)
    return projected


@nb.njit(cache=True)
def autoconv_scaled(f, P):
    """Scaled discrete autoconvolution of ``f``.

    Computes ``c[m] = 2 * P * sum_{i + j = m} f[i] * f[j]`` for every
    ``m`` in ``0 .. 2 * len(f) - 2``.

    Returns
    -------
    1-D float array of length ``2 * len(f) - 1``.
    """
    size = len(f)
    conv = np.zeros(2 * size - 1)
    for a in range(size):
        fa = f[a]
        for b in range(size):
            conv[a + b] += fa * f[b]
    return conv * (2.0 * P)


@nb.njit(cache=True)
def compute_peak(f, P):
    """Return the largest entry of the scaled autoconvolution of ``f``."""
    scaled = autoconv_scaled(f, P)
    return np.max(scaled)


@nb.njit(cache=True)
def subgradient_peak(f, P):
    """Subgradient of the peak of the scaled autoconvolution at ``f``.

    The active index ``k`` is the first position whose value lies within
    ``1e-10`` of the maximum. Entry ``i`` of the result is
    ``4 * P * f[k - i]`` whenever ``k - i`` is a valid index of ``f``,
    and zero otherwise.

    Returns
    -------
    1-D float array with the same length as ``f``.
    """
    conv = autoconv_scaled(f, P)
    top = np.max(conv)

    # First index that attains the peak up to the tolerance.
    active = 0
    for m in range(len(conv)):
        if conv[m] >= top - 1e-10:
            active = m
            break

    size = len(f)
    grad = np.zeros(size)
    # Only indices i with 0 <= active - i < size contribute.
    first = max(0, active - size + 1)
    last = min(size - 1, active)
    for i in range(first, last + 1):
        grad[i] = 4.0 * P * f[active - i]
    return grad


# Call each jitted helper once on a tiny uniform vector so that
# compilation happens here rather than inside the timed benchmarks.
_f = np.ones(10) / 10.0
_ = project_simplex(_f)
_ = compute_peak(_f, 10)
_ = subgradient_peak(_f, 10)
print("JIT compiled.")

## Strategy A: L-BFGS-B with Softmax

In [None]:
def strategy_a(P, n_restarts=50):
    """Minimise the peak with L-BFGS-B over softmax-parametrised weights.

    Each restart draws a Gaussian starting point (scaled by one of five
    fixed factors, cycled by restart index), maps it through a softmax so
    the weights are non-negative and sum to one, and minimises
    ``2 * P * max(convolve(x, x))`` with SciPy's L-BFGS-B.

    Parameters
    ----------
    P : int
        Number of grid points (length of the weight vector).
    n_restarts : int
        Number of random restarts.

    Returns
    -------
    (best_val, best_x)
        Lowest objective value found and the matching weight vector;
        ``(np.inf, None)`` when ``n_restarts`` is 0.
    """
    init_scales = (0.1, 0.3, 0.5, 1.0, 2.0)

    def softmax(z):
        # Subtract the maximum before exponentiating for numerical stability.
        shifted = z - np.max(z)
        weights = np.exp(shifted)
        return weights / np.sum(weights)

    def objective(z):
        x = softmax(z)
        return np.max(np.convolve(x, x)) * 2 * P

    best_val, best_x = np.inf, None
    for restart in range(n_restarts):
        start = np.random.randn(P) * init_scales[restart % 5]
        res = minimize(
            objective,
            start,
            method='L-BFGS-B',
            options={'maxiter': 500, 'ftol': 1e-12},
        )
        if res.fun < best_val:
            best_val = res.fun
            best_x = softmax(res.x)
    return best_val, best_x

## Strategy B: Polyak Subgradient on Simplex

In [None]:
def strategy_b(P, n_iters=50000, n_restarts=10, targets=(1.50, 1.49, 1.48)):
    """Polyak subgradient method on the probability simplex.

    For every target value and every restart, start from a Dirichlet
    sample and iterate ``x <- project_simplex(x - step * g)``, where ``g``
    is a subgradient of the peak and ``step`` is the Polyak step
    ``(f(x) - target) / ||g||^2``, floored at ``1e-4 / (1 + t)`` so the
    iterate keeps moving once the peak drops to or below the target.

    Parameters
    ----------
    P : int
        Number of grid points (length of the weight vector).
    n_iters : int
        Maximum number of subgradient steps per restart.
    n_restarts : int
        Number of random starting points per target.
    targets : sequence of float
        Target objective values used in the Polyak step size.

    Returns
    -------
    (best_val, best_x)
        Lowest peak seen over all targets, restarts and iterates, and the
        corresponding point; ``(np.inf, None)`` if no point was evaluated.
    """
    best_val, best_x = np.inf, None
    for target in targets:
        for _ in range(n_restarts):
            x = np.random.dirichlet(np.ones(P))
            best_local, best_local_x = np.inf, x.copy()
            for t in range(n_iters):
                fval = float(compute_peak(x, P))
                if fval < best_local:
                    best_local, best_local_x = fval, x.copy()
                g = subgradient_peak(x, P)
                gnorm2 = np.dot(g, g)
                if gnorm2 < 1e-20:
                    # Vanishing subgradient: no usable direction is left.
                    break
                step = max((fval - target) / gnorm2, 1e-4 / (1 + t))
                x = project_simplex(x - step * g)
            else:
                # The loop scores each iterate *before* stepping, so the
                # point produced by the last step has not been evaluated
                # yet. Score it here so it is not dropped. (After a break
                # the current x has already been scored.)
                fval = float(compute_peak(x, P))
                if fval < best_local:
                    best_local, best_local_x = fval, x.copy()
            if best_local < best_val:
                best_val, best_x = best_local, best_local_x.copy()
    return best_val, best_x

## Strategy D: Peak Redistribution + Polyak Hybrid

In [None]:
@nb.njit(cache=True)
def compute_peak_responsibility(f, P, beta=100.0):
    """Softmax-weighted contribution of each ``f[i]`` to the peak(s).

    The scaled autoconvolution entries are turned into weights
    ``exp(beta * (c - max(c)))`` normalised to sum to one. Entry ``i`` of
    the result is ``2 * sum_j weights[i + j] * f[j]``.

    Parameters
    ----------
    f : 1-D float array
        Current weight vector.
    P : int
        Scaling parameter forwarded to ``autoconv_scaled``.
    beta : float
        Sharpness of the softmax; larger values concentrate the weight on
        the largest autoconvolution entries.

    Returns
    -------
    1-D float array with the same length as ``f``.
    """
    conv = autoconv_scaled(f, P)
    top = np.max(conv)
    # Subtracting the maximum keeps every exponent <= 0.
    soft = np.exp(beta * (conv - top))
    soft = soft / np.sum(soft)

    size = len(f)
    resp = np.zeros(size)
    for i in range(size):
        acc = 0.0
        # m = i + j always lies inside the autoconvolution support.
        for j in range(size):
            acc += soft[i + j] * f[j]
        resp[i] = acc
    return resp * 2.0


@nb.njit(cache=True)
def hybrid_optimize(P, max_iters_redistrib=300, max_iters_polyak=200, target_factor=0.99):
    """Peak redistribution followed by a Polyak subgradient polish.

    Phase 1 starts from a normalised exponential sample and repeatedly
    moves mass away from entries with above-average peak responsibility,
    projecting back onto the simplex. A step is accepted only if it lowers
    the peak; the step size grows by 10% (capped at 1.0) on acceptance and
    is halved on rejection, stopping once it falls below ``1e-9``.

    Phase 2 restarts from the best point of phase 1 and takes Polyak
    steps towards ``target_factor * peak_best``; the target is tightened
    each time a new best peak is found.

    Parameters
    ----------
    P : int
        Number of grid points (length of the weight vector).
    max_iters_redistrib : int
        Iteration cap for phase 1.
    max_iters_polyak : int
        Iteration cap for phase 2.
    target_factor : float
        Polyak target expressed as a fraction of the best peak so far.

    Returns
    -------
    (f_best, peak_best)
        Best weight vector found and its peak value. Note the order:
        point first, value second.
    """
    f = np.random.exponential(1.0, P)
    f = f / np.sum(f)
    # Peak of the current iterate, cached so the O(P^2) autoconvolution is
    # not recomputed for a point whose value is already known.
    peak_cur = compute_peak(f, P)
    f_best, peak_best = f.copy(), peak_cur
    step = 0.1

    # Phase 1: redistribute mass from high-responsibility to low-responsibility
    for _ in range(max_iters_redistrib):
        resp = compute_peak_responsibility(f, P)
        f_new = project_simplex(f - step * (resp - np.mean(resp)))
        peak_new = compute_peak(f_new, P)
        if peak_new < peak_cur:
            f = f_new
            peak_cur = peak_new
            if peak_new < peak_best:
                f_best, peak_best = f_new.copy(), peak_new
            step = min(step * 1.1, 1.0)
        else:
            step *= 0.5
            if step < 1e-9:
                break

    # Phase 2: Polyak polish
    f, target = f_best.copy(), peak_best * target_factor
    # f is a copy of f_best, so its peak is exactly peak_best.
    peak_cur = peak_best
    for _ in range(max_iters_polyak):
        g = subgradient_peak(f, P)
        gnorm2 = 0.0
        for i in range(len(g)):
            gnorm2 += g[i] * g[i]
        if gnorm2 < 1e-12:
            break
        # Clamp at zero: never step once the peak is at or below the target.
        step_p = max((peak_cur - target) / gnorm2, 0.0)
        f_new = project_simplex(f - step_p * g)
        peak_new = compute_peak(f_new, P)
        if peak_new < peak_best:
            f_best, peak_best = f_new.copy(), peak_new
            target = peak_best * target_factor
        # Polyak steps are always taken, even when the peak goes up.
        f = f_new
        peak_cur = peak_new
    return f_best, peak_best


# Warmup: call the jitted hybrid routines once on a tiny problem so that
# compilation happens here rather than inside the timed benchmark.
# NOTE(review): this seed is set from interpreted code, so it fixes NumPy's
# global generator (used by the non-jitted strategies). Per Numba's
# documentation the generator used inside jitted code is separate and is
# presumably not affected by this call - confirm.
np.random.seed(42)
_ = compute_peak_responsibility(np.ones(10)/10, 10)
_ = hybrid_optimize(10)
print("Hybrid optimizer compiled.")

In [None]:
# === Benchmark all strategies ===
# For each problem size P, run strategies A, B and D, print the best value
# and wall-clock time of each, and record everything in `results`.

P_values = [10, 20, 50, 100, 200]
results = {}


@nb.njit
def _seed_numba_rng(seed):
    """Seed the random generator used by jitted code.

    Numba keeps its own generator state for compiled functions, and it is
    only seeded by calling ``np.random.seed`` from *inside* jitted code.
    """
    np.random.seed(seed)


for P in P_values:
    print(f"\nP = {P}")
    
    # Strategy A
    t0 = time.time()
    val_a, _ = strategy_a(P, n_restarts=50)
    print(f"  A (L-BFGS): {val_a:.6f} ({time.time()-t0:.1f}s)")
    
    # Strategy B
    t0 = time.time()
    val_b, _ = strategy_b(P, n_iters=50000, n_restarts=10)
    print(f"  B (Polyak): {val_b:.6f} ({time.time()-t0:.1f}s)")
    
    # Strategy D (hybrid): 100 independently seeded runs in parallel
    t0 = time.time()
    def _run(seed):
        # Kept from the original: seeds NumPy's interpreter-level generator.
        np.random.seed(seed)
        # hybrid_optimize draws its start point inside jitted code, which
        # uses Numba's own generator; seed that one too so that each run
        # is reproducible from its seed.
        _seed_numba_rng(seed)
        return hybrid_optimize(P)
    hyb_results = Parallel(n_jobs=-1)(delayed(_run)(s) for s in range(100))
    # hybrid_optimize returns (point, peak); compare on the peak.
    val_d = min(r[1] for r in hyb_results)
    print(f"  D (Hybrid): {val_d:.6f} ({time.time()-t0:.1f}s)")
    
    best = min(val_a, val_b, val_d)
    results[P] = {'A': val_a, 'B': val_b, 'D': val_d, 'best': best}
    print(f"  Best: {best:.6f}")

In [None]:
# === Results summary ===
# Print one table row per problem size with the value reached by each
# strategy, followed by the overall best value across all sizes.

# Header row; column widths match the data rows below.
print(f"{'P':>5} | {'A (L-BFGS)':>12} | {'B (Polyak)':>12} | {'D (Hybrid)':>12} | {'Best':>12}")
print('-' * 65)
for P in P_values:
    r = results[P]
    print(f"{P:>5} | {r['A']:>12.6f} | {r['B']:>12.6f} | {r['D']:>12.6f} | {r['best']:>12.6f}")

# Reference value (hard-coded) next to the minimum over every P in `results`.
print(f"\nBest known: 1.5029 (literature)")
print(f"Best this notebook: {min(r['best'] for r in results.values()):.6f}")

## Conclusions

- **Polyak subgradient (B)** consistently beats L-BFGS-B (A) and basin hopping (C, not shown, ~1.57)
- **Peak redistribution hybrid (D)** provides further improvement, especially at larger P
- All methods here are dominated by LogSumExp continuation (see `logsumexp_optimizer.ipynb`)
- The softmax reparametrization (A) struggles because it cannot reach simplex boundaries and creates flat directions that degrade quasi-Newton conditioning