# Joint Edge + Height Optimization for Autoconvolution Minimization

Optimizes **both bin edges and heights** simultaneously as one parameter vector
`theta = (gamma, eta)`, avoiding any grid interpolation.

### Method
- **Parametrization**: `gamma` controls bin widths via softplus + normalization;
  `eta` controls bin heights via softplus + normalization. All constraints
  (positive widths summing to 0.5, positive heights with integral 1) are
  satisfied by construction.
- **Objective**: LogSumExp smooth approximation to `max_t (f*f)(t)`, with
  smooth log-barrier penalty on width ratio to prevent degenerate bins.
- **Speed**: Autoconvolution computed via **analytical Fourier transform** of
  the step function — O(NP) per evaluation vs O(P^4) for exact breakpoint
  evaluation. Exact breakpoint evaluation used only for final verification.
- **Gradients**: Analytical backprop through LSE → irfft → F² → Fourier coeffs →
  reparametrization. O(NP + N log N) per gradient vs O(NP²) for finite differences.
- **Optimizer**: L-BFGS-B with analytical gradients, beta-continuation
  from beta=1 to beta=2000+, multi-start with 30-50 random restarts.
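- **Width ratio penalty**: smooth p-norm ratio approximating max(w)/min(w),
  penalized by the soft logarithmic term `λ·log(1 + (ratio/R)²)`. This term is
  finite for every ratio, so it discourages large ratios rather than forbidding
  them (it is not a true barrier).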

In [1]:
import numpy as np
import numba as nb
from numba import njit
from scipy.optimize import minimize
from joblib import Parallel, delayed
import json, os, time

# Record the library versions and the CPU count reported by os.cpu_count()
# alongside the notebook output.
print(f'Numba {nb.__version__}, NumPy {np.__version__}')
print(f'CPU cores: {os.cpu_count()}')

Numba 0.63.1, NumPy 2.3.5
CPU cores: 16


## Core Numba Kernels

Softplus reparametrization, exact autoconvolution at breakpoints (for verification).

In [2]:
@njit(cache=True)
def softplus(x):
    """Scalar softplus, log(1 + exp(x)), with shortcuts for large |x|.

    Inside [-20, 20] the value is computed as log1p(exp(x)). Above 20 the
    input itself is returned, and below -20 exp(x) is returned.
    """
    if -20.0 <= x <= 20.0:
        return np.log1p(np.exp(x))
    if x > 20.0:
        return x
    return np.exp(x)


@njit(cache=True)
def softplus_vec(x):
    """Apply ``softplus`` to each entry of ``x`` and return a new array."""
    result = np.empty(len(x))
    for idx, value in enumerate(x):
        result[idx] = softplus(value)
    return result


@njit(cache=True)
def sigmoid(x):
    """Logistic sigmoid (the derivative of softplus), evaluated stably.

    The branch on the sign of ``x`` ensures ``exp`` is only ever called on a
    non-positive argument, so it cannot overflow.
    """
    if x < 0.0:
        e_x = np.exp(x)
        return e_x / (1.0 + e_x)
    return 1.0 / (1.0 + np.exp(-x))


@njit(cache=True)
def theta_to_edges_heights(gamma, eta):
    """Map unconstrained parameters to a normalised step function.

    gamma -> positive raw widths (softplus), rescaled so the widths sum to 0.5;
             edges are the running sum of widths starting at -0.25.
    eta   -> positive raw heights (softplus), rescaled so sum(h * w) = 1.

    Returns (edges, heights, widths) with len(edges) = len(gamma) + 1.
    """
    n_bins = len(gamma)

    # Total of the positive raw widths, used as the normaliser.
    raw_w = softplus_vec(gamma)
    total_w = 0.0
    for k in range(n_bins):
        total_w += raw_w[k]

    # Normalised widths and cumulative edges in a single sweep.
    widths = np.empty(n_bins)
    edges = np.empty(n_bins + 1)
    edges[0] = -0.25
    for k in range(n_bins):
        widths[k] = 0.5 * raw_w[k] / total_w
        edges[k + 1] = edges[k] + widths[k]
    # Overwrite the accumulated right endpoint so it is exactly 0.25.
    edges[n_bins] = 0.25

    # Integral of the un-normalised step function.
    raw_h = softplus_vec(eta)
    mass = 0.0
    for k in range(n_bins):
        mass += raw_h[k] * widths[k]

    heights = np.empty(n_bins)
    for k in range(n_bins):
        heights[k] = raw_h[k] / mass

    return edges, heights, widths


@njit(cache=True)
def peak_exact(edges, heights):
    """Maximum of (f*f)(t) over all pairwise edge sums ``edges[i] + edges[j]``.

    For each candidate t the autoconvolution is evaluated exactly by summing,
    over every ordered pair of bins (i, j), the product of heights times the
    length of the overlap of bin i with bin j reflected about t.
    Cost: O(P^2) candidates times O(P^2) bin pairs.
    """
    n_bins = len(heights)
    n_edges = n_bins + 1
    left = edges[:-1]
    right = edges[1:]

    # All pairwise edge sums, sorted ascending.
    sums = np.empty(n_edges * n_edges)
    for i in range(n_edges):
        for j in range(n_edges):
            sums[i * n_edges + j] = edges[i] + edges[j]
    sums = np.sort(sums)

    # Keep a value only if it exceeds the last kept one by more than 1e-15.
    knots = np.empty(len(sums))
    knots[0] = sums[0]
    n_knots = 1
    for idx in range(1, len(sums)):
        if sums[idx] - knots[n_knots - 1] > 1e-15:
            knots[n_knots] = sums[idx]
            n_knots += 1

    best = -1e300
    for q in range(n_knots):
        t = knots[q]
        acc = 0.0
        for i in range(n_bins):
            for j in range(n_bins):
                # Overlap of [left[i], right[i]] with [t - right[j], t - left[j]].
                shifted_lo = t - right[j]
                shifted_hi = t - left[j]
                lo = left[i] if left[i] > shifted_lo else shifted_lo
                hi = right[i] if right[i] < shifted_hi else shifted_hi
                if hi > lo:
                    acc += heights[i] * heights[j] * (hi - lo)
        if acc > best:
            best = acc
    return best


# JIT warmup: call each kernel once on a tiny input so Numba compiles it now.
# All-zero gamma/eta give equal widths and equal heights, i.e. a constant
# step function on [-0.25, 0.25], which doubles as a sanity check.
_g = np.zeros(5)
_e, _h, _w = theta_to_edges_heights(_g, _g)
_p = peak_exact(_e, _h)
_ = sigmoid(0.0)
print(f'Uniform P=5: exact peak = {_p:.6f} (expected ~2.0 for constant f=2)')
print('Numba kernels compiled.')

Uniform P=5: exact peak = 2.000000 (expected ~2.0 for constant f=2)
Numba kernels compiled.


## FFT-Based Autoconvolution via Analytical Fourier Transform

For a step function $f(x) = h_i$ on $[e_i, e_{i+1})$, the Fourier transform is:
$$\hat{F}(\omega) = \sum_i h_i \frac{e^{-j\omega e_i} - e^{-j\omega e_{i+1}}}{j\omega}$$

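The autoconvolution Fourier series coefficients (period 1, since $f*f$ is supported on $[-\tfrac12, \tfrac12]$) are $\hat{g}_k = \hat{F}(2\pi k)^2$.
An inverse FFT of the first $N/2+1$ coefficients gives $(f*f)(t)$ on a uniform grid of $N$ points, up to series truncation and grid resolution (the true peak may fall between grid points). This is $O(NP)$ per evaluation
instead of $O(P^4)$ for exact breakpoint evaluation.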

**Analytical gradients**: Backprop through the chain
LSE → fftshift → irfft → complex squaring → Fourier coefficients → reparametrization,
giving exact gradients in $O(NP + N\log N)$ vs $O(NP^2)$ for finite differences.

In [3]:
@njit(cache=True)
def fourier_coeffs(edges, heights, n_freq):
    """Fourier transform F_hat(2*pi*k) of a step function for k = 0..n_freq-1.

    The step function takes value ``heights[i]`` on ``[edges[i], edges[i+1])``.

    Parameters
    ----------
    edges : array of length P + 1
    heights : array of length P
    n_freq : number of non-negative frequencies to evaluate

    Returns
    -------
    (re, im) : real and imaginary parts as two arrays of length ``n_freq``.
        ``re[0]`` is the integral ``sum(heights * diff(edges))``, which is 1
        for a normalised function. For k >= 1:
            re[k] = sum_i h_i * (sin(w e_{i+1}) - sin(w e_i)) / w
            im[k] = sum_i h_i * (cos(w e_{i+1}) - cos(w e_i)) / w
        with w = 2*pi*k.
    """
    P = len(heights)
    re = np.zeros(n_freq)
    im = np.zeros(n_freq)
    if n_freq == 0:
        # Nothing to fill; writing re[0] would be out of bounds.
        return re, im

    # DC term is the integral of f; computed rather than assumed to be 1 so
    # unnormalised heights are handled correctly too.
    dc = 0.0
    for i in range(P):
        dc += heights[i] * (edges[i + 1] - edges[i])
    re[0] = dc
    if P == 0:
        return re, im

    for k in range(1, n_freq):
        omega = 2.0 * np.pi * k
        sr = 0.0
        si = 0.0
        # The right edge of bin i is the left edge of bin i+1, so carry its
        # cos/sin forward instead of evaluating each interior edge twice.
        c0 = np.cos(omega * edges[0])
        s0 = np.sin(omega * edges[0])
        for i in range(P):
            c1 = np.cos(omega * edges[i + 1])
            s1 = np.sin(omega * edges[i + 1])
            sr += heights[i] * (s1 - s0) / omega
            si += heights[i] * (c1 - c0) / omega
            c0 = c1
            s0 = s1
        re[k] = sr
        im[k] = si
    return re, im


@njit(cache=True)
def fourier_backward(edges, heights, d_F_re, d_F_im, n_freq):
    """Backward pass through ``fourier_coeffs``.

    Given the loss gradients with respect to the real and imaginary Fourier
    coefficients (``d_F_re``, ``d_F_im``), accumulate the gradients with
    respect to ``edges`` (length P + 1) and ``heights`` (length P).

    The k = 0 coefficient is skipped: it is treated as a constant, so its
    upstream gradient is never read.

    Returns
    -------
    (d_edges, d_heights)
    """
    P = len(heights)
    d_edges = np.zeros(P + 1)
    d_heights = np.zeros(P)
    if P == 0:
        return d_edges, d_heights

    for k in range(1, n_freq):
        omega = 2.0 * np.pi * k
        g_re = d_F_re[k]
        g_im = d_F_im[k]

        # cos/sin of the current bin's left edge; reused from the previous
        # bin's right edge so each edge is evaluated once per frequency.
        cos_lo = np.cos(omega * edges[0])
        sin_lo = np.sin(omega * edges[0])
        for i in range(P):
            cos_hi = np.cos(omega * edges[i + 1])
            sin_hi = np.sin(omega * edges[i + 1])

            # dL/dh_i: F_re += h_i*(sin_hi-sin_lo)/w, F_im += h_i*(cos_hi-cos_lo)/w
            d_heights[i] += (g_re * (sin_hi - sin_lo)
                             + g_im * (cos_hi - cos_lo)) / omega

            # dL/de_{i+1}: d/de sin(we)/w = cos(we), d/de cos(we)/w = -sin(we)
            d_edges[i + 1] += heights[i] * (g_re * cos_hi - g_im * sin_hi)

            # dL/de_i: the left-edge terms enter with a minus sign
            d_edges[i] += heights[i] * (-g_re * cos_lo + g_im * sin_lo)

            cos_lo = cos_hi
            sin_lo = sin_hi

    return d_edges, d_heights


@njit(cache=True)
def reparam_backward(gamma, eta, widths, heights, d_edges, d_heights,
                     d_penalty_widths):
    """Chain rule through ``theta_to_edges_heights``.

    Combines the loss gradients with respect to edges and heights with the
    penalty gradient with respect to widths, and returns the gradients with
    respect to the unconstrained parameters as ``(d_gamma, d_eta)``.
    """
    n = len(gamma)

    # Rebuild the two normalisers used by the forward map.
    raw_w = softplus_vec(gamma)
    raw_h = softplus_vec(eta)
    total_w = 0.0
    for k in range(n):
        total_w += raw_w[k]
    mass = 0.0
    for k in range(n):
        mass += raw_h[k] * widths[k]

    # Edges -> widths. edges[j] = -0.25 + sum_{i<j} widths[i] for the interior
    # edges j = 1..P-1, so widths[i] collects the gradients of all interior
    # edges to its right. edges[0] and edges[P] are constants and contribute
    # nothing, which leaves the last width with no edge contribution.
    grad_w = np.zeros(n)
    running = 0.0
    j = n - 1
    while j > 0:
        running += d_edges[j]
        grad_w[j - 1] = running
        j -= 1

    # Heights -> raw heights and widths.
    # heights_i = raw_h_i / mass, with mass = sum(raw_h * widths).
    dot_h = 0.0
    for k in range(n):
        dot_h += d_heights[k] * heights[k]

    grad_raw_h = np.empty(n)
    for k in range(n):
        grad_raw_h[k] = (d_heights[k] - widths[k] * dot_h) / mass
        grad_w[k] -= heights[k] * dot_h
        # The width-ratio penalty acts directly on the widths.
        grad_w[k] += d_penalty_widths[k]

    # Widths -> raw widths. widths_i = 0.5 * raw_w_i / total_w.
    dot_w = 0.0
    for k in range(n):
        dot_w += grad_w[k] * widths[k]
    scale = 0.5 / total_w

    # Softplus derivative is the sigmoid of the raw parameter.
    d_gamma = np.empty(n)
    d_eta = np.empty(n)
    for k in range(n):
        d_gamma[k] = (scale * (grad_w[k] - 2.0 * dot_w)) * sigmoid(gamma[k])
        d_eta[k] = grad_raw_h[k] * sigmoid(eta[k])

    return d_gamma, d_eta


@njit(cache=True)
def square_complex(re, im):
    """Element-wise square of complex numbers given as separate parts.

    (a + j*b)^2 = (a^2 - b^2) + j*(2ab); returns the two parts as new arrays.
    """
    count = len(re)
    sq_re = np.empty(count)
    sq_im = np.empty(count)
    for idx in range(count):
        a = re[idx]
        b = im[idx]
        sq_re[idx] = a * a - b * b
        sq_im[idx] = 2.0 * a * b
    return sq_re, sq_im


def fft_autoconv(edges, heights, N_fft=16384):
    """Sample the autoconvolution on a uniform grid using Fourier coefficients.

    The coefficients from ``fourier_coeffs`` are squared, passed through an
    inverse real FFT scaled by ``N_fft``, and shifted with ``fftshift``.
    Returns the values of (f*f)(t) at t = -0.5 + m/N_fft, m = 0..N_fft-1.
    """
    half_spectrum = N_fft // 2 + 1
    coeff_re, coeff_im = fourier_coeffs(edges, heights, half_spectrum)
    sq_re, sq_im = square_complex(coeff_re, coeff_im)

    samples = np.fft.irfft(sq_re + 1j * sq_im, n=N_fft) * N_fft
    return np.fft.fftshift(samples)


def lse_from_conv(conv, beta):
    """Smooth maximum of ``conv``: (1/beta) * log(sum(exp(beta * conv))).

    The maximum is subtracted before exponentiating, and the scaled exponents
    are clipped to [-500, 0].
    """
    peak = np.max(conv)
    exponents = np.clip(beta * (conv - peak), -500.0, 0.0)
    return peak + np.log(np.exp(exponents).sum()) / beta


# === Validation: compare FFT vs exact for uniform f ===
# Ten equal bins of height 2 on [-0.25, 0.25]; the integral is 1.
_e_test = np.linspace(-0.25, 0.25, 11)
_h_test = np.ones(10) * 2.0
_conv = fft_autoconv(_e_test, _h_test, N_fft=8192)
_peak_fft = np.max(_conv)
_peak_ex = peak_exact(_e_test, _h_test)
print(f'Uniform f=2, P=10:')
print(f'  FFT peak:   {_peak_fft:.6f}')
print(f'  Exact peak: {_peak_ex:.6f}')
print(f'  Error:      {abs(_peak_fft - _peak_ex):.2e}')

# Test with a non-uniform grid
_e2 = np.array([-0.25, -0.15, -0.05, 0.0, 0.1, 0.25])
_h2 = np.array([1.5, 2.5, 3.0, 2.0, 1.0])
_h2 = _h2 / np.sum(_h2 * np.diff(_e2))  # normalize so the integral of f is 1
_conv2 = fft_autoconv(_e2, _h2, N_fft=16384)
_peak_fft2 = np.max(_conv2)
_peak_ex2 = peak_exact(_e2, _h2)
print(f'Non-uniform P=5:')
print(f'  FFT peak:   {_peak_fft2:.6f}')
print(f'  Exact peak: {_peak_ex2:.6f}')
print(f'  Error:      {abs(_peak_fft2 - _peak_ex2):.2e}')

# JIT warmup for backward kernels
# The arguments only need the right shapes here: the Fourier coefficients
# stand in for upstream gradients, and zeros stand in for edge/height grads.
_n_freq = 8192 // 2 + 1
_f_re, _f_im = fourier_coeffs(_e2, _h2, _n_freq)
_d_e, _d_h = fourier_backward(_e2, _h2, _f_re, _f_im, _n_freq)
_d_pw = np.zeros(5)
_dg, _de = reparam_backward(np.zeros(5), np.zeros(5), np.diff(_e2),
                             _h2, np.zeros(6), np.zeros(5), _d_pw)
print('Backward kernels compiled.')

Uniform f=2, P=10:
  FFT peak:   1.999901
  Exact peak: 2.000000
  Error:      9.89e-05
Non-uniform P=5:
  FFT peak:   2.160459
  Exact peak: 2.160494
  Error:      3.49e-05
Backward kernels compiled.


## Optimization Wrapper

FFT-based objective for L-BFGS-B, multi-start driver with joblib parallelism.

In [4]:
def _smooth_width_ratio(widths):
    """Smooth approximation to max(w)/min(w) using p-norm ratio.
    sr = (mean(w^p) * mean(w^{-p}))^{1/p}
       = ((sum w^p)^{1/p} / P^{1/p}) * ((sum w^{-p})^{1/p} / P^{1/p})  [sic]
    As p -> inf: (sum w^p)^{1/p} -> max(w), (sum w^{-p})^{-1/p} -> min(w),
    and soft_max / soft_min = (mean(w^p) * mean(w^{-p}))^{1/p} -> max/min.
    """
    p = 4.0
    P = len(widths)
    wp = widths ** p
    wn = widths ** (-p)
    return (wp.sum() * wn.sum()) ** (1.0 / p) / P ** (2.0 / p)


def _smooth_width_ratio_grad(widths, sr, lam, R, p):
    """Gradient of penalty = lam * log(1 + (sr/R)^2) w.r.t. widths."""
    S_p = (widths ** p).sum()
    S_n = (widths ** (-p)).sum()

    # d(penalty)/d(sr) via chain rule
    sr_over_R = sr / R
    d_penalty_sr = lam * 2.0 * sr_over_R / (R * (1.0 + sr_over_R ** 2))

    # log(sr) = (1/p)*(log(S_p) + log(S_n) - 2*log(P))
    # d(log sr)/dw_j = w_j^{p-1}/S_p - w_j^{-(p+1)}/S_n
    d_sr_w = sr * (widths ** (p - 1) / S_p - widths ** (-(p + 1)) / S_n)

    return d_penalty_sr * d_sr_w


def make_fft_objective_and_grad(P, beta, lam=0.05, N_fft=16384):
    """Build the ``theta -> (objective, gradient)`` callable for L-BFGS-B.

    The objective is the LogSumExp (sharpness ``beta``) of the FFT-sampled
    autoconvolution plus ``lam * log(1 + (sr/R)^2)`` on the smooth width
    ratio ``sr``. The gradient is obtained by manual backpropagation through
    every forward step. Intended for ``scipy.optimize.minimize(..., jac=True)``.
    """
    n_freq = N_fft // 2 + 1
    R = 50.0
    p = 4.0

    def obj_and_grad(theta):
        gamma, eta = theta[:P], theta[P:]

        # ---------------- forward ----------------
        edges, heights, widths = theta_to_edges_heights(gamma, eta)
        f_re, f_im = fourier_coeffs(edges, heights, n_freq)
        g_re, g_im = square_complex(f_re, f_im)
        conv = np.fft.fftshift(
            np.fft.irfft(g_re + 1j * g_im, n=N_fft) * N_fft)

        # LogSumExp, stabilised by subtracting the maximum.
        c_max = np.max(conv)
        exp_shifted = np.exp(np.clip(beta * (conv - c_max), -500.0, 0.0))
        sum_exp = np.sum(exp_shifted)
        lse_val = c_max + np.log(sum_exp) / beta

        # Width-ratio penalty.
        sr = _smooth_width_ratio(widths)
        obj_val = lse_val + lam * np.log1p((sr / R) ** 2)

        # ---------------- backward ----------------
        # d(LSE)/d(conv) is the softmax weight vector; undo the fftshift
        # (its adjoint is ifftshift) before transforming.
        softmax_w = exp_shifted / sum_exp
        rfft_d = np.fft.rfft(np.fft.ifftshift(softmax_w))

        # Adjoint of N_fft * irfft: interior frequencies appear twice in the
        # Hermitian-symmetric spectrum, hence the factor 2; the first and
        # last bins contribute only through their real part.
        d_G_re = np.zeros(n_freq)
        d_G_im = np.zeros(n_freq)
        d_G_re[0] = rfft_d[0].real
        d_G_re[-1] = rfft_d[-1].real
        d_G_re[1:-1] = 2.0 * rfft_d[1:-1].real
        d_G_im[1:-1] = 2.0 * rfft_d[1:-1].imag

        # Complex square G = F^2.
        d_F_re = 2.0 * (d_G_re * f_re + d_G_im * f_im)
        d_F_im = 2.0 * (-d_G_re * f_im + d_G_im * f_re)

        # Fourier coefficients -> edges and heights.
        d_edges, d_heights = fourier_backward(
            edges, heights, d_F_re, d_F_im, n_freq)

        # Penalty -> widths, then everything -> (gamma, eta).
        d_penalty_widths = _smooth_width_ratio_grad(widths, sr, lam, R, p)
        d_gamma, d_eta = reparam_backward(
            gamma, eta, widths, heights, d_edges, d_heights, d_penalty_widths)

        return obj_val, np.concatenate([d_gamma, d_eta])

    return obj_and_grad


def theta_to_solution(theta, P):
    """Decode ``theta`` into a step function and score it exactly.

    Returns ``(edges, heights, widths, exact_peak, width_ratio)``, where
    ``exact_peak`` comes from ``peak_exact`` and ``width_ratio`` is
    max(widths) / min(widths); both are plain Python floats.
    """
    edges, heights, widths = theta_to_edges_heights(theta[:P], theta[P:])
    exact_peak = float(peak_exact(edges, heights))
    width_ratio = float(widths.max() / widths.min())
    return edges, heights, widths, exact_peak, width_ratio


def init_theta_uniform(P, noise_gamma=0.1, rng=None):
    """Random starting point with gamma close to zero.

    gamma is drawn from N(0, noise_gamma). eta is the inverse softplus of
    Exponential(1) draws (floored at 1e-6), so softplus(eta) reproduces those
    draws. Returns the concatenation ``[gamma, eta]`` of length 2P. A fresh
    default generator is used when ``rng`` is None.
    """
    rng = np.random.default_rng() if rng is None else rng
    gamma = rng.normal(0, noise_gamma, size=P)
    raw_heights = np.maximum(rng.exponential(1.0, size=P), 1e-6)
    eta = np.log(np.expm1(raw_heights))
    return np.concatenate((gamma, eta))


def init_theta_from_solution(P, edges, heights, noise=0.01, rng=None):
    """Warm-start theta from a known step-function solution.

    gamma and eta are chosen so that softplus(gamma) is proportional to the
    bin widths and softplus(eta) equals the heights (floored at 1e-6). The
    normalisation in ``theta_to_edges_heights`` absorbs any overall scale.
    Gaussian noise of standard deviation ``noise`` is then added to both.

    Parameters
    ----------
    P : expected number of bins.
    edges : array-like of P + 1 bin edges.
    heights : array-like of P bin heights.
    noise : standard deviation of the perturbation added to gamma and eta.
    rng : numpy Generator; a fresh default generator is used when None.

    Returns
    -------
    ndarray of length 2P, the concatenation ``[gamma, eta]``.

    Raises
    ------
    ValueError
        If ``edges`` or ``heights`` does not describe exactly P bins.
    """
    if rng is None:
        rng = np.random.default_rng()
    widths = np.diff(np.asarray(edges, dtype=float))
    heights = np.asarray(heights, dtype=float)
    # Explicit checks rather than assert, which is stripped under ``-O``.
    if len(widths) != P:
        raise ValueError(f"edges has {len(widths)} bins but P={P}")
    if len(heights) != P:
        raise ValueError(f"heights has {len(heights)} entries but P={P}")

    def _inv_softplus(target):
        # log(expm1(y)) rewritten as y + log(1 - exp(-y)); the direct form
        # overflows to inf once expm1(y) exceeds the float range.
        y = np.maximum(target, 1e-6)
        return y + np.log(-np.expm1(-y))

    # Invert softplus: softplus(gamma) proportional to widths. The factor
    # 2 * P is an arbitrary scale that makes equal widths map to 1.
    gamma = _inv_softplus(widths * 2 * P)
    gamma += rng.normal(0, noise, size=P)
    # Heights: softplus(eta) proportional to heights.
    eta = _inv_softplus(heights)
    eta += rng.normal(0, noise, size=P)
    return np.concatenate([gamma, eta])


def run_single_restart(theta0, P, beta_schedule, lam=0.05,
                       maxiter_per_beta=500, N_fft=16384, verbose=False):
    """Run one beta-continuation of L-BFGS-B starting from ``theta0``.

    Each beta in ``beta_schedule`` gets its own objective and is optimised
    from the previous stage's solution. Returns ``(fft_peak, theta,
    width_ratio)`` evaluated at the final theta.
    """
    def _diagnostics(vec):
        # FFT-grid peak and max/min width ratio of the decoded step function.
        edges, heights, widths = theta_to_edges_heights(vec[:P], vec[P:])
        peak = float(np.max(fft_autoconv(edges, heights, N_fft)))
        return peak, float(widths.max() / widths.min())

    theta = theta0.copy()
    for beta in beta_schedule:
        res = minimize(
            make_fft_objective_and_grad(P, beta, lam, N_fft),
            theta,
            method='L-BFGS-B',
            jac=True,
            options=dict(maxiter=maxiter_per_beta, ftol=1e-12, gtol=1e-8),
        )
        theta = res.x
        if not verbose:
            continue
        fft_pk, w_ratio = _diagnostics(theta)
        print(f'  beta={beta:>7.1f}  fft_peak={fft_pk:.6f}  '
              f'w_ratio={w_ratio:.1f}  nit={res.nit}')

    # Final evaluation
    fft_peak, w_ratio = _diagnostics(theta)
    return fft_peak, theta, w_ratio


def run_optimization(P, n_restarts=30, n_jobs=-1, warm_edges=None,
                     warm_heights=None, beta_schedule=None, lam=0.05,
                     maxiter_per_beta=500, N_fft=16384, verbose=True,
                     seed=42):
    """Full multi-start joint optimization.

    Builds ``n_restarts`` starting points, runs ``run_single_restart`` on each
    in parallel via joblib, selects the restart with the smallest FFT peak,
    and re-scores that one with the exact breakpoint evaluation.

    Parameters
    ----------
    P : number of bins.
    n_restarts : number of independent restarts (must be >= 1).
    n_jobs : passed to ``joblib.Parallel``.
    warm_edges, warm_heights : optional known solution. Warm starts are only
        used when BOTH are given; then ``max(1, n_restarts // 3)`` restarts
        start from noisy copies of it and the rest are random.
    beta_schedule : LSE sharpness values; a 15-stage default from 1 to 2000
        is used when None.
    lam, maxiter_per_beta, N_fft : forwarded to ``run_single_restart``.
    verbose : print progress and a summary.
    seed : seed for the generator that draws the starting points. The
        default 42 matches the value that was previously hard-coded.

    Returns
    -------
    (exact_peak, best_theta, all_peaks) : exact peak of the selected restart,
        its parameter vector, and the FFT peaks of all restarts in order.

    Raises
    ------
    ValueError : if ``n_restarts < 1``.
    RuntimeError : if no restart produced a finite FFT peak.

    NOTE(review): warm-started restarts run the whole schedule, starting at
    the smallest beta. In the recorded P=200 run the final exact peak
    (1.518741) is worse than the warm-start baseline (1.509766), so the early
    low-beta stages may be discarding the warm solution. Consider a shorter
    high-beta schedule for warm starts, or keeping the baseline as a
    candidate. Confirm before relying on warm starts.
    """
    if n_restarts < 1:
        raise ValueError(f'n_restarts must be >= 1, got {n_restarts}')
    if beta_schedule is None:
        beta_schedule = [1, 2, 4, 8, 15, 30, 60, 100, 150, 250,
                         400, 600, 1000, 1500, 2000]

    rng = np.random.default_rng(seed)

    # Build initializations: warm starts first (with noise growing linearly
    # up to 0.005), then random starts.
    inits = []
    n_warm = 0
    if warm_edges is not None and warm_heights is not None:
        n_warm = max(1, n_restarts // 3)
        for i in range(n_warm):
            noise = 0.005 * (i + 1) / n_warm
            inits.append(init_theta_from_solution(
                P, warm_edges, warm_heights, noise=noise, rng=rng))
    for _ in range(n_restarts - n_warm):
        inits.append(init_theta_uniform(P, noise_gamma=0.1, rng=rng))

    t0 = time.time()
    if verbose:
        print(f'Running {n_restarts} restarts (P={P}, {n_warm} warm, '
              f'N_fft={N_fft}, {len(beta_schedule)} beta stages)...')

    results = Parallel(n_jobs=n_jobs, verbose=0)(
        delayed(run_single_restart)(
            theta0, P, beta_schedule, lam, maxiter_per_beta, N_fft)
        for theta0 in inits
    )

    # Find best by FFT peak. NaN or inf peaks never satisfy the comparison,
    # so they are skipped.
    best_fft = np.inf
    best_theta = None
    all_peaks = []
    for i, (fft_pk, theta, w_ratio) in enumerate(results):
        all_peaks.append(fft_pk)
        if fft_pk < best_fft:
            best_fft = fft_pk
            best_theta = theta.copy()
            if verbose:
                print(f'  Restart {i:>3}: fft_peak={fft_pk:.6f}, '
                      f'w_ratio={w_ratio:.1f}  <-- best')
        elif verbose and i % 10 == 0:
            print(f'  Restart {i:>3}: fft_peak={fft_pk:.6f}, '
                  f'w_ratio={w_ratio:.1f}')

    if best_theta is None:
        raise RuntimeError(
            'no restart produced a finite FFT peak; '
            f'peaks were {all_peaks}')

    # Exact verification of best
    edges, heights, widths, exact_pk, w_ratio = theta_to_solution(best_theta, P)

    if verbose:
        elapsed = time.time() - t0
        arr = np.array(all_peaks)
        print(f'\nDone in {elapsed:.1f}s.')
        print(f'  FFT best:   {best_fft:.6f}')
        print(f'  Exact best: {exact_pk:.6f}')
        print(f'  Width ratio: {w_ratio:.2f}')
        print(f'  Median:     {np.median(arr):.6f}, '
              f'Std: {np.std(arr):.6f}')

    return exact_pk, best_theta, all_peaks


# Cell-completion marker: confirms the definitions above ran without error.
print('Optimization wrapper defined.')

Optimization wrapper defined.


## Validation at P=50

Single verbose restart to show beta-schedule progression, then full multi-start.

In [5]:
P = 50
beta_sched = [1, 2, 4, 8, 15, 30, 60, 100, 150, 250, 400, 600, 1000, 1500, 2000]
# Approximate peak of the uniform-grid solution at P=50. Used both in the
# printed reference and in the sanity check so the two cannot disagree.
uniform_baseline_50 = 1.522

# --- Demo: single restart with per-stage verbose output ---
print('Single restart demo (P=50), per-beta-stage:')
theta0 = init_theta_uniform(P, rng=np.random.default_rng(0))
_, theta_demo, _ = run_single_restart(
    theta0, P, beta_sched, maxiter_per_beta=300, N_fft=8192, verbose=True)
edges_demo, heights_demo, widths_demo, exact_demo, wr_demo = theta_to_solution(theta_demo, P)
print(f'  => exact peak = {exact_demo:.6f}, w_ratio = {wr_demo:.2f}')

# --- Full multi-start ---
print(f'\nFull multi-start optimization:')
best_val_50, best_theta_50, all_vals_50 = run_optimization(
    P, n_restarts=30, n_jobs=-1,
    beta_schedule=beta_sched,
    maxiter_per_beta=300, N_fft=8192
)

edges_50, heights_50, widths_50, exact_50, w_ratio_50 = theta_to_solution(best_theta_50, P)
print(f'\nP={P} result:')
print(f'  Exact peak:  {exact_50:.6f}')
print(f'  Width ratio: {w_ratio_50:.2f}')
print(f'  (Uniform grid baseline ~{uniform_baseline_50:.3f})')
# Warn only when the result is actually worse than the quoted baseline.
if exact_50 > uniform_baseline_50:
    print('  WARNING: worse than uniform baseline, check for bugs')

Single restart demo (P=50), per-beta-stage:
  beta=    1.0  fft_peak=2.752771  w_ratio=14.5  nit=300
  beta=    2.0  fft_peak=2.287141  w_ratio=16.2  nit=300
  beta=    4.0  fft_peak=1.944456  w_ratio=21.9  nit=300
  beta=    8.0  fft_peak=1.762682  w_ratio=22.6  nit=300
  beta=   15.0  fft_peak=1.662063  w_ratio=23.0  nit=300
  beta=   30.0  fft_peak=1.601604  w_ratio=24.8  nit=300
  beta=   60.0  fft_peak=1.566377  w_ratio=23.6  nit=300
  beta=  100.0  fft_peak=1.552798  w_ratio=24.1  nit=300
  beta=  150.0  fft_peak=1.546866  w_ratio=26.3  nit=300
  beta=  250.0  fft_peak=1.542602  w_ratio=28.3  nit=300
  beta=  400.0  fft_peak=1.537633  w_ratio=26.7  nit=300
  beta=  600.0  fft_peak=1.536022  w_ratio=27.1  nit=300
  beta= 1000.0  fft_peak=1.534350  w_ratio=26.8  nit=300
  beta= 1500.0  fft_peak=1.533935  w_ratio=26.2  nit=300
  beta= 2000.0  fft_peak=1.533450  w_ratio=25.9  nit=300
  => exact peak = 1.534687, w_ratio = 25.85

Full multi-start optimization:
Running 30 restarts (P=50

## Scale to P=100

In [6]:
# Repeat the multi-start run at P=100, reusing the beta schedule `beta_sched`
# from the P=50 cell, with more restarts, more iterations per stage, and a
# finer FFT grid.
P = 100

best_val_100, best_theta_100, all_vals_100 = run_optimization(
    P, n_restarts=40, n_jobs=-1,
    beta_schedule=beta_sched,
    maxiter_per_beta=400, N_fft=16384
)

# Decode the best parameter vector and report its exact peak and width ratio.
edges_100, heights_100, widths_100, exact_100, w_ratio_100 = theta_to_solution(best_theta_100, P)
print(f'\nP={P} result:')
print(f'  Exact peak:  {exact_100:.6f}')
print(f'  Width ratio: {w_ratio_100:.2f}')

Running 40 restarts (P=100, 0 warm, N_fft=16384, 15 beta stages)...
  Restart   0: fft_peak=1.525221, w_ratio=15.6  <-- best
  Restart   1: fft_peak=1.521266, w_ratio=15.3  <-- best
  Restart   8: fft_peak=1.520583, w_ratio=23.2  <-- best
  Restart  10: fft_peak=1.521120, w_ratio=11.6
  Restart  20: fft_peak=1.523934, w_ratio=20.8
  Restart  30: fft_peak=1.520904, w_ratio=28.0

Done in 2228.5s.
  FFT best:   1.520583
  Exact best: 1.521442
  Width ratio: 23.20
  Median:     1.525756, Std: 0.002667

P=100 result:
  Exact peak:  1.521442
  Width ratio: 23.20


## Scale to P=200 with warm start

Warm-start from the best known P=200 uniform-grid solution in `best_solutions.json`.

In [7]:
# Load best known P=200 uniform solution for warm starting
# Reads the 'heavy_P200' entry, which must provide 'edges', 'heights' and
# 'exact_peak'.
with open('best_solutions.json', 'r') as f:
    best_solutions = json.load(f)

sol200 = best_solutions['heavy_P200']
warm_edges_200 = np.array(sol200['edges'])
warm_heights_200 = np.array(sol200['heights'])
print(f'Warm start from P=200 uniform solution: peak={sol200["exact_peak"]:.6f}')

P = 200
# Finer 21-stage schedule than the P=50/P=100 runs, extending to beta=3000.
beta_sched_fine = [1, 1.5, 2, 3, 5, 8, 12, 18, 28, 42, 65, 100, 150, 230,
                   350, 500, 750, 1000, 1500, 2000, 3000]

best_val_200, best_theta_200, all_vals_200 = run_optimization(
    P, n_restarts=50, n_jobs=-1,
    warm_edges=warm_edges_200, warm_heights=warm_heights_200,
    beta_schedule=beta_sched_fine,
    maxiter_per_beta=500, N_fft=16384
)

edges_200, heights_200, widths_200, exact_200, w_ratio_200 = theta_to_solution(best_theta_200, P)
print(f'\nP={P} result:')
print(f'  Exact peak:   {exact_200:.6f}')
print(f'  Width ratio:  {w_ratio_200:.2f}')
print(f'  Baseline:     {sol200["exact_peak"]:.6f}')
# Improvement = baseline - result: positive means the joint optimisation beat
# the warm-start baseline, negative means it ended up worse.
print(f'  Improvement:  {sol200["exact_peak"] - exact_200:+.6f}')

Warm start from P=200 uniform solution: peak=1.509766
Running 50 restarts (P=200, 16 warm, N_fft=16384, 21 beta stages)...
  Restart   0: fft_peak=1.524770, w_ratio=34.5  <-- best
  Restart   1: fft_peak=1.524312, w_ratio=29.1  <-- best
  Restart   4: fft_peak=1.523756, w_ratio=28.6  <-- best
  Restart  10: fft_peak=1.525200, w_ratio=49.8
  Restart  18: fft_peak=1.522363, w_ratio=27.0  <-- best
  Restart  19: fft_peak=1.520906, w_ratio=48.2  <-- best
  Restart  20: fft_peak=1.520631, w_ratio=25.1  <-- best
  Restart  22: fft_peak=1.518482, w_ratio=31.2  <-- best
  Restart  27: fft_peak=1.517820, w_ratio=23.1  <-- best
  Restart  28: fft_peak=1.517474, w_ratio=19.5  <-- best
  Restart  30: fft_peak=1.518305, w_ratio=23.5
  Restart  32: fft_peak=1.516921, w_ratio=35.2  <-- best
  Restart  40: fft_peak=1.519872, w_ratio=18.2

Done in 9898.7s.
  FFT best:   1.516921
  Exact best: 1.518741
  Width ratio: 35.21
  Median:     1.522352, Std: 0.002860

P=200 result:
  Exact peak:   1.518741
  W

## Summary and Save Results

In [8]:
# Collect results
# Re-decode each best theta so the saved record holds the exact peak, the
# width ratio, and the edges/heights as plain lists (JSON-serialisable).
results_all = {}
for label, theta, p_val in [
    ('P50', best_theta_50, 50),
    ('P100', best_theta_100, 100),
    ('P200', best_theta_200, 200),
]:
    e, h, w, ex, wr = theta_to_solution(theta, p_val)
    results_all[label] = {
        'P': p_val,
        'exact_peak': ex,
        'width_ratio': wr,
        'edges': e.tolist(),
        'heights': h.tolist(),
    }

# Summary table
print(f'{"P":>5} | {"Exact Peak":>12} | {"W Ratio":>8} | Notes')
print('-' * 55)
for label in ['P50', 'P100', 'P200']:
    r = results_all[label]
    note = ''
    if label == 'P200':
        # Only the P=200 run has a loaded baseline (sol200) to compare with.
        note = f'(uniform baseline: {sol200["exact_peak"]:.6f})'
    print(f'{r["P"]:>5} | {r["exact_peak"]:>12.6f} | {r["width_ratio"]:>8.2f} | {note}')

# Find best
# Smallest exact peak across the three runs.
best_label = min(results_all, key=lambda k: results_all[k]['exact_peak'])
print(f'\nBest overall: {best_label} -> {results_all[best_label]["exact_peak"]:.6f}')
print(f'Best known in literature: 1.5029')

# Save
# Writes to the current working directory, replacing any existing file.
out_path = 'joint_optimization_results.json'
with open(out_path, 'w') as f:
    json.dump(results_all, f, indent=2)
print(f'\nSaved to {out_path}')

    P |   Exact Peak |  W Ratio | Notes
-------------------------------------------------------
   50 |     1.528148 |    22.19 | 
  100 |     1.521442 |    23.20 | 
  200 |     1.518741 |    35.21 | (uniform baseline: 1.509766)

Best overall: P200 -> 1.518741
Best known in literature: 1.5029

Saved to joint_optimization_results.json
