# Adaptive Grid Optimization for $C_{1a}$

## Summary
- **Method**: Non-uniform grid refinement + L-BFGS-B polish (with optional DE)
- **Key idea**: All prior work uses uniform grids. The extremizer likely has structure (boundary effects, cusps) that benefits from non-uniform discretization.
- **Best adaptive result**: 1.537413 (P=500, exact peak; lower is better), starting from the uniform P=200 solution (1.509766)
- **Outcome**: Adaptive grids did NOT improve over uniform grid baseline. Non-uniform grids introduce interpolation artifacts that L-BFGS cannot overcome. The FFT evaluation is also less accurate on non-uniform grids.

## Approach
1. Start from the best known uniform-grid solution (P=200, peak=1.5098)
2. Identify regions of high curvature/structure in f
3. Refine grid in those regions (curvature + boundary weighting)
4. Re-optimize on the non-uniform grid via L-BFGS-B
5. Iterate: refine -> optimize -> refine -> optimize

## Key findings
- DE (differential evolution) converges to ~1.7-1.8 on non-uniform grids — far from optimal
- L-BFGS-B from an interpolated warm start stays near ~1.54, worse than the starting uniform solution (1.5098)
- Width ratios up to 114:1 cause FFT evaluation errors of ~0.005
- **Conclusion**: Adaptive grids are not useful without a fundamentally different optimizer that handles non-uniform discretization natively

In [None]:
import json
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from numba import njit, prange
from scipy.optimize import minimize

# Report the runtime environment. os.cpu_count() is the supported API;
# np.os is an accidental, non-public attribute of the numpy package.
print(f"NumPy {np.__version__}")
print(f"CPU cores available: {os.cpu_count()}")

In [None]:
# === Numba JIT-compiled exact autoconvolution ===

@njit(cache=True)
def _autoconv_at_points(edges, heights, t_values):
    """Evaluate the autoconvolution of a piecewise-constant f at each t.

    Bin k spans [edges[k], edges[k+1]] and carries the value heights[k].
    For every ordered pair of bins (i, j) the contribution at t is
    heights[i] * heights[j] times the length of the overlap between bin i
    and the interval [t - edges[j+1], t - edges[j]]; pairs with no overlap
    contribute nothing.

    Returns a new array with one value per entry of t_values.
    """
    n_bins = len(heights)
    n_points = len(t_values)
    left = edges[:-1]
    right = edges[1:]
    out = np.zeros(n_points)
    for k in range(n_points):
        shift = t_values[k]
        acc = 0.0
        for i in range(n_bins):
            left_i = left[i]
            right_i = right[i]
            height_i = heights[i]
            for j in range(n_bins):
                # Overlap of bin i with bin j reflected through `shift`.
                start = max(left_i, shift - right[j])
                stop = min(right_i, shift - left[j])
                if stop > start:
                    acc += height_i * heights[j] * (stop - start)
        out[k] = acc
    return out


@njit(parallel=True, cache=True)
def _autoconv_at_points_parallel(edges, heights, t_values):
    """Same evaluation as the serial kernel, with the t loop run via prange.

    Bin k spans [edges[k], edges[k+1]] with value heights[k].  Each
    iteration of the outer loop accumulates into its own local sum and
    writes only its own slot of the output array.

    Returns a new array with one value per entry of t_values.
    """
    n_bins = len(heights)
    n_points = len(t_values)
    left = edges[:-1]
    right = edges[1:]
    out = np.zeros(n_points)
    for k in prange(n_points):
        shift = t_values[k]
        acc = 0.0
        for i in range(n_bins):
            left_i = left[i]
            right_i = right[i]
            height_i = heights[i]
            for j in range(n_bins):
                # Overlap of bin i with bin j reflected through `shift`.
                start = max(left_i, shift - right[j])
                stop = min(right_i, shift - left[j])
                if stop > start:
                    acc += height_i * heights[j] * (stop - start)
        out[k] = acc
    return out


def compute_breakpoints(edges):
    """Return the sorted, de-duplicated pairwise sums of `edges` in [-0.5, 0.5].

    These sums are the candidate locations at which the autoconvolution is
    evaluated when searching for its peak.
    """
    # (N, 1) + (N,) broadcasts to the full N x N table of edge sums.
    pair_sums = edges[:, np.newaxis] + edges
    candidates = np.unique(pair_sums)
    in_window = np.logical_and(candidates >= -0.5, candidates <= 0.5)
    return candidates[in_window]


def peak_autoconv_exact(heights, edges):
    """Return the largest autoconvolution value over the breakpoints of `edges`.

    The candidate t values come from compute_breakpoints(edges).  More than
    500 candidates are dispatched to the parallel Numba kernel, otherwise
    the serial one is used.  The result is a plain Python float.
    """
    candidates = compute_breakpoints(edges)
    use_parallel = len(candidates) > 500
    kernel = _autoconv_at_points_parallel if use_parallel else _autoconv_at_points
    values = kernel(edges, heights, candidates)
    return float(np.max(values))


def peak_autoconv_fft(heights, edges, n_grid=8192):
    """Estimate the autoconvolution peak by sampling f and convolving via FFT.

    f is sampled at the left end of n_grid equal cells covering
    [-0.25, 0.25); samples outside [edges[0], edges[-1]) are set to zero.
    The samples are zero-padded to length 2 * n_grid so that the FFT
    product gives a linear (not circular) convolution, which is scaled by
    the cell size.  This is an approximation: bins are point-sampled, not
    integrated.

    Returns the maximum of the sampled convolution as a Python float.
    """
    step = 0.5 / n_grid
    sample_x = -0.25 + np.arange(n_grid) * step

    # Map every sample position to the bin that contains it.
    bin_of = np.searchsorted(edges, sample_x, side='right') - 1
    bin_of = np.clip(bin_of, 0, len(heights) - 1)
    outside = (sample_x < edges[0]) | (sample_x >= edges[-1])
    samples = np.where(outside, 0.0, heights[bin_of])

    padded = np.concatenate((samples, np.zeros(n_grid)))
    spectrum = np.fft.rfft(padded)
    conv = np.fft.irfft(spectrum * spectrum) * step

    # Entry k of the convolution corresponds to t = -0.5 + k * step.
    t_axis = -0.5 + np.arange(2 * n_grid) * step
    in_window = (t_axis >= -0.5) & (t_axis <= 0.5)
    return float(np.max(conv[in_window]))


# Warm up JIT
# Call both kernels once on a small uniform 50-bin grid (the serial one on
# just the first ten breakpoints) before they are used for real work.
print("Warming up Numba JIT...")
_e = np.linspace(-0.25, 0.25, 51)
_h = np.full(50, 2.0)
_bp = compute_breakpoints(_e)
_ = _autoconv_at_points(_e, _h, _bp[:10])
_ = _autoconv_at_points_parallel(_e, _h, _bp)
print("Done.")

In [None]:
# === Core functions ===

def normalize_heights(heights, widths):
    """Clamp heights at zero and rescale so that sum(heights * widths) == 1.

    When the clamped integral is not above 1e-15 the clamped heights are
    returned without rescaling.  The input array is not modified.
    """
    clipped = np.maximum(heights, 0.0)
    mass = np.sum(clipped * widths)
    return clipped / mass if mass > 1e-15 else clipped


def objective_fft(x, edges, widths):
    """Objective for the optimizer: FFT peak estimate of the normalized heights.

    `x` is first passed through normalize_heights (clamped and rescaled
    using `widths`), then scored with peak_autoconv_fft on `edges`.
    """
    return peak_autoconv_fft(normalize_heights(x, widths), edges)


def optimize_lbfgs(heights_init, edges, maxiter=500, verbose=True, maxfun=None):
    """Minimize the FFT peak estimate over the bin heights with L-BFGS-B.

    Parameters
    ----------
    heights_init : ndarray
        Starting heights, one per bin; a copy is handed to the optimizer.
    edges : ndarray
        Bin edges; the bin widths are np.diff(edges).
    maxiter : int
        Iteration limit passed to SciPy.
    verbose : bool
        When true, print the best objective seen every 50 iterations.
    maxfun : int or None
        Optional limit on function evaluations passed to SciPy.  None keeps
        SciPy's default.  No gradient is supplied here, so SciPy estimates
        it by finite differences; those evaluations count toward this limit
        and, with many bins, can end the run before `maxiter` is reached.

    Returns
    -------
    (float, ndarray)
        The FFT peak estimate of the final normalized heights, and those
        heights.
    """
    widths = np.diff(edges)
    bounds = [(0, None)] * len(heights_init)

    options = {'maxiter': maxiter, 'disp': False}
    if maxfun is not None:
        options['maxfun'] = maxfun

    iter_count = 0
    best_val = np.inf

    def _report_progress(xk):
        # Only installed when verbose: it costs one extra objective
        # evaluation per iteration and exists purely for the printout.
        nonlocal iter_count, best_val
        iter_count += 1
        val = objective_fft(xk, edges, widths)
        if val < best_val:
            best_val = val
        if iter_count % 50 == 0:
            print(f"      L-BFGS iter {iter_count}: best={best_val:.6f}")

    result = minimize(objective_fft, heights_init.copy(), args=(edges, widths),
                      method='L-BFGS-B', bounds=bounds,
                      callback=_report_progress if verbose else None,
                      options=options)
    best_heights = normalize_heights(result.x, widths)
    return peak_autoconv_fft(best_heights, edges), best_heights


def compute_curvature(heights, edges):
    """Score each bin for refinement; larger scores ask for a finer grid.

    The score of a bin is the sum of three terms:
      * the absolute height differences to its left and right neighbours,
      * 0.5 times its height relative to the largest height (floored at
        1e-10 to avoid dividing by zero),
      * 0.3 * exp(-50 * (0.25 - |center|)**2), which is largest for bins
        whose centre is near +/-0.25.
    """
    scores = np.zeros(len(heights))

    # jumps[k] is |heights[k+1] - heights[k]|; it counts once for the bin on
    # its right (as a left-neighbour jump) and once for the bin on its left.
    jumps = np.abs(np.diff(heights))
    scores[1:] += jumps
    scores[:-1] += jumps

    tallest = max(heights.max(), 1e-10)
    scores += 0.5 * heights / tallest

    midpoints = 0.5 * (edges[:-1] + edges[1:])
    scores += 0.3 * np.exp(-50 * (0.25 - np.abs(midpoints)) ** 2)
    return scores


def refine_grid(heights, edges, target_bins, blend=0.5):
    """Build `target_bins` new bins concentrated where the bin scores are high.

    The per-bin scores from compute_curvature are divided by the bin widths
    and normalized to sum to one, then mixed with a constant 1/len(heights)
    profile (`blend` is the weight of the constant part) and normalized
    again.  The new edges are placed at equally spaced levels of the
    cumulative sum of that profile times the bin widths.  The first and
    last edges are copied from `edges` so the domain is unchanged.
    """
    n_bins = len(heights)
    bin_widths = np.diff(edges)

    weight = compute_curvature(heights, edges) / bin_widths
    weight = weight / weight.sum()
    flat = np.ones(n_bins) / n_bins
    mixed = (1 - blend) * weight + blend * flat
    mixed = mixed / mixed.sum()

    # Cumulative profile over the old edges, scaled to run from 0 to 1.
    cumulative = np.concatenate([[0], np.cumsum(mixed * bin_widths)])
    cumulative = cumulative / cumulative[-1]

    levels = np.linspace(0, 1, target_bins + 1)
    refined = np.interp(levels, cumulative, edges)
    refined[0] = edges[0]
    refined[-1] = edges[-1]
    return refined


def interpolate_heights(old_heights, old_edges, new_edges):
    """Transfer bin heights to a new grid by linear interpolation at centres.

    Heights are interpolated from the old bin centres to the new bin
    centres with np.interp, clamped at zero, and rescaled so that
    sum(height * width) on the new grid is one.  The rescaling is skipped
    when that sum is not above 1e-15.
    """
    src_mid = 0.5 * (old_edges[:-1] + old_edges[1:])
    dst_mid = 0.5 * (new_edges[:-1] + new_edges[1:])

    resampled = np.interp(dst_mid, src_mid, old_heights)
    resampled = np.maximum(resampled, 0)

    mass = np.sum(resampled * np.diff(new_edges))
    if not mass > 1e-15:
        return resampled
    return resampled / mass


# Printed once the definitions in this cell have executed.
print("All functions defined.")

In [None]:
# === Load best known solution ===
# Reads best_solutions.json from the working directory and uses its
# 'heavy_P200' entry as the warm start for the adaptive refinement below.
with open('best_solutions.json', 'r') as f:
    solutions = json.load(f)

# The entry is read for the keys 'P', 'edges', 'heights' and 'exact_peak'.
# NOTE(review): presumably len(heights) == len(edges) - 1 — confirm against
# the file; nothing here checks it.
start_sol = solutions['heavy_P200']
P_start = start_sol['P']
start_edges = np.array(start_sol['edges'])
start_heights = np.array(start_sol['heights'])
start_peak = start_sol['exact_peak']
print(f"Starting from heavy_P200: P={P_start}, peak={start_peak:.6f}")

In [None]:
# === Main adaptive grid optimization ===
# Uses L-BFGS only (DE was found to converge to ~1.7-1.8 on non-uniform grids)
#
# Each stage re-grids the current solution to a larger bin count, transfers
# the heights by interpolation, and polishes them with several L-BFGS-B
# restarts.  Restarts are ranked by the FFT estimate; the stage result is
# then re-scored with the exact evaluator.

# Bin count and uniform-blend weight per stage (paired by position).
target_bins_seq = [200, 250, 300, 400, 500]
blend_seq = [0.15, 0.12, 0.1, 0.08, 0.05]

heights, edges = start_heights.copy(), start_edges.copy()
global_best_val, global_best_heights, global_best_edges = np.inf, None, None
results = []
t_start = time.time()

for stage, (target_bins, blend) in enumerate(zip(target_bins_seq, blend_seq)):
    print(f"\nSTAGE {stage+1}/{len(target_bins_seq)}: {target_bins} bins, blend={blend}")
    new_edges = refine_grid(heights, edges, target_bins, blend=blend)
    new_heights = interpolate_heights(heights, edges, new_edges)
    widths = np.diff(new_edges)
    print(f"  Grid: min_w={widths.min():.5f}, max_w={widths.max():.5f}, ratio={widths.max()/widths.min():.1f}")

    # Baseline for this stage: the interpolated solution before any polish.
    best_val_fft = peak_autoconv_fft(new_heights, new_edges)
    best_h = new_heights.copy()

    for r in range(5):  # 5 L-BFGS restarts
        if r == 0:
            # First restart: the interpolated warm start, unperturbed.
            h_init = new_heights.copy()
        elif r < 3:
            # Additive noise scaled by the mean height, growing with r.
            h_init = np.maximum(best_h + np.random.randn(target_bins) * 0.05*(r+1) * best_h.mean(), 0)
        else:
            # Multiplicative (relative) noise around the current best.
            h_init = np.maximum(best_h * (1 + 0.1*np.random.randn(target_bins)), 0)

        # A failed restart is skipped (best effort), but it is reported and
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        try:
            val, h = optimize_lbfgs(h_init, new_edges, 500, verbose=False)
        except Exception as exc:
            print(f"    restart {r+1}: failed ({exc!r})")
            continue

        if val < best_val_fft:
            best_val_fft, best_h = val, h.copy()
            print(f"    restart {r+1}: {val:.6f} <- improved")

    exact_val = peak_autoconv_exact(best_h, new_edges)
    print(f"  FFT={best_val_fft:.6f}, EXACT={exact_val:.6f}")
    results.append({'bins': target_bins, 'exact_peak': exact_val})

    # The global best is tracked over the stages only; it starts at infinity,
    # so the loaded starting solution is not a candidate.
    if exact_val < global_best_val:
        global_best_val = exact_val
        global_best_heights, global_best_edges = best_h.copy(), new_edges.copy()
        print(f"  *** NEW BEST: {exact_val:.6f} ***")
    heights, edges = best_h, new_edges

print(f"\nDone in {time.time()-t_start:.1f}s. Best: {global_best_val:.6f}")
print(f"Gap to literature: {global_best_val - 1.5029:+.6f}")
print(f"Improvement from start: {start_peak - global_best_val:+.6f}")

## Results

| Bins | Exact Peak |
|------|------------|
| 200  | 1.541514   |
| 250  | 1.542924   |
| 300  | 1.540497   |
| 400  | 1.539041   |
| 500  | 1.537413   |

**Conclusion**: The adaptive grid approach **worsened** the objective from 1.5098 (uniform P=200 baseline) to 1.5374 (best adaptive). The non-uniform grid introduces:
1. Interpolation artifacts when transferring solutions between grids
2. FFT evaluation errors from extreme width ratios (up to 114:1)
3. L-BFGS gets trapped in worse local minima on the distorted landscape

This partially answers K2 (boundary singularity question): if a singularity exists, it cannot be exploited by naive grid refinement + standard optimizers.