In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import sympy as sp
from scipy.special import erf
from scipy.integrate import quad
from scipy.fft import dst
import networkx as nx
import math

from utils import *
from intersection_graph import *

# Put every rcParam back to matplotlib's built-in default value, overriding
# anything set earlier in this kernel session.
mpl.rcParams.update(mpl.rcParamsDefault)
# Explicitly keep external-LaTeX text rendering switched off.
mpl.rcParams['text.usetex'] = False

In [16]:
# ---------- activations ----------
def relu(z):
    """Rectified linear unit, elementwise: max(0, z)."""
    floor = 0.0
    return np.maximum(floor, z)
def drelu(z):
    """Derivative of ReLU, elementwise: 1.0 where z > 0, otherwise 0.0.

    ReLU is not differentiable at 0; the subgradient 0 is used there.

    Accepts NumPy arrays, NumPy scalars, plain Python numbers and sequences.
    The comparison goes through ``np.greater`` because a bare ``z > 0`` on a
    Python number yields a built-in ``bool``, which has no ``.astype``.
    """
    return np.greater(z, 0).astype(float)
def tanh(z):
    """Hyperbolic-tangent activation, elementwise (delegates to np.tanh)."""
    activated = np.tanh(z)
    return activated
def dtanh(z):
    """Derivative of tanh, elementwise: 1 - tanh(z)^2."""
    t = np.tanh(z)
    return 1.0 - t**2
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), elementwise, without overflow.

    The textbook expression evaluates exp(-z), which overflows for large
    negative z and makes NumPy emit a RuntimeWarning (or raise under
    ``np.seterr(over='raise')``). Here only exp(-|z|) is evaluated, which lies
    in [0, 1], and the algebraically equivalent branch is selected per element:

        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))

    Returns a NumPy scalar for scalar input and an array for array input.
    """
    e = np.exp(-np.abs(z))  # never larger than 1, so it cannot overflow
    out = np.where(np.greater_equal(z, 0), 1.0 / (1.0 + e), e / (1.0 + e))
    # np.where yields a 0-d array for scalar input; [()] turns that into a
    # NumPy scalar and leaves arrays with ndim >= 1 as they are.
    return out[()]
def dsigmoid(z):
    """Derivative of the logistic sigmoid, elementwise: s(z) * (1 - s(z))."""
    sig = sigmoid(z)
    complement = 1.0 - sig
    return sig * complement
def gelu(z):
    """Exact (erf-based) GELU, elementwise: z * Phi(z), with Phi the standard normal CDF."""
    half_z = 0.5 * z
    return half_z * (1.0 + erf(z / np.sqrt(2)))
def dgelu(z):
    """Exact derivative of GELU, elementwise: cdf(z) + z * pdf(z).

    cdf and pdf are the standard normal distribution function and density.
    """
    # Standard normal CDF expressed through erf.
    normal_cdf = 0.5 * (1.0 + erf(z / np.sqrt(2)))
    # Standard normal density.
    normal_pdf = (1.0 / np.sqrt(2*np.pi)) * np.exp(-0.5 * z**2)
    return normal_cdf + z * normal_pdf

# Activation selector: `phi` is the elementwise activation used by h / grad_h,
# `dphi` its elementwise derivative. Switch both together (e.g. relu & drelu).
# NOTE: with tanh every term x_i * tanh(x_i) is >= 0 and is 0 only at x_i = 0,
# so h(x) = sum_i x_i * tanh(x_i) is strictly positive on the unit sphere and
# has no zero there; descent on 0.5 * h^2 can then only reach a stationary
# point with h != 0 (the saved demo output ends at 0.7615941559557649 = tanh(1)).
phi = tanh
dphi = dtanh

# ---------- objective pieces ----------
def h(x):
    """Scalar objective h(x) = <x, phi(x)> = sum_i x_i * phi(x_i).

    `phi` is the module-level activation, applied elementwise. The result is
    returned as a plain Python float.
    """
    activated = phi(x)
    inner = np.dot(x, activated)
    return float(inner)

def grad_h(x):
    """Euclidean gradient of h, elementwise: phi(x) + dphi(x) * x.

    Follows from differentiating sum_i x_i * phi(x_i) coordinate by coordinate
    (product rule), using the module-level `phi` and `dphi`.
    """
    value = phi(x)
    slope = dphi(x)
    return value + slope * x

def proj_tangent(x, v):
    """Remove from v its component along x: v - <x, v> x.

    For x on the unit sphere this is (I - x x^T) v, the orthogonal projection
    of v onto the tangent space at x.
    """
    radial = np.dot(x, v)
    return v - x * radial

def step_on_sphere(x, g, eta):
    """Move from x by -eta * g, then rescale the result to unit Euclidean norm.

    This is the normalisation retraction onto the unit sphere (first-order
    accurate).
    """
    moved = x - eta * g
    length = np.linalg.norm(moved)
    return moved / length

# ---------- solver ----------
def find_on_vanishing_set(n=8, seed=0, steps=2000, lr=0.1, tol=1e-10, verbose=False,
                          grad_tol=None):
    """Search the unit sphere S^{n-1} for a point where h(x) = <x, phi(x)> vanishes.

    Runs fixed-step Riemannian gradient descent on J(x) = 0.5 * h(x)^2 from a
    seeded random starting point, retracting onto the sphere after each step.

    The Riemannian gradient is h(x) * proj_tangent(x, grad_h(x)); it vanishes
    at zeros of h but also wherever the tangential part of grad_h vanishes.
    The iteration can therefore settle at a point with h != 0 (and must do so
    when h has no zero on the sphere). Callers should always check the
    returned value against `tol` rather than assume success.

    Parameters
    ----------
    n : int
        Dimension of the ambient space.
    seed : int
        Seed passed to np.random.default_rng; determines the starting point.
    steps : int
        Maximum number of descent iterations.
    lr : float
        Constant step size.
    tol : float
        Success threshold: return as soon as |h(x)| <= tol.
    verbose : bool
        If True, print a one-line status message on exit.
    grad_tol : float or None
        Optional stall threshold. When given, the loop also returns as soon as
        the norm of the Riemannian gradient of J is <= grad_tol while
        |h(x)| > tol, instead of spending the remaining iterations at a
        stationary point. None (default) disables this check, so the
        iteration is exactly the fixed-budget descent.

    Returns
    -------
    (x, hx) : (np.ndarray, float)
        Final iterate and h evaluated at it.
    """
    rng = np.random.default_rng(seed)
    x = rng.normal(size=n)
    x /= np.linalg.norm(x)  # start on S^{n-1}

    for k in range(steps):
        hx = h(x)
        if abs(hx) <= tol:
            if verbose:
                print(f"Converged at iter {k}, |h|={abs(hx):.2e}")
            return x, hx

        # Riemannian gradient of J(x) = 0.5 * h(x)^2 on the sphere:
        # grad_S J = h(x) * proj_tangent(x, grad h(x))
        g_euc = grad_h(x)
        g_sph = hx * proj_tangent(x, g_euc)

        # Opt-in stall detection: a (near-)zero gradient with |h| > tol means
        # further steps of this size cannot make meaningful progress.
        if grad_tol is not None:
            g_norm = float(np.linalg.norm(g_sph))
            if g_norm <= grad_tol:
                if verbose:
                    print(f"Stalled at iter {k}, |h|={abs(hx):.2e}, |grad|={g_norm:.2e}")
                return x, hx

        # take a step and retract
        x = step_on_sphere(x, g_sph, lr)

        # (optional) if the iterates oscillate, a decaying step size such as
        # multiplying lr by 0.9 every 200 iterations can be added here.

    # Iteration budget exhausted: evaluate h once and reuse it for both the
    # message and the return value.
    hx = h(x)
    if verbose:
        print(f"Stopped at iter {steps}, |h|={abs(hx):.2e}")
    return x, hx

# ---------- demo ----------
if __name__ == "__main__":
    # Single demo run in R^6 with a fixed seed. The solver's verbose status
    # line reports whether |h| reached `tol` or the step budget ran out;
    # compare the printed h value with tol before trusting x*.
    x_star, h_val = find_on_vanishing_set(
        n=6,
        seed=42,
        steps=3000,
        lr=0.2,
        tol=1e-12,
        verbose=True,
    )
    print("x* on sphere:", x_star)
    print("<x*, f(x*)> =", h_val)


Stopped at iter 3000, |h|=7.62e-01
x* on sphere: [ 7.22851800e-175 -3.17408844e-174  1.98652715e-174  2.71102429e-174
 -1.00000000e+000 -4.91110175e-174]
<x*, f(x*)> = 0.7615941559557649
