# Experiments 1. Non-linear Equation System: sinus

We will minimize

$$f(x_1, x_2) = 100(x_2-x_1^2)^2 + (x_1-1)^2.$$


In [26]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import jax
import jax.numpy as jnp
import timeit
from jax.config import config

In [27]:
from methods import gradf_inexact
from methods import GradientDescent, parse_logs, AdaptiveL, StepSize, AdaptiveNoiseGD
from methods import ConstantStepSize, AdaptiveLdelta

In [28]:
matplotlib.use('Agg')
params = {'legend.fontsize': 20,
          'legend.handlelength': 4,
          "axes.labelsize": 45,
          "xtick.labelsize": 25,
          "ytick.labelsize": 25,
          "lines.linewidth": 2,
           "axes.titlesize":30}
matplotlib.rcParams.update(params)

In [29]:
config.update("jax_enable_x64", True)

In [30]:
path_pics = "../pics/"

In [31]:
def f1(w1, w2):
    G = 100*(w2-w1**2)**2 + (w1-1)**2
    return G.sum()
gradf = jax.grad(f1, argnums=(0, 1), has_aux=False)
jit_gradf = jax.jit(gradf)

## 0. Dataset

In [32]:
sigma=0
n = 1
def gradf(x):
    z = jit_gradf(x[:n], x[n:])
    g = np.hstack([np.array(i.block_until_ready()) for i in z])
    return g
f2 = lambda x: f1(x[:n], x[n:])
gradf(np.array([1., 1.])), f2(np.array([1., 1.]))

(array([0., 0.]), 0.0)

## 1. Noise Distributed on the Unit Sphere

The case when $\xi \sim \mathcal{U}(S_1(0))$

In [33]:
eps_list = [1e-8, 1e-6, 1e-4]

n = 1
d_list = [1]

w = np.ones(2*n)*2
w = np.array([1, 1.1])
v = np.random.randn(2*n)
res = {d:{"delta":[], 
           "iters_adaptL":[], "time_adaptL":[], "adaptL,x0-x*": [], "normg_adaptL": [],
           "iters_exact":[], "time_exact":[], "exact,x0-x*": [], "normg_exact": [],
          "iters_adaptLdelta":[], "time_adaptLdelta":[], "adaptLdelta,x0-x*": [], "normg_adaptLdelta": []} for d in d_list}
mu_list = {}
number = 10
save_iter = 1
N = 10000
methods = []
np.random.seed(1)
print(np.linalg.norm(w-np.array([1, 1])))
for eps in eps_list:
    d = 1
    mu = 1e-6
    Delta = np.sqrt(eps)
    f = lambda x: f2(x) + eps * np.random.uniform(-1, 1)
    res[d]["delta"].append(int(np.log10(Delta)))
    tol = 2*Delta
    

    grad_inexact = lambda w: gradf_inexact(w, gradf, Delta, 1, v=v)
    print(eps)
    method = GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4, delta=eps), name="GD, Delta={}".format(Delta), save_iter=save_iter)
    x = method.solve(w, f, grad_inexact, tol=tol, max_iter=N)
    g = lambda: GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4),
                                return_history=False).solve(w, f, grad_inexact, tol=tol, max_iter=N)
    T = timeit.timeit(g, number=number)/number        
    print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}\t{}".format(Delta, len(method.history), T*1000, np.linalg.norm(x-w), 
                                            np.linalg.norm(gradf(x))/Delta, f(x)))
    methods.append(method)
    res[d]["iters_adaptL"].append(len(method.history))
    res[d]["time_adaptL"].append("{:.2f}".format(T*1000))
    res[d]["adaptL,x0-x*"].append("{:.3f}".format(np.linalg.norm(x-w)))
    res[d]["normg_adaptL"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))


    method = AdaptiveNoiseGD(AdaptiveLdelta(L0=1, mindelta=1e-12, Lmin=mu/4, mu=mu, delta_alpha=2.1), name="GD, Delta={}".format(Delta), save_iter=save_iter, alpha=np.sqrt(6))
    x = method.solve(w, f, grad_inexact, max_iter=N)
    g = lambda: AdaptiveNoiseGD(AdaptiveLdelta(L0=1, mindelta=1e-12, Lmin=mu/4, mu=mu), return_history=False, 
                                alpha=np.sqrt(6)).solve(w, f, grad_inexact, max_iter=N)
    T = timeit.timeit(g, number=number)/number        
    print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}\t{}".format(Delta, len(method.history), T*1000, np.linalg.norm(x-w), 
                                            np.linalg.norm(gradf(x))/Delta, f(x)))
    methods.append(method)
    res[d]["iters_adaptLdelta"].append(len(method.history))
    res[d]["time_adaptLdelta"].append("{:.2f}".format(T*1000))
    res[d]["adaptLdelta,x0-x*"].append("{:.3f}".format(np.linalg.norm(x-w)))
    res[d]["normg_adaptLdelta"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))     
    print("\n")


0.10000000000000009
1e-08
	0.0001	3721	2884.37	0.099340	2.95	1.0938547439412939e-07
	0.0001	24	34.13	0.044916	327.32	0.0012714073464514157


1e-06
	0.001	2121	3908.88	0.093600	2.98	1.0959428014296009e-05
	0.001	22	5.31	0.044511	294.24	0.0014459181571573822


0.0001
	0.01	229	3383.24	0.047393	2.79	0.0010147837378224266
	0.01	11	2.91	0.047995	431.53	0.010196781088944888




In [36]:
s = ""

for d in d_list:
    s += str(d) + " & "

    cur_list = ["$10^{{{}}}$".format(i) for i in res[d]["delta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"

    cur_list = ["${}$".format(i) for i in res[d]["iters_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["time_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list = ["${}$".format(i) for i in res[d]["iters_adaptLdelta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["time_adaptLdelta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"

    s+= "\\\\\n\\hline\n"
print(s)

1 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}&\begin{tabular}{@{}c@{}} $3721$ \\ $2121$ \\ $229$ \end{tabular}&\begin{tabular}{@{}c@{}} $2884.37$ \\ $3908.88$ \\ $3383.24$ \end{tabular}&\begin{tabular}{@{}c@{}} $24$ \\ $22$ \\ $11$ \end{tabular}&\begin{tabular}{@{}c@{}} $34.13$ \\ $5.31$ \\ $2.91$ \end{tabular}\\
\hline



In [37]:
s = ""

for d in d_list:
    s += str(d) + " & "
    cur_list = ["$10^{{{}}}$".format(i) for i in res[d]["delta"]]


    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["adaptL,x0-x*"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["normg_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list = ["${}$".format(i) for i in res[d]["adaptLdelta,x0-x*"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["normg_adaptLdelta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"

    s+= "\\\\\n\\hline\n"
print(s)

1 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.099$ \\ $0.094$ \\ $0.047$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.95$ \\ $2.98$ \\ $2.79$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.045$ \\ $0.045$ \\ $0.048$ \end{tabular}&\begin{tabular}{@{}c@{}} $327.32$ \\ $294.24$ \\ $431.53$ \end{tabular}\\
\hline

