# Experiments 7. Nesterov-Skokov Function with Antigradient inexactness

We will minimize

$$f(x) = \frac{1}{4}(1-x_1)^2+\sum\limits_{i=1}^{n-1}\left(x_{i+1}-2x_i^2 + 1\right)^2.$$


In [1]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import jax
import jax.numpy as jnp
import timeit
from jax.config import config

In [2]:
from methods import gradf_inexact
from methods import GradientDescent, parse_logs, AdaptiveL, StepSize, AdaptiveNoiseGD
from methods import ConstantStepSize, AdaptiveLdelta

In [3]:
matplotlib.use('Agg')
params = {'legend.fontsize': 20,
          'legend.handlelength': 4,
          "axes.labelsize": 45,
          "xtick.labelsize": 25,
          "ytick.labelsize": 25,
          "lines.linewidth": 2,
           "axes.titlesize":30}
matplotlib.rcParams.update(params)

In [4]:
config.update("jax_enable_x64", True)

In [5]:
path_pics = "../pics/"

In [6]:
def f1(x, n):
    G = 1/4*(1-x[0])**2
    for i in range(n-1):
        G += (x[i+1] - 2 * x[i]**2 +1)**2
    return G.sum()
gradf = jax.grad(f1, argnums=(0,), has_aux=False)
jit_gradf = jax.jit(gradf)
gradf = lambda x: np.hstack([np.array(i.block_until_ready()) for i in jit_gradf(x)])

def get_params(n):
    f = lambda x: f1(x, n)
    gradf = jax.grad(f, argnums=(0,), has_aux=False)
    jit_gradf = jax.jit(gradf)
    gradf = lambda x: np.hstack([np.array(i.block_until_ready()) for i in jit_gradf(x)])
    return gradf, f
gradf, f = get_params(2)
gradf(np.ones(2))



array([0., 0.])

## 1. Noise Distributed on the Unit Sphere

The case when $\xi \sim \mathcal{U}(S_1(0))$

In [7]:
eps_list = [1e-8, 1e-6, 1e-4]

n = 1
n_list = [3, 5, 7]

res = {n:{"delta":[], 
           "iters_adaptL":[], "time_adaptL":[], "adaptL,x0-x*": [], "normg_adaptL": [],
           "iters_exact":[], "time_exact":[], "exact,x0-x*": [], "normg_exact": [],
          "residual_adaptL":[],
          "iters_adaptLdelta":[], "time_adaptLdelta":[], "adaptLdelta,x0-x*": [], "normg_adaptLdelta": []} for n in n_list}
mu_list = {}
number = 10
save_iter = 1
N = 900000
methods = []
np.random.seed(1)
print(2)
for n in n_list:
    print(n)
    gradf, f = get_params(n)
    w = np.ones(n)
    w[0] *= -1
    print(np.linalg.norm(gradf(w)), f(w))
    for eps in eps_list:
        mu = 1e-6
        Delta = np.sqrt(eps)
        f_inexact = lambda x: f(x) + eps * np.random.uniform(-1, 1)
        res[n]["delta"].append(int(np.log10(Delta)))
        tol = 2*Delta


        grad_inexact = lambda w: gradf_inexact(w, gradf, Delta, 3)
        print(eps)
        method = GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4, delta=eps), name="GD, Delta={}".format(Delta), save_iter=save_iter)
        x = method.solve(w, f_inexact, grad_inexact, tol=tol, max_iter=N)
        g = lambda: GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4),
                                    return_history=False).solve(w, f_inexact, grad_inexact, tol=tol, max_iter=N)
        if len(method.history) > 1000:
            number = 1
        else:
            number = 10
        T = timeit.timeit(g, number=number)/number        
        print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}\t{}".format(Delta, len(method.history), T, np.linalg.norm(x-w), 
                                                np.linalg.norm(gradf(x))/Delta, f(x)))
        methods.append(method)
        res[n]["iters_adaptL"].append(len(method.history))
        res[n]["time_adaptL"].append("{:.2f}".format(T))
        res[n]["adaptL,x0-x*"].append("{:.3f}".format(np.linalg.norm(x-w)))
        res[n]["normg_adaptL"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))
        res[n]["residual_adaptL"].append(f(x))
        print("\n")
    print("\n")

2
3
1.0 1.0
1e-08
	0.0001	13703	247.73	1.996551	2.99	2.2389726785421574e-05


1e-06
	0.001	2531	258.80	2.163126	2.99	0.0012053276867817668


0.0001
	0.01	498	394.30	2.751641	2.97	0.012898109089983944




5
1.0 1.0
1e-08
	0.0001	55452	303.51	2.893251	3.00	0.00035770029365469417


1e-06
	0.001	15295	298.79	3.338827	2.80	0.0051303037949482575


0.0001
	0.01	5	60.15	0.035755	2.06	0.9830358914472521




7
1.0 1.0
1e-08
	0.0001	22	97.41	0.035677	2.56	0.983031159538502


1e-06
	0.001	9	134.75	0.035671	2.35	0.9830312515634204


0.0001
	0.01	5	106.39	0.035755	2.06	0.9830358914472521






In [8]:
s = ""

for d in n_list:
    s += str(d) + " & "

    cur_list = ["$10^{{{}}}$".format(i) for i in res[d]["delta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"

    cur_list = ["${}$".format(i) for i in res[d]["iters_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["time_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list = ["${}$".format(i) for i in res[d]["adaptL,x0-x*"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["normg_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list1 = [(i/10**(int(np.log10(i))), int(np.log10(i))) for i in res[d]["residual_adaptL"]]
    cur_list = []
    for a, b in cur_list1:
        if b != 0:
            cur_list.append("${:.2f} \\cdot 10^{{{}}}$".format(a, b))
        else:
            cur_list.append("${:.2f}$".format(a))

    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"

    s+= "\\\\\n\\hline\n"
print(s)

3 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}&\begin{tabular}{@{}c@{}} $13703$ \\ $2531$ \\ $498$ \end{tabular}&\begin{tabular}{@{}c@{}} $247.73$ \\ $258.80$ \\ $394.30$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.997$ \\ $2.163$ \\ $2.752$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.99$ \\ $2.99$ \\ $2.97$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.22 \cdot 10^{-4}$ \\ $0.12 \cdot 10^{-2}$ \\ $0.13 \cdot 10^{-1}$ \end{tabular}\\
\hline
5 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}&\begin{tabular}{@{}c@{}} $55452$ \\ $15295$ \\ $5$ \end{tabular}&\begin{tabular}{@{}c@{}} $303.51$ \\ $298.79$ \\ $60.15$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.893$ \\ $3.339$ \\ $0.036$ \end{tabular}&\begin{tabular}{@{}c@{}} $3.00$ \\ $2.80$ \\ $2.06$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.36 \cdot 10^{-3}$ \\ $0.51 \cdot 10^{-2}$ \\ $0.98$ \end{tabular}\\
\hline
7 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}