# Experiments 7. Nesterov-Skokov Function

We will minimize

$$f(x) = \frac{1}{4}(1-x_1)^2+\sum\limits_{i=1}^{n-1}\left(x_{i+1}-2x_i^2 + 1\right)^2.$$


In [2]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import jax
import jax.numpy as jnp
import timeit
from jax.config import config

In [3]:
from methods import gradf_inexact
from methods import GradientDescent, parse_logs, AdaptiveL, StepSize, AdaptiveNoiseGD
from methods import ConstantStepSize, AdaptiveLdelta

In [4]:
matplotlib.use('Agg')
params = {'legend.fontsize': 20,
          'legend.handlelength': 4,
          "axes.labelsize": 45,
          "xtick.labelsize": 25,
          "ytick.labelsize": 25,
          "lines.linewidth": 2,
           "axes.titlesize":30}
matplotlib.rcParams.update(params)

In [5]:
config.update("jax_enable_x64", True)

In [6]:
path_pics = "../pics/"

In [27]:
def f1(x, n):
    G = 1/4*(1-x[0])**2
    for i in range(n-1):
        G += (x[i+1] - 2 * x[i]**2 +1)**2
    return G.sum()
gradf = jax.grad(f1, argnums=(0,), has_aux=False)
jit_gradf = jax.jit(gradf)
gradf = lambda x: np.hstack([np.array(i.block_until_ready()) for i in jit_gradf(x)])

def get_params(n):
    f = lambda x: f1(x, n)
    gradf = jax.grad(f, argnums=(0,), has_aux=False)
    jit_gradf = jax.jit(gradf)
    gradf = lambda x: np.hstack([np.array(i.block_until_ready()) for i in jit_gradf(x)])
    return gradf, f
gradf, f = get_params(2)
gradf(np.ones(2))

array([0., 0.])

## 1. Noise Distributed on the Unit Sphere

The case when $\xi \sim \mathcal{U}(S_1(0))$

In [45]:
eps_list = [1e-8, 1e-6, 1e-4]

n = 1
n_list = [3, 5, 7]

res = {n:{"delta":[], 
           "iters_adaptL":[], "time_adaptL":[], "adaptL,x0-x*": [], "normg_adaptL": [],
           "iters_exact":[], "time_exact":[], "exact,x0-x*": [], "normg_exact": [],
          "residual_adaptL":[],
          "iters_adaptLdelta":[], "time_adaptLdelta":[], "adaptLdelta,x0-x*": [], "normg_adaptLdelta": []} for n in n_list}
mu_list = {}
number = 10
save_iter = 1
N = 900000
methods = []
np.random.seed(1)
print(2)
for n in n_list:
    print(n)
    gradf, f = get_params(n)
    w = np.ones(n)
    w[0] *= -1
    print(np.linalg.norm(gradf(w)), f(w))
    for eps in eps_list:
        mu = 1e-6
        Delta = np.sqrt(eps)
        res[n]["delta"].append(int(np.log10(Delta)))
        tol = 2*Delta


        grad_inexact = lambda w: gradf_inexact(w, gradf, Delta, 1)
        print(eps)
        method = GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4, delta=eps), name="GD, Delta={}".format(Delta), save_iter=save_iter)
        x = method.solve(w, f, grad_inexact, tol=tol, max_iter=N)
        g = lambda: GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4),
                                    return_history=False).solve(w, f, grad_inexact, tol=tol, max_iter=N)
        if len(method.history) > 10000:
            number = 1
        else:
            number = 10
        T = timeit.timeit(g, number=number)/number        
        print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}\t{}".format(Delta, len(method.history), T*1000, np.linalg.norm(x-w), 
                                                np.linalg.norm(gradf(x))/Delta, f(x)))
        methods.append(method)
        res[n]["iters_adaptL"].append(len(method.history))
        res[n]["time_adaptL"].append("{:.2f}".format(T*1000))
        res[n]["adaptL,x0-x*"].append("{:.3f}".format(np.linalg.norm(x-w)))
        res[n]["normg_adaptL"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))
        res[n]["residual_adaptL"].append(f(x))
        print("\n")
    print("\n")

2
3
1.0 1.0
1e-08
	0.0001	13641	1838.11	1.996474	2.88	2.073095046495774e-05


1e-06
	0.001	2563	326.36	2.147914	2.88	0.0010854164066831924


0.0001
	0.01	517	67.69	2.746849	2.88	0.01267056251194494




5
1.0 1.0
1e-08
	0.0001	68021	10052.45	2.910212	2.93	0.00032773964007254245


1e-06
	0.001	15340	2032.23	3.309149	2.62	0.004876602167643842


0.0001
	0.01	6	1.12	0.035136	1.94	0.9830409409279663




7
1.0 1.0
1e-08
	0.0001	499806	51.71	3.037382	1.96	0.0011236712731749492


1e-06
	0.001	128	1.88	0.035686	2.67	0.9830308879593596


0.0001
	0.01	28	1.12	0.035468	2.21	0.9830369598743012






In [46]:
s = ""

for d in n_list:
    s += str(d) + " & "

    cur_list = ["$10^{{{}}}$".format(i) for i in res[d]["delta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"

    cur_list = ["${}$".format(i) for i in res[d]["iters_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["time_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list = ["${}$".format(i) for i in res[d]["adaptL,x0-x*"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[d]["normg_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list1 = [(i/10**(int(np.log10(i))), int(np.log10(i))) for i in res[d]["residual_adaptL"]]
    cur_list = []
    for a, b in cur_list1:
        if b != 0:
            cur_list.append("${:.2f} \\cdot 10^{{{}}}$".format(a, b))
        else:
            cur_list.append("${:.2f}$".format(a))

    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"

    s+= "\\\\\n\\hline\n"
print(s)

3 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}&\begin{tabular}{@{}c@{}} $13641$ \\ $2563$ \\ $517$ \end{tabular}&\begin{tabular}{@{}c@{}} $1838.11$ \\ $326.36$ \\ $67.69$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.996$ \\ $2.148$ \\ $2.747$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.88$ \\ $2.88$ \\ $2.88$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.21 \cdot 10^{-4}$ \\ $0.11 \cdot 10^{-2}$ \\ $0.13 \cdot 10^{-1}$ \end{tabular}\\
\hline
5 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabular}&\begin{tabular}{@{}c@{}} $68021$ \\ $15340$ \\ $6$ \end{tabular}&\begin{tabular}{@{}c@{}} $10052.45$ \\ $2032.23$ \\ $1.12$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.910$ \\ $3.309$ \\ $0.035$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.93$ \\ $2.62$ \\ $1.94$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.33 \cdot 10^{-3}$ \\ $0.49 \cdot 10^{-2}$ \\ $0.98$ \end{tabular}\\
\hline
7 & \begin{tabular}{@{}c@{}} $10^{-4}$ \\ $10^{-3}$ \\ $10^{-2}$ \end{tabula

In [38]:
res[d]["residual_adaptL"]

[0.9830310981506949, 0.9830357013830413]