# Experiments 2.2. Quadratic Programming and Adaptive GD

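Minimization of the function

$$f(x)=\frac{1}{2}x^\top A x=\frac{1}{2}\sum\limits_{i=k+1}^{n} d_i x_i^2, \qquad A=\mathrm{diag}(0,\dots,0,d_{k+1},\dots,d_n),$$
for positive $d_i$, $i=k+1,\dots,n$ (the first $k$ diagonal entries of $A$ are zero).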

In [19]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import jax
import jax.numpy as jnp
from jax.config import config
import timeit
%matplotlib inline

In [20]:
from methods import gradf_inexact
from methods import GradientDescent, parse_logs, AdaptiveL, StepSize, AdaptiveNoiseGD
from methods import ConstantStepSize, AdaptiveLdelta

In [21]:
# Select the non-interactive Agg backend for rendering.
matplotlib.use('Agg')

# Global plot styling: legend, axis-label, tick-label, line and title sizes.
params = dict([
    ('legend.fontsize', 20),
    ('legend.handlelength', 4),
    ('axes.labelsize', 45),
    ('xtick.labelsize', 25),
    ('ytick.labelsize', 25),
    ('lines.linewidth', 2),
    ('axes.titlesize', 30),
])
matplotlib.rcParams.update(params)

In [22]:
# Enable 64-bit floating point in JAX (it computes in 32-bit by default).
# The flag is set through `jax.config.update`, the supported entry point, so this
# cell does not depend on the deprecated `jax.config.config` alias imported above
# (that import can be dropped once the notebook targets a recent JAX release).
jax.config.update("jax_enable_x64", True)

In [23]:
# Relative path of the pictures directory.
# NOTE(review): presumably where figures are saved; it is not used in the cells shown here.
path_pics = "../pics/"

In [24]:
def f1(x, A):
    """Evaluate the quadratic form ``1/2 * x^T A x``.

    Args:
        x: Point at which the function is evaluated (a vector compatible with ``A``).
        A: Matrix of the quadratic form.

    Returns:
        The scalar value ``1/2 * x.T @ A @ x``.
    """
    # The previously computed, never-used locals (`A.shape[0]` and `A @ x`) were
    # removed: the extra mat-vec product was pure overhead on every eager call.
    return 1/2 * x.T @ A @ x

# Gradient of f1 with respect to its first argument (x); argnums=0 and
# has_aux=False are the defaults of jax.grad, so they are left implicit.
gradf = jax.grad(f1)
# JIT-compiled version of the same gradient.
jit_gradf = jax.jit(gradf)

## 1. Comparison of Theoretical Iterations Count and Practice

In [25]:
np.random.seed(1)
n, k = 100, 10
mu = 0.1
# Diagonal of A: k zeros followed by n-k values evenly spaced on [mu, 1].
d = np.concatenate([np.zeros(k), np.linspace(mu, 1, n - k)])
A = np.diag(d)
# Random vector drawn from the standard normal distribution.
w = np.random.randn(n)

In [26]:
# Eigenvalues of A in ascending order; the eigenvectors are not needed.
eigvals = np.sort(np.linalg.eigh(A)[0])
# L is the largest eigenvalue, mu the smallest eigenvalue not below the 1e-12 cutoff.
L = np.real(np.max(eigvals))
mu = np.min(eigvals[eigvals >= 1e-12])
L, mu

(1.0, 0.1)

In [27]:
sigma = 0


def gradf(x):
    """Gradient of the objective at x for the current A, returned as a NumPy array."""
    # block_until_ready() waits for the JAX computation to finish before conversion.
    return np.array(jit_gradf(x, A).block_until_ready())


def f(x):
    """Objective value f1(x, A) for the current A."""
    return f1(x, A)

In [28]:
# Draw a fresh standard-normal vector of length n.
w = np.random.randn(n)
# Evaluate the objective at the origin; the value is displayed as the cell output.
f(np.zeros(A.shape[-1]))

0.0

The case when $\xi \sim \mathcal{U}(S_1(0))$

In [29]:
# Benchmark three step-size strategies for gradient descent with an inexact gradient
# on f(x) = 1/2 x^T A x, over several smallest non-zero eigenvalues (mu_list) and
# several gradient-error levels (Delta_list). For each run the iteration count,
# the average wall-clock time, ||x - x0|| and ||grad f(x)|| / Delta are printed
# and stored in `res`.
np.random.seed(1)
n = 100
k = 10
mu_list = [0.01, 0.1, 0.9, 0.99]

# res[mu] maps "delta" and, for every method tag, "iters_<tag>", "time_<tag>",
# "<tag>,x0-x*" and "normg_<tag>" to lists with one entry per Delta.
res = {}
for mu in mu_list:
    res[mu] = {"delta": []}
    for tag in ("adaptL", "exact", "adaptLdelta"):
        res[mu].update({"iters_" + tag: [], "time_" + tag: [],
                        tag + ",x0-x*": [], "normg_" + tag: []})
dist = {}
number = 200                      # timeit repetitions per measurement
N = int(2e4)                      # iteration cap passed as max_iter
save_iter = 1
Delta_list = [1e-3, 1e-4, 1e-1]
report_row = "\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}"

for mu in mu_list:
    # Diagonal of A: k zeros followed by n-k values evenly spaced on [mu, 1].
    d = np.zeros(n)
    d[k:] = np.linspace(mu, 1, n-k)
    A = np.diag(d)

    # The original cell drew two random vectors here that were overwritten before
    # use. The draws are kept (results discarded) only so that the global NumPy
    # random stream is consumed exactly as before.
    np.random.randn(n)
    np.random.randn(n)

    eigvals, _ = np.linalg.eigh(A)
    eigvals.sort()
    L = np.real(eigvals.max())
    # Smallest non-zero eigenvalue. It gets its own name so that the loop variable
    # `mu`, which is the key into `res` and `dist`, is never overwritten by a value
    # coming out of the eigensolver (exact float equality is not guaranteed).
    mu_min = eigvals[eigvals>=1e-12].min()

    gradf = lambda x: np.array(jit_gradf(x, A).block_until_ready())
    f = lambda x: f1(x, A)
    alpha = 1/L

    w = np.ones(n)*100                      # starting point
    wsol = np.where(d!=0, 0, w)             # w with the coordinates where d != 0 set to zero
    dist[mu] = np.linalg.norm(w-wsol)
    v = np.ones(n)                          # fixed vector passed to gradf_inexact as v
    methods = []
    print(mu, dist[mu])

    for Delta in Delta_list:
        # Round before converting: int() alone truncates toward zero and would
        # turn a value such as -2.9999999999 into -2.
        res[mu]["delta"].append(int(np.round(np.log10(Delta))))
        tol = np.sqrt(6)*Delta

        grad_inexact = lambda point: gradf_inexact(point, gradf, Delta, 1, v=v)

        # (result tag, solver factory, keyword arguments for .solve), in the order
        # in which the runs are executed and printed.
        solvers = [
            ("adaptL",
             lambda **kw: GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu_min/4), **kw),
             {"tol": tol, "max_iter": N}),
            ("adaptLdelta",
             lambda **kw: AdaptiveNoiseGD(AdaptiveLdelta(L0=1, mindelta=1e-12, Lmin=mu_min/4, mu=mu_min),
                                          alpha=np.sqrt(6), **kw),
             {"max_iter": N}),
            ("exact",
             lambda **kw: GradientDescent(ConstantStepSize(alpha), **kw),
             {"tol": tol, "max_iter": N}),
        ]

        for tag, make_method, solve_kwargs in solvers:
            # Run once with history saving to obtain the iteration count and the result.
            method = make_method(name="GD, Delta={}".format(Delta), save_iter=save_iter)
            x = method.solve(w, f, grad_inexact, **solve_kwargs)

            # Timing uses fresh solver instances created with return_history=False.
            timed_run = lambda: make_method(return_history=False).solve(w, f, grad_inexact, **solve_kwargs)
            T = timeit.timeit(timed_run, number=number)/number

            n_iters = len(method.history)
            shift = np.linalg.norm(x-w)
            rel_grad = np.linalg.norm(gradf(x))/Delta
            print(report_row.format(Delta, n_iters, T*1000, shift, rel_grad))

            methods.append(method)
            res[mu]["iters_" + tag].append(n_iters)
            res[mu]["time_" + tag].append("{:.2f}".format(T*1000))
            res[mu][tag + ",x0-x*"].append("{:.1f}".format(shift))
            res[mu]["normg_" + tag].append("{:.2f}".format(rel_grad))
        print()

0.01 948.6832980505138
	0.001	225	105.52	948.663411	2.21
	0.001	300	180.61	948.673899	1.09
	0.001	607	57.03	948.659351	2.32

	0.0001	291	122.95	948.680923	2.30
	0.0001	343	205.60	948.682199	1.72
	0.0001	839	78.38	948.680962	2.31

	0.1	76	30.81	946.396069	2.33
	0.1	173	99.76	947.713411	1.03
	0.1	155	14.76	946.209419	2.37

0.1 948.6832980505138
	0.001	38	15.37	948.680611	2.08
	0.001	82	45.09	948.682296	0.80
	0.001	82	8.14	948.679989	2.24

	0.0001	48	18.86	948.683017	2.19
	0.0001	92	45.37	948.683201	0.76
	0.0001	103	9.98	948.682966	2.34

	0.1	22	8.51	948.385163	1.87
	0.1	67	33.33	948.545225	0.81
	0.1	41	4.48	948.252326	2.16

0.9 948.6832980505138
	0.001	22	7.87	948.682921	0.92
	0.001	42	24.85	948.682601	0.76
	0.001	7	1.13	948.683210	0.90

	0.0001	26	10.27	948.683294	0.88
	0.0001	45	26.67	948.683238	0.70
	0.0001	8	1.24	948.683269	0.93

	0.1	15	5.39	948.666445	0.89
	0.1	38	20.70	948.611310	0.74
	0.1	5	0.88	948.655075	0.95

0.99 948.6832980505138
	0.001	20	7.69	948.681319	2.06
	0.001	45	25.3

In [30]:
# Build the LaTeX table body with iteration counts and timings: one row per mu,
# each cell being a nested one-column tabular with one line per Delta.
column_keys = ["iters_exact", "time_exact",
               "iters_adaptL", "time_adaptL",
               "iters_adaptLdelta", "time_adaptLdelta"]

rows = []
for mu in mu_list:
    # First column: the Delta exponents rendered as powers of ten.
    stacked = [["$10^{{{}}}$".format(e) for e in res[mu]["delta"]]]
    # Remaining columns: the recorded values for every method.
    stacked += [["${}$".format(val) for val in res[mu][key]] for key in column_keys]
    cells = ["\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(entries) + " \\end{tabular}"
             for entries in stacked]
    rows.append(str(mu) + " & " + "&".join(cells) + "\\\\\n\\hline\n")

s = "".join(rows)
print(s)

0.01 & \begin{tabular}{@{}c@{}} $10^{-3}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $607$ \\ $839$ \\ $155$ \end{tabular}&\begin{tabular}{@{}c@{}} $57.03$ \\ $78.38$ \\ $14.76$ \end{tabular}&\begin{tabular}{@{}c@{}} $225$ \\ $291$ \\ $76$ \end{tabular}&\begin{tabular}{@{}c@{}} $105.52$ \\ $122.95$ \\ $30.81$ \end{tabular}&\begin{tabular}{@{}c@{}} $300$ \\ $343$ \\ $173$ \end{tabular}&\begin{tabular}{@{}c@{}} $180.61$ \\ $205.60$ \\ $99.76$ \end{tabular}\\
\hline
0.1 & \begin{tabular}{@{}c@{}} $10^{-3}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $82$ \\ $103$ \\ $41$ \end{tabular}&\begin{tabular}{@{}c@{}} $8.14$ \\ $9.98$ \\ $4.48$ \end{tabular}&\begin{tabular}{@{}c@{}} $38$ \\ $48$ \\ $22$ \end{tabular}&\begin{tabular}{@{}c@{}} $15.37$ \\ $18.86$ \\ $8.51$ \end{tabular}&\begin{tabular}{@{}c@{}} $82$ \\ $92$ \\ $67$ \end{tabular}&\begin{tabular}{@{}c@{}} $45.09$ \\ $45.37$ \\ $33.33$ \end{tabular}\\
\hline
0.9 & \begin{tabular}{@{}c@{}} $10^{

In [31]:
# Build the LaTeX table body with ||x - x0|| and ||grad f(x)|| / Delta for every
# method: one row per mu, each cell a nested one-column tabular (one line per Delta).
def _stacked_cell(entries):
    """Wrap already-formatted entries into a one-column LaTeX tabular."""
    return "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(entries) + " \\end{tabular}"


s = ""
for mu in mu_list:
    row_cells = [_stacked_cell(["$10^{{{}}}$".format(e) for e in res[mu]["delta"]])]
    for key in ("exact,x0-x*", "normg_exact",
                "adaptL,x0-x*", "normg_adaptL",
                "adaptLdelta,x0-x*", "normg_adaptLdelta"):
        row_cells.append(_stacked_cell(["${}$".format(val) for val in res[mu][key]]))
    s += str(mu) + " & " + "&".join(row_cells) + "\\\\\n\\hline\n"
print(s)

0.01 & \begin{tabular}{@{}c@{}} $10^{-3}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $946.2$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.32$ \\ $2.31$ \\ $2.37$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $946.4$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.21$ \\ $2.30$ \\ $2.33$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $947.7$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.09$ \\ $1.72$ \\ $1.03$ \end{tabular}\\
\hline
0.1 & \begin{tabular}{@{}c@{}} $10^{-3}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.3$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.24$ \\ $2.34$ \\ $2.16$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.4$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.08$ \\ $2.19$ \\ $1.87$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.5$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.80$ \\ $0.76$ \\ $0.81$ \end{tabular}\\
\hline
0.9 & \be