# Experiments 2.2. Quadratic Programming and Adaptive GD

Minimization of function

$$f(x)=\sum\limits_{i=k}^N d_i x_i^2$$
for positive $d_i$.

In [1]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import jax
import jax.numpy as jnp
from jax.config import config
import timeit
%matplotlib inline

In [2]:
from methods import gradf_inexact
from methods import GradientDescent, parse_logs, AdaptiveL, StepSize, AdaptiveNoiseGD
from methods import ConstantStepSize, AdaptiveLdelta

In [3]:
matplotlib.use('Agg')
params = {'legend.fontsize': 20,
          'legend.handlelength': 4,
          "axes.labelsize": 45,
          "xtick.labelsize": 25,
          "ytick.labelsize": 25,
          "lines.linewidth": 2,
           "axes.titlesize":30}
matplotlib.rcParams.update(params)

In [4]:
config.update("jax_enable_x64", True)

In [5]:
path_pics = "../pics/"

In [12]:
def f1(x, A):
    m = A.shape[0]
    r = A@x
    return 1/2 * x.T @ A @ x

gradf = jax.grad(f1, argnums=0, has_aux=False)
jit_gradf = jax.jit(gradf)

## 1. Comparison of Theoretical Iterations Count and Practice

In [13]:
np.random.seed(1)
n = 100
k = 10
mu = 0.1
d = np.zeros(n)
d[:k] *= 0
d[k:] = np.linspace(mu, 1, n-k)
A = np.diag(d)
w = np.random.randn(n)

In [14]:
eigvals, _ = np.linalg.eigh(A)
eigvals.sort()
L = np.real(eigvals.max())
mu = eigvals[eigvals>=1e-12].min()
L, mu

(1.0, 0.1)

In [15]:
sigma=0
gradf = lambda x: np.array(jit_gradf(x, A).block_until_ready())
f = lambda x: f1(x, A)

In [16]:
w = np.random.randn(n)
f(np.zeros(A.shape[-1]))

0.0

The case when $\xi \sim \mathcal{U}(S_1(0))$

In [18]:
np.random.seed(1)
np.random.seed(1)
n = 100
k = 10
mu_list = [0.01, 0.1, 0.9, 0.99]
res = {mu:{"delta":[], 
           "iters_adaptL":[], "time_adaptL":[], "adaptL,x0-x*": [], "normg_adaptL": [], "residual_adaptL":[],
           "iters_exact":[], "time_exact":[], "exact,x0-x*": [], "normg_exact": [], "residual_exact":[], 
          "iters_adaptLdelta":[], "time_adaptLdelta":[], "adaptLdelta,x0-x*": [], "normg_adaptLdelta": []} for mu in mu_list}
dist = {}
number = 200

for mu in mu_list:
    d = np.zeros(n)
    d[:k] *= 0
    d[k:] = np.linspace(mu, 1, n-k)
    A = np.diag(d)
    w = np.random.randn(n)
    sigma=0
    eigvals, _ = np.linalg.eigh(A)
    eigvals.sort()
    L = np.real(eigvals.max())
    mu = eigvals[eigvals>=1e-12].min()
    L, mu
    gradf = lambda x: np.array(jit_gradf(x, A).block_until_ready())
    f = lambda x: f1(x, A)
    f(np.zeros(A.shape[-1]))
    alpha = 1/L
    w = np.ones(n)*100
    wsol = np.where(d!=0, 0, w)
    dist[mu] = np.linalg.norm(w-wsol)
    v = np.random.randn(*w.shape)
    v = np.ones(*w.shape)
    Delta_list = [1e-7, 1e-4, 1e-1]
    N = int(2e4)
    save_iter = int(1)
    tol = 1e-9
    methods = []
    print(mu, dist[mu])

    for Delta in Delta_list:
        eps = Delta**2 / 16
        f2 = lambda x: f(x) + eps*np.random.uniform(-1, 1)
        res[mu]["delta"].append(int(np.log10(Delta)))
        tol = np.sqrt(6)*Delta

        grad_inexact = lambda w: gradf_inexact(w, gradf, Delta, 1, v=v)
        method = GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4, delta=eps), name="GD, Delta={}".format(Delta), save_iter=save_iter)
        x = method.solve(w, f2, grad_inexact, tol=tol, max_iter=N)
        g = lambda: GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4, delta=eps),
                                    return_history=False).solve(w, f2, grad_inexact, tol=tol, max_iter=N)
        T = timeit.timeit(g, number=number)/number        
        print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}".format(Delta, len(method.history), T*1000, np.linalg.norm(x-w), 
                                                np.linalg.norm(gradf(x))/Delta), f(x))
        methods.append(method)
        res[mu]["iters_adaptL"].append(len(method.history))
        res[mu]["time_adaptL"].append("{:.2f}".format(T*1000))
        res[mu]["adaptL,x0-x*"].append("{:.1f}".format(np.linalg.norm(x-w)))
        res[mu]["normg_adaptL"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))
        res[mu]["residual_adaptL"].append(f(x))


#         method = AdaptiveNoiseGD(AdaptiveLdelta(L0=1, mindelta=1e-12, Lmin=mu/4, mu=mu), name="GD, Delta={}".format(Delta), save_iter=save_iter, alpha=np.sqrt(6))
#         x = method.solve(w, f, grad_inexact, max_iter=N)
#         g = lambda: AdaptiveNoiseGD(AdaptiveLdelta(L0=1, mindelta=1e-12, Lmin=mu/4, mu=mu), return_history=False, 
#                                     alpha=np.sqrt(6)).solve(w, f, grad_inexact, max_iter=N)
#         T = timeit.timeit(g, number=number)/number        
#         print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}".format(Delta, len(method.history), T*1000, np.linalg.norm(x-w), 
#                                                 np.linalg.norm(gradf(x))/Delta))
#         methods.append(method)
#         res[mu]["iters_adaptLdelta"].append(len(method.history))
#         res[mu]["time_adaptLdelta"].append("{:.2f}".format(T*1000))
#         res[mu]["adaptLdelta,x0-x*"].append("{:.1f}".format(np.linalg.norm(x-w)))
#         res[mu]["normg_adaptLdelta"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))        

        method = GradientDescent(ConstantStepSize(alpha), name="GD, Delta={}".format(Delta), save_iter=save_iter)
        x = method.solve(w, f, grad_inexact, tol=tol, max_iter=N)
        g = lambda: GradientDescent(ConstantStepSize(alpha),
                                    return_history=False).solve(w, f, grad_inexact, tol=tol, max_iter=N)
        T = timeit.timeit(g, number=number)/number
        print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}".format(Delta, len(method.history), T*1000, np.linalg.norm(x-w), 
                                                np.linalg.norm(gradf(x))/Delta), f(x))
        methods.append(method)
        res[mu]["iters_exact"].append(len(method.history))
        res[mu]["time_exact"].append("{:.2f}".format(T*1000))
        res[mu]["exact,x0-x*"].append("{:.1f}".format(np.linalg.norm(x-w)))
        res[mu]["normg_exact"].append("{:.2f}".format(np.linalg.norm(gradf(x))/Delta))
        res[mu]["residual_exact"].append(f(x))
        print()

0.01 948.6832980505138
	1e-07	636	275.10	948.683296	2.01 1.8128103488210294e-12
	1e-07	1524	139.59	948.683296	2.34 2.543456392812393e-12

	0.0001	404	181.18	948.681255	2.04 1.8803343398275588e-06
	0.0001	836	78.32	948.680896	2.37 2.612430284126672e-06

	0.1	74	35.83	946.485034	2.30 1.9286178623074106
	0.1	153	14.99	946.162836	2.39 2.495032779695113

0.1 948.6832980505138
	1e-07	72	31.53	948.683298	1.90 1.388992588827616e-13
	1e-07	169	16.24	948.683298	2.17 2.2143446868839264e-13

	0.0001	46	19.52	948.683042	2.14 1.8793328098142807e-07
	0.0001	105	10.38	948.683037	1.95 1.7353273851737082e-07

	0.1	20	9.13	948.358430	1.80 0.1358948280284843
	0.1	41	4.51	948.256841	2.15 0.201168925125588

0.9 948.6832980505138
	1e-07	11	3.99	948.683298	0.92 4.533954798214249e-15
	1e-07	11	1.61	948.683298	0.93 4.5816913820042315e-15

	0.0001	8	3.25	948.683277	0.95 4.808811377156842e-09
	0.0001	8	1.29	948.683283	0.86 3.935524413057705e-09

	0.1	5	1.90	948.663202	0.88 0.0041037493999516814
	0.1	5	0.90	948.66

In [21]:
s = ""

for mu in mu_list:
    s += str(mu) + " & "
    cur_list = ["$10^{{{}}}$".format(i) for i in res[mu]["delta"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    
    cur_list = ["${}$".format(i) for i in res[mu]["iters_exact"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[mu]["time_exact"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"

    cur_list = ["${}$".format(i) for i in res[mu]["iters_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[mu]["time_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"
    
#     cur_list = ["${}$".format(i) for i in res[mu]["iters_adaptLdelta"]]
#     s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
#     cur_list = ["${}$".format(i) for i in res[mu]["time_adaptLdelta"]]
#     s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"

    s+= "\\\\\n\\hline\n"
print(s)

0.01 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $1524$ \\ $836$ \\ $153$ \end{tabular}&\begin{tabular}{@{}c@{}} $139.59$ \\ $78.32$ \\ $14.99$ \end{tabular}&\begin{tabular}{@{}c@{}} $636$ \\ $404$ \\ $74$ \end{tabular}&\begin{tabular}{@{}c@{}} $275.10$ \\ $181.18$ \\ $35.83$ \end{tabular}\\
\hline
0.1 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $169$ \\ $105$ \\ $41$ \end{tabular}&\begin{tabular}{@{}c@{}} $16.24$ \\ $10.38$ \\ $4.51$ \end{tabular}&\begin{tabular}{@{}c@{}} $72$ \\ $46$ \\ $20$ \end{tabular}&\begin{tabular}{@{}c@{}} $31.53$ \\ $19.52$ \\ $9.13$ \end{tabular}\\
\hline
0.9 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $11$ \\ $8$ \\ $5$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.61$ \\ $1.29$ \\ $0.90$ \end{tabular}&\begin{tabular}{@{}c@{}} $11$ \\ $8$ \\ $5$ \end{tabular}&\begin{tabular}{@{}c@{}} $3.99$

In [25]:
s = ""

for mu in mu_list:
    s += str(mu) + " & "
    #s += "{:.1f}".format(dist[mu]) + " & "
    cur_list = ["$10^{{{}}}$".format(i) for i in res[mu]["delta"]]
    
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[mu]["exact,x0-x*"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[mu]["normg_exact"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list1 = [(i/10**(int(np.log10(i))), int(np.log10(i))) for i in res[mu]["residual_exact"]]
    cur_list = []
    for a, b in cur_list1:
        if b != 0:
            cur_list.append("${:.2f} \\cdot 10^{{{}}}$".format(a, b))
        else:
            cur_list.append("${:.2f}$".format(a))
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    

    cur_list = ["${}$".format(i) for i in res[mu]["adaptL,x0-x*"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list = ["${}$".format(i) for i in res[mu]["normg_adaptL"]]
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
    cur_list1 = [(i/10**(int(np.log10(i))), int(np.log10(i))) for i in res[mu]["residual_adaptL"]]
    cur_list = []
    for a, b in cur_list1:
        if b != 0:
            cur_list.append("${:.2f} \\cdot 10^{{{}}}$".format(a, b))
        else:
            cur_list.append("${:.2f}$".format(a))
    s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"
    
#     cur_list = ["${}$".format(i) for i in res[mu]["adaptLdelta,x0-x*"]]
#     s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}&"
#     cur_list = ["${}$".format(i) for i in res[mu]["normg_adaptLdelta"]]
#     s+= "\\begin{tabular}{@{}c@{}} " + " \\\\ ".join(cur_list) + " \\end{tabular}"

    s+= "\\\\\n\\hline\n"
print(s)

0.01 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $946.2$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.34$ \\ $2.37$ \\ $2.39$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.25 \cdot 10^{-11}$ \\ $0.26 \cdot 10^{-5}$ \\ $2.50$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $946.5$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.01$ \\ $2.04$ \\ $2.30$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.18 \cdot 10^{-11}$ \\ $0.19 \cdot 10^{-5}$ \\ $1.93$ \end{tabular}\\
\hline
0.1 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.3$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.17$ \\ $1.95$ \\ $2.15$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.22 \cdot 10^{-12}$ \\ $0.17 \cdot 10^{-6}$ \\ $0.20$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.4$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.90$ \\ $2.14$ \\ $1.80$ \end{tabular}&\be