# Experiments 2.2. Quadratic Programming and Adaptive GD

Minimization of the function

$$f(x)=\frac{1}{2}\sum\limits_{i=k}^N d_i x_i^2$$
for positive $d_i$.

In [2]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import jax
import jax.numpy as jnp
from jax.config import config
import timeit
%matplotlib inline

In [3]:
from methods import gradf_inexact
from methods import GradientDescent, parse_logs, AdaptiveL, StepSize, AdaptiveNoiseGD
from methods import ConstantStepSize, AdaptiveLdelta

In [4]:
# Select the 'Agg' backend and install the global plot styling
# (font sizes for legend, axis labels, ticks and titles; line width).
matplotlib.use('Agg')
params = {
    "legend.fontsize": 20,
    "legend.handlelength": 4,
    "axes.labelsize": 45,
    "xtick.labelsize": 25,
    "ytick.labelsize": 25,
    "lines.linewidth": 2,
    "axes.titlesize": 30,
}
matplotlib.rcParams.update(params)

In [5]:
# Turn on JAX's "jax_enable_x64" flag before any JAX computation is traced.
# NOTE(review): `from jax.config import config` is deprecated in recent JAX
# releases in favour of `jax.config.update(...)` — confirm against the pinned
# JAX version before changing the import.
config.update("jax_enable_x64", True)

In [6]:
# Relative output directory; presumably where figures are saved
# (it is not read by any cell visible in this part of the notebook).
path_pics = "../pics/"

In [7]:
def f1(x, A):
    """Quadratic objective f(x) = 1/2 * x^T A x.

    Parameters
    ----------
    x : array
        Point at which the objective is evaluated (a vector of length n
        in this notebook).
    A : array
        Matrix of the quadratic form (an (n, n) diagonal matrix in this
        notebook).

    Returns
    -------
    scalar
        The value 1/2 * x^T A x.
    """
    # The expression is kept exactly as before so floating-point results
    # and the traced JAX gradient are unchanged; only the unused
    # intermediates (A.shape[0] and the extra A @ x product) were removed.
    return 1/2 * x.T @ A @ x

# Gradient of f1 with respect to its first argument x (argnums=0);
# f1 returns a plain scalar, hence has_aux=False.
gradf = jax.grad(f1, argnums=0, has_aux=False)
# JIT-compiled gradient; the later cells call this one (with A passed explicitly).
jit_gradf = jax.jit(gradf)

## 1. Comparison of Theoretical Iterations Count and Practice

In [8]:
# Diagonal test problem: the first k diagonal entries are zero and the
# remaining n - k entries are evenly spaced between mu and 1.
np.random.seed(1)
n, k = 100, 10
mu = 0.1
d = np.concatenate([np.zeros(k), np.linspace(mu, 1, n - k)])
A = np.diag(d)
# Gaussian vector of length n drawn from the seeded global RNG.
w = np.random.randn(n)

In [9]:
# Spectrum of A: L is the largest eigenvalue, and mu is overwritten with the
# smallest eigenvalue that is at least 1e-12 (the zero diagonal entries of A
# are therefore skipped).
eigvals, _ = np.linalg.eigh(A)
# NOTE(review): numpy documents eigh as returning ascending eigenvalues, so
# this sort looks redundant; max()/min() below do not depend on it either.
eigvals.sort()
L = np.real(eigvals.max())
mu = eigvals[eigvals>=1e-12].min()
# Last expression of the cell: shown as the cell output.
L, mu

(1.0, 0.1)

In [10]:
# NOTE(review): sigma is assigned here but not read by any visible cell.
sigma=0
# Rebinds the name `gradf` (previously the raw jax.grad result) to a wrapper
# that fixes A, waits for the JAX result and converts it to a NumPy array.
# A is looked up when the lambda is called, not when it is defined.
gradf = lambda x: np.array(jit_gradf(x, A).block_until_ready())
# Objective with the same A bound by name.
f = lambda x: f1(x, A)

In [11]:
# Draw a new Gaussian vector of length n from the global RNG.
w = np.random.randn(n)
# Objective value at the origin (displayed as the cell output).
f(np.zeros(A.shape[-1]))

0.0

The case when $\xi \sim \mathcal{U}(S_1(0))$

In [12]:
# Benchmark three solver configurations on the diagonal quadratic for several
# values of the smallest positive eigenvalue and several noise levels Delta.
# For every (mu, Delta, solver) triple one run is recorded (iterations, mean
# wall time, distance travelled, final gradient norm relative to Delta).
np.random.seed(1)
n = 100
k = 10
mu_list = [0.01, 0.1, 0.9, 0.99]
res = {mu_target: {"delta":[],
           "iters_adaptL":[], "time_adaptL":[], "adaptL,x0-x*": [], "normg_adaptL": [],
           "iters_exact":[], "time_exact":[], "exact,x0-x*": [], "normg_exact": [],
          "iters_adaptLdelta":[], "time_adaptLdelta":[], "adaptLdelta,x0-x*": [], "normg_adaptLdelta": []}
       for mu_target in mu_list}
dist = {}
number = 200  # repetitions used by timeit for each timing

# `mu_target` (the value taken from mu_list) is used as the key of `res` and
# `dist`; `mu` holds the eigenvalue recomputed from A. Keeping the two apart
# means the dictionary lookups no longer depend on eigh reproducing the list
# value bit-for-bit.
for mu_target in mu_list:
    d = np.zeros(n)
    d[k:] = np.linspace(mu_target, 1, n-k)
    A = np.diag(d)
    # Discarded draw: its value was never used, but it is kept so the global
    # RNG stream advances exactly as before (gradf_inexact is defined
    # elsewhere and may consume random numbers — TODO confirm).
    np.random.randn(n)
    eigvals = np.linalg.eigh(A)[0]
    L = eigvals.max()
    mu = eigvals[eigvals>=1e-12].min()
    gradf = lambda x: np.array(jit_gradf(x, A).block_until_ready())
    f = lambda x: f1(x, A)
    alpha = 1/L
    w = np.ones(n)*100
    # wsol keeps w on the zero-curvature coordinates and is 0 elsewhere.
    wsol = np.where(d!=0, 0, w)
    dist[mu_target] = np.linalg.norm(w-wsol)
    np.random.randn(*w.shape)  # discarded draw, kept for the same RNG-parity reason
    v = np.ones(w.shape)
    Delta_list = [1e-7, 1e-4, 1e-1]
    N = int(2e4)
    save_iter = int(1)
    methods = []
    print(mu, dist[mu_target])

    for Delta in Delta_list:
        # round() instead of bare int(): int() truncates toward zero, so a
        # log10 result such as -2.9999999999999996 would be stored as -2.
        res[mu_target]["delta"].append(int(round(np.log10(Delta))))
        tol = np.sqrt(6)*Delta

        grad_inexact = lambda w: gradf_inexact(w, gradf, Delta, 1, v=v)
        run_name = "GD, Delta={}".format(Delta)

        # (result-key label, solver factory, extra keyword arguments of solve).
        # The order fixes both the print order and the order of `methods`.
        solvers = [
            ("adaptL",
             lambda **kw: GradientDescent(AdaptiveL(L0=1, Delta=Delta, Lmin=mu/4), **kw),
             {"tol": tol}),
            ("adaptLdelta",
             lambda **kw: AdaptiveNoiseGD(AdaptiveLdelta(L0=1, mindelta=1e-12, Lmin=mu/4, mu=mu),
                                          alpha=np.sqrt(6), **kw),
             {}),
            ("exact",
             lambda **kw: GradientDescent(ConstantStepSize(alpha), **kw),
             {"tol": tol}),
        ]

        for label, make_method, solve_kwargs in solvers:
            # Recorded run: keeps the history so iterations can be counted.
            method = make_method(name=run_name, save_iter=save_iter)
            x = method.solve(w, f, grad_inexact, max_iter=N, **solve_kwargs)
            # Timed run: a fresh solver without history, averaged over `number` calls.
            g = lambda: make_method(return_history=False).solve(w, f, grad_inexact,
                                                                max_iter=N, **solve_kwargs)
            T = timeit.timeit(g, number=number)/number

            n_iters = len(method.history)
            travelled = np.linalg.norm(x-w)
            grad_ratio = np.linalg.norm(gradf(x))/Delta
            # Columns: Delta, iterations, time in ms, ||x - w||, ||grad f(x)|| / Delta.
            print("\t{}\t{}\t{:.2f}\t{:.6f}\t{:.2f}".format(Delta, n_iters, T*1000, travelled,
                                                    grad_ratio))
            methods.append(method)
            res[mu_target]["iters_" + label].append(n_iters)
            res[mu_target]["time_" + label].append("{:.2f}".format(T*1000))
            res[mu_target][label + ",x0-x*"].append("{:.1f}".format(travelled))
            res[mu_target]["normg_" + label].append("{:.2f}".format(grad_ratio))
        print()



0.01 948.6832980505138
	1e-07	511	226.03	948.683296	2.03
	1e-07	515	412.39	948.683297	1.84
	1e-07	1525	142.31	948.683296	2.29

	0.0001	301	158.16	948.681419	2.31
	0.0001	314	247.06	948.681845	2.43
	0.0001	840	75.99	948.680964	2.26

	0.1	85	33.94	946.705087	1.95
	0.1	170	103.11	947.729663	1.07
	0.1	159	14.81	946.329848	2.27

0.1 948.6832980505138
	1e-07	76	29.58	948.683298	1.97
	1e-07	102	60.81	948.683298	0.87
	1e-07	169	15.96	948.683298	2.17

	0.0001	49	18.11	948.683106	1.68
	0.0001	94	44.58	948.683195	0.80
	0.0001	104	10.15	948.683008	2.18

	0.1	24	8.21	948.310453	2.26
	0.1	54	32.44	948.541410	0.83
	0.1	41	4.46	948.267336	2.14

0.9 948.6832980505138
	1e-07	37	15.59	948.683298	1.58
	1e-07	72	36.20	948.683298	0.69
	1e-07	11	1.57	948.683298	0.92

	0.0001	26	19.90	948.683274	0.96
	0.0001	48	51.72	948.683229	0.79
	0.0001	8	1.28	948.683288	0.91

	0.1	15	7.92	948.648986	0.95
	0.1	39	47.80	948.613786	0.72
	0.1	5	0.90	948.663619	0.96

0.99 948.6832980505138
	1e-07	34	13.38	948.683298	0.95
	1e-

In [17]:
# Assemble the LaTeX rows of the iterations/time table: one row per mu, each
# cell being a one-column tabular that stacks the values for all Delta.
column_keys = ("iters_exact", "time_exact",
               "iters_adaptL", "time_adaptL",
               "iters_adaptLdelta", "time_adaptLdelta")
tabular_open = "\\begin{tabular}{@{}c@{}} "
tabular_close = " \\end{tabular}"
line_break = " \\\\ "

rows = []
for mu in mu_list:
    # First cell: the noise levels, written as powers of ten.
    cur_list = ["$10^{{{}}}$".format(i) for i in res[mu]["delta"]]
    cells = [tabular_open + line_break.join(cur_list) + tabular_close]
    for key in column_keys:
        cur_list = ["${}$".format(i) for i in res[mu][key]]
        cells.append(tabular_open + line_break.join(cur_list) + tabular_close)
    rows.append(str(mu) + " & " + "&".join(cells) + "\\\\\n\\hline\n")

s = "".join(rows)
print(s)

0.01 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $511$ \\ $301$ \\ $85$ \end{tabular}&\begin{tabular}{@{}c@{}} $226.03$ \\ $158.16$ \\ $33.94$ \end{tabular}&\begin{tabular}{@{}c@{}} $515$ \\ $314$ \\ $170$ \end{tabular}&\begin{tabular}{@{}c@{}} $412.39$ \\ $247.06$ \\ $103.11$ \end{tabular}\\
\hline
0.1 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $76$ \\ $49$ \\ $24$ \end{tabular}&\begin{tabular}{@{}c@{}} $29.58$ \\ $18.11$ \\ $8.21$ \end{tabular}&\begin{tabular}{@{}c@{}} $102$ \\ $94$ \\ $54$ \end{tabular}&\begin{tabular}{@{}c@{}} $60.81$ \\ $44.58$ \\ $32.44$ \end{tabular}\\
\hline
0.9 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $37$ \\ $26$ \\ $15$ \end{tabular}&\begin{tabular}{@{}c@{}} $15.59$ \\ $19.90$ \\ $7.92$ \end{tabular}&\begin{tabular}{@{}c@{}} $72$ \\ $48$ \\ $39$ \end{tabular}&\begin{tabular}{@{}c@{}}

In [18]:
# Assemble the LaTeX rows of the distance / gradient-norm table: one row per
# mu, each cell being a one-column tabular that stacks the values for all Delta.
# (A leading dist[mu] column existed as commented-out code and stays disabled.)
column_keys = ("exact,x0-x*", "normg_exact",
               "adaptL,x0-x*", "normg_adaptL",
               "adaptLdelta,x0-x*", "normg_adaptLdelta")
tabular_open = "\\begin{tabular}{@{}c@{}} "
tabular_close = " \\end{tabular}"
line_break = " \\\\ "

rows = []
for mu in mu_list:
    # First cell: the noise levels, written as powers of ten.
    cur_list = ["$10^{{{}}}$".format(i) for i in res[mu]["delta"]]
    cells = [tabular_open + line_break.join(cur_list) + tabular_close]
    for key in column_keys:
        cur_list = ["${}$".format(i) for i in res[mu][key]]
        cells.append(tabular_open + line_break.join(cur_list) + tabular_close)
    rows.append(str(mu) + " & " + "&".join(cells) + "\\\\\n\\hline\n")

s = "".join(rows)
print(s)

0.01 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $946.3$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.29$ \\ $2.26$ \\ $2.27$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $946.7$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.03$ \\ $2.31$ \\ $1.95$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $947.7$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.84$ \\ $2.43$ \\ $1.07$ \end{tabular}\\
\hline
0.1 & \begin{tabular}{@{}c@{}} $10^{-7}$ \\ $10^{-4}$ \\ $10^{-1}$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.3$ \end{tabular}&\begin{tabular}{@{}c@{}} $2.17$ \\ $2.18$ \\ $2.14$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.3$ \end{tabular}&\begin{tabular}{@{}c@{}} $1.97$ \\ $1.68$ \\ $2.26$ \end{tabular}&\begin{tabular}{@{}c@{}} $948.7$ \\ $948.7$ \\ $948.5$ \end{tabular}&\begin{tabular}{@{}c@{}} $0.87$ \\ $0.80$ \\ $0.83$ \end{tabular}\\
\hline
0.9 & \be