### Algorithm for gradient method

In the first part of this notebook, we minimise the following quadratic function:

$q(x) = \frac{1}{2} x^T A x - b^T x$

In [1]:
import numpy as np

In [2]:
# Seed NumPy's global generator so that A, b and the random starting points
# drawn later with np.random.rand are identical on every Restart & Run All.
np.random.seed(0)

matrixSize = 10
# A is built as M @ M.T, which makes it symmetric positive semi-definite;
# the gradient formula A x - b used below relies on that symmetry.
A = np.random.rand(matrixSize, matrixSize)
A = np.dot(A, A.transpose())
# Right-hand side as a column vector of shape (matrixSize, 1).
b = np.random.rand(matrixSize, 1)

In [3]:
# Display A with three decimals. set_printoptions changes NumPy's global
# print settings, so it also affects every later print of an array.
np.set_printoptions(precision=3)
print(A)

[[2.516 1.869 2.329 1.406 2.259 2.167 2.102 1.88  2.223 2.316]
 [1.869 4.099 3.148 1.844 3.836 2.455 3.247 2.48  3.049 2.539]
 [2.329 3.148 3.841 1.645 3.281 2.896 3.078 2.887 3.049 2.826]
 [1.406 1.844 1.645 2.626 2.599 1.863 2.617 1.868 1.627 1.566]
 [2.259 3.836 3.281 2.599 4.322 2.808 3.53  2.706 3.04  2.53 ]
 [2.167 2.455 2.896 1.863 2.808 2.868 2.869 2.755 2.227 2.691]
 [2.102 3.247 3.078 2.617 3.53  2.869 4.217 2.64  3.018 2.846]
 [1.88  2.48  2.887 1.868 2.706 2.755 2.64  3.176 1.89  2.825]
 [2.223 3.049 3.049 1.627 3.04  2.227 3.018 1.89  3.19  2.316]
 [2.316 2.539 2.826 1.566 2.53  2.691 2.846 2.825 2.316 3.111]]


In [4]:
# Precision is set again here, although the previous cell already set it globally.
np.set_printoptions(precision=3)
print(b)

[[0.187]
 [0.924]
 [0.649]
 [0.194]
 [0.082]
 [0.587]
 [0.041]
 [0.249]
 [0.611]
 [0.158]]


In [5]:
def model(x):
    """Evaluate the quadratic objective q(x) = 1/2 x^T A x - b^T x.

    Uses the module-level matrix ``A`` and vector ``b``.
    """
    quadratic_term = x.T.dot(A).dot(x)
    linear_term = b.T.dot(x)
    return 0.5 * quadratic_term - linear_term

def model_jac(x):
    """Gradient of the quadratic objective: A x - b.

    Uses the module-level ``A`` and ``b``; the formula is the gradient of
    1/2 x^T A x - b^T x when ``A`` is symmetric.
    """
    return A.dot(x) - b

def model_hess(x):
    """Hessian of the quadratic objective.

    Returns the module-level matrix ``A`` itself (not a copy); ``x`` is
    ignored because the value returned does not depend on it.
    """
    return A

In [6]:
def print_perf(f_value, iteration, h, flag):
    """Print a short, SciPy-style summary of an optimisation run.

    Parameters
    ----------
    f_value : float
        Value shown on the "Current Gradient Value" line; the solvers in this
        notebook pass the norm of the final gradient.
    iteration : int
        Number of iterations performed.
    h : int
        Count shown on the "Function evaluations" line.
    flag : bool
        True when the stopping tolerance was reached, False otherwise.
    """
    if flag:
        print("Optimization terminated successfully")
    else:
        print("No convergence")
    print(f"Current Gradient Value : {f_value}")
    print(f"Iterations : {iteration}")
    print(f"Function evaluations : {h}")


def gradient_descent_method(A, x0, b, eps=1e-3, max_iter=100, gradient=None, hessian=None):
    """Steepest descent with exact line search for q(x) = 1/2 x^T A x - b^T x.

    Parameters
    ----------
    A : ndarray
        Symmetric matrix of the quadratic form.
    x0 : array_like
        Starting point. It is copied (as float) and never modified.
    b : ndarray
        Linear term, shaped compatibly with ``A @ x0``.
    eps : float
        Stop once the gradient norm is <= ``eps``.
    max_iter : int
        Maximum number of iterations.
    gradient, hessian : callable, optional
        Override the gradient / Hessian. When omitted they are derived from
        the ``A`` and ``b`` passed in (``A x - b`` and ``A``), so the solver
        no longer depends on notebook-level globals.

    Returns
    -------
    ndarray
        The last iterate. A summary is printed through ``print_perf``.
    """
    grad_fn = gradient if gradient is not None else (lambda x: np.dot(A, x) - b)
    hess_fn = hessian if hessian is not None else (lambda x: A)

    # Float copy: the in-place update below would fail on an integer array.
    x = np.array(x0, dtype=float)
    g = grad_fn(x)
    grad_norm = np.linalg.norm(g)
    k = 0
    while k < max_iter and grad_norm > eps:
        curvature = np.dot(np.dot(g.T, hess_fn(x)), g)
        if curvature <= 0:
            # No minimiser exists along -g (and dividing by zero would
            # poison x with inf/nan), so stop and report no convergence.
            break
        # Exact minimiser of the quadratic along -g: alpha = g^T g / g^T H g.
        alpha = np.dot(g.T, g) / curvature
        x -= alpha * g
        g = grad_fn(x)
        grad_norm = np.linalg.norm(g)
        k += 1
    # k + 1 gradient evaluations: the initial one plus one per iteration.
    print_perf(grad_norm, k, k + 1, grad_norm <= eps)
    return x

In [7]:
def gradient_descent_method_optimised(A, x0, b, eps=1e-3, max_iter=100, gradient=None, hessian=None):
    """Steepest descent for a quadratic, with one Hessian product per iteration.

    Instead of re-evaluating the gradient after each step, the gradient is
    updated with ``g <- g - alpha * (H g)``, reusing the product ``H g``
    already needed for the step length. This update equals the true gradient
    only when the objective is quadratic with a constant Hessian.

    Parameters
    ----------
    A : ndarray
        Symmetric matrix of the quadratic form.
    x0 : array_like
        Starting point. It is copied (as float) and never modified.
    b : ndarray
        Linear term, shaped compatibly with ``A @ x0``.
    eps : float
        Stop once the gradient norm is <= ``eps``.
    max_iter : int
        Maximum number of iterations.
    gradient, hessian : callable, optional
        Override the gradient / Hessian. When omitted they are derived from
        the ``A`` and ``b`` passed in (``A x - b`` and ``A``).

    Returns
    -------
    ndarray
        The last iterate. A summary is printed through ``print_perf``.
    """
    grad_fn = gradient if gradient is not None else (lambda x: np.dot(A, x) - b)
    hess_fn = hessian if hessian is not None else (lambda x: A)

    # Float copies: both arrays are updated in place below, which would fail
    # on integer data and must not touch an array owned by the caller.
    x = np.array(x0, dtype=float)
    g = np.array(grad_fn(x), dtype=float)
    grad_norm = np.linalg.norm(g)
    k = 0
    while k < max_iter and grad_norm > eps:
        r = np.dot(hess_fn(x), g)
        curvature = np.dot(g.T, r)
        if curvature <= 0:
            # No minimiser exists along -g (and dividing by zero would
            # poison x with inf/nan), so stop and report no convergence.
            break
        alpha = np.dot(g.T, g) / curvature
        x -= alpha * g
        # Recursive gradient update; valid because the Hessian is constant.
        g -= alpha * r
        grad_norm = np.linalg.norm(g)
        k += 1
    # Reported count: the initial gradient plus one Hessian product per iteration.
    print_perf(grad_norm, k, k + 1, grad_norm <= eps)
    return x

In [8]:
# Random starting point: column vector of shape (matrixSize, 1), entries uniform in [0, 1).
x0 = np.random.rand(matrixSize, 1)

In [9]:
print(x0)

[[0.585]
 [0.599]
 [0.59 ]
 [0.105]
 [0.392]
 [0.753]
 [0.58 ]
 [0.353]
 [0.704]
 [0.164]]


In [10]:
# Steepest descent on the quadratic with the default tolerance (eps=1e-3) and cap (max_iter=100).
xk = gradient_descent_method(A, x0, b)

No convergence
Current Gradient Value : 0.41552457963716005
Iterations : 100
Function evaluations : 101


In [11]:
print(xk)

[[ 0.016]
 [ 2.243]
 [ 0.289]
 [ 1.432]
 [-2.685]
 [ 2.022]
 [-1.129]
 [-0.585]
 [ 0.356]
 [-1.002]]


In [12]:
# Same problem and defaults, using the variant that updates the gradient recursively.
xk = gradient_descent_method_optimised(A, x0, b)

No convergence
Current Gradient Value : 0.41552457963726386
Iterations : 100
Function evaluations : 101


In [13]:
print(xk)

[[ 0.016]
 [ 2.243]
 [ 0.289]
 [ 1.432]
 [-2.685]
 [ 2.022]
 [-1.129]
 [-0.585]
 [ 0.356]
 [-1.002]]


In [14]:
# Raise the iteration cap to 10,000 to see how far the gradient norm drops.
xk = gradient_descent_method_optimised(A, x0, b, max_iter=10000)

No convergence
Current Gradient Value : 0.08009917473126797
Iterations : 10000
Function evaluations : 10001


In [15]:
# Iteration cap raised to 50,000.
xk = gradient_descent_method_optimised(A, x0, b, max_iter=50000)

No convergence
Current Gradient Value : 0.011971371068086985
Iterations : 50000
Function evaluations : 50001


In [16]:
# Iteration cap raised to 100,000.
xk = gradient_descent_method_optimised(A, x0, b, max_iter=100000)

No convergence
Current Gradient Value : 0.001112500562532827
Iterations : 100000
Function evaluations : 100001


### Test on a more complex function: the Rosenbrock function

In [17]:
def rosen(x):
    """Rosenbrock function of the vector ``x``.

    Sums 100 * (x[i+1] - x[i]**2)**2 + (1 - x[i])**2 over consecutive pairs.
    """
    head = x[:-1]
    tail = x[1:]
    valley_term = 100.0*(tail-head**2.0)**2.0
    offset_term = (1-head)**2.0
    return sum(valley_term + offset_term)

def rosen_der(x):
    """Gradient of the Rosenbrock function at ``x``.

    Interior components are computed in one vectorised expression; the first
    and last components have their own formulas because they have only one
    neighbour.
    """
    grad = np.zeros_like(x)
    left = x[:-2]
    centre = x[1:-1]
    right = x[2:]
    grad[1:-1] = 200*(centre-left**2) - 400*(right - centre**2)*centre - 2*(1-centre)
    # Boundary components.
    grad[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    grad[-1] = 200*(x[-1]-x[-2]**2)
    return grad

def rosen_hess(x):
    """Hessian of the Rosenbrock function at ``x`` (a tridiagonal matrix)."""
    x = np.asarray(x)
    # Both off-diagonals carry -400 * x[i].
    coupling = 400*x[:-1]
    main_diag = np.zeros_like(x)
    main_diag[0] = 1200*x[0]**2-400*x[1]+2
    main_diag[-1] = 200
    main_diag[1:-1] = 202 + 1200*x[1:-1]**2 - 400*x[2:]
    off_diagonals = np.diag(-coupling, 1) - np.diag(coupling, -1)
    return off_diagonals + np.diag(main_diag)

In [18]:
def gradient_descent_method(x0, gradient=rosen_der, hessian=rosen_hess, eps=1e-3, max_iter=100):
    """Gradient descent with a Hessian-based step length for a general function.

    NOTE: this definition replaces the earlier ``gradient_descent_method(A, x0, b, ...)``
    written for the quadratic model. After this cell has run, the quadratic
    cells can no longer call the old signature without re-running their
    defining cell.

    Parameters
    ----------
    x0 : array_like
        Starting point. It is copied (as float) and never modified.
    gradient, hessian : callable
        Return the gradient vector / Hessian matrix at a point.
    eps : float
        Stop once the gradient norm is <= ``eps``.
    max_iter : int
        Maximum number of iterations.

    Returns
    -------
    ndarray
        The last iterate. A summary is printed through ``print_perf``.
    """
    # Float copy: the in-place update below would fail on an integer array,
    # and a plain list is accepted as well.
    x = np.array(x0, dtype=float)
    g = gradient(x)
    grad_norm = np.linalg.norm(g)
    k = 0
    while k < max_iter and grad_norm > eps:
        # Step length that minimises the local quadratic model along -g:
        # alpha = g^T g / g^T H g. (The original note here asked how this
        # rule should be included.)
        # NOTE(review): this assumes g^T H g > 0; the Rosenbrock function is
        # not convex, so that is not guaranteed at every point - confirm.
        alpha = np.dot(g.T, g) / np.dot(np.dot(g.T, hessian(x)), g)
        x -= alpha * g
        g = gradient(x)
        grad_norm = np.linalg.norm(g)
        k += 1
    # k + 1 gradient evaluations: the initial one plus one per iteration.
    print_perf(grad_norm, k, k + 1, grad_norm <= eps)
    return x

In [19]:
# Starting point for the Rosenbrock runs: 1-D vector of length matrixSize, entries in [1, 2).
# This rebinds x0, which previously held the column vector used for the quadratic problem.
x0 = np.random.rand(matrixSize)+1
print(x0)

[1.451 1.785 1.151 1.459 1.27  1.115 1.027 1.434 1.083 1.825]


In [23]:
# The positional argument is `precision`: arrays print with one decimal from here on.
np.set_printoptions(1)

In [24]:
# Hessian of the Rosenbrock function at the starting point (tridiagonal).
print(rosen_hess(x0))

[[1815.7 -580.5    0.     0.     0.     0.     0.     0.     0.     0. ]
 [-580.5 3563.7 -713.9    0.     0.     0.     0.     0.     0.     0. ]
 [   0.  -713.9 1208.1 -460.4    0.     0.     0.     0.     0.     0. ]
 [   0.     0.  -460.4 2247.1 -583.4    0.     0.     0.     0.     0. ]
 [   0.     0.     0.  -583.4 1691.1 -508.     0.     0.     0.     0. ]
 [   0.     0.     0.     0.  -508.  1284.5 -446.2    0.     0.     0. ]
 [   0.     0.     0.     0.     0.  -446.2  892.8 -410.6    0.     0. ]
 [   0.     0.     0.     0.     0.     0.  -410.6 2236.6 -573.6    0. ]
 [   0.     0.     0.     0.     0.     0.     0.  -573.6  879.1 -433.1]
 [   0.     0.     0.     0.     0.     0.     0.     0.  -433.1  200. ]]


In [25]:
# Calls the Rosenbrock redefinition of gradient_descent_method with its defaults (max_iter=100).
xk = gradient_descent_method(x0)

No convergence
Current Gradient Value : 0.39445736207433124
Iterations : 100
Function evaluations : 101


In [26]:
# Same run with the iteration cap raised to 10,000.
xk = gradient_descent_method(x0, max_iter=10000)

No convergence
Current Gradient Value : 0.004036129276489186
Iterations : 10000
Function evaluations : 10001
