### Backtracking algorithm

An example of the backtracking line-search algorithm applied to a quadratic function.

### Import library

In [1]:
import numpy as np
import pandas as pd

### Define the SPD matrix A and the vector b of the quadratic function

$ q(x) = \frac{1}{2} x^T A x - b^T x $

In [2]:
# Seed NumPy's global generator so that "Restart Kernel -> Run All" rebuilds
# exactly the same A and b (and the same later np.random.rand draws).
SEED = 0
np.random.seed(SEED)

matrixSize = 7
# M @ M.T is symmetric positive semi-definite by construction; for a random
# square M it is positive definite with probability 1.
A = np.random.rand(matrixSize, matrixSize)
A = np.dot(A, A.transpose())
b = np.random.rand(matrixSize, 1)

# The rest of the notebook inverts A and relies on it being SPD: fail fast here
# rather than silently running Newton steps on a (numerically) singular matrix.
assert np.all(np.linalg.eigvalsh(A) > 0), "A is not positive definite"

In [3]:
# Limit NumPy's printed precision to 3 decimals (a global setting), then show A.
np.set_printoptions(precision=3)
print(A)

[[1.195 1.255 1.457 1.174 1.086 1.091 0.888]
 [1.255 2.257 2.012 1.889 0.975 1.575 1.574]
 [1.457 2.012 3.086 2.554 0.923 2.14  1.983]
 [1.174 1.889 2.554 2.25  0.803 1.894 1.941]
 [1.086 0.975 0.923 0.803 1.371 0.715 0.791]
 [1.091 1.575 2.14  1.894 0.715 2.264 1.803]
 [0.888 1.574 1.983 1.941 0.791 1.803 2.371]]


In [4]:
# Same 3-decimal print precision as for A; show the column vector b.
np.set_printoptions(precision=3)
print(b)

[[0.69 ]
 [0.364]
 [0.04 ]
 [0.505]
 [0.23 ]
 [0.945]
 [0.936]]


### Define quadratic function, gradient and hessian

In [5]:
def funct_quad(x):
    """Evaluate the quadratic objective q(x) = 0.5 * x^T A x - b^T x.

    Reads the notebook-level arrays ``A`` and ``b``.

    Parameters
    ----------
    x : numpy.ndarray
        Point at which q is evaluated. The notebook passes column vectors of
        shape (matrixSize, 1).

    Returns
    -------
    numpy.ndarray
        The result of the dot products; for a column vector ``x`` and the
        column vector ``b`` this is a (1, 1) array, not a Python scalar.
    """
    quadratic_term = 0.5 * np.dot(np.dot(x.T, A), x)
    linear_term = np.dot(b.T, x)
    return quadratic_term - linear_term

def gradient(x):
    """Return the gradient of the quadratic objective at ``x``: A x - b.

    Reads the notebook-level arrays ``A`` and ``b``.

    Parameters
    ----------
    x : numpy.ndarray
        Point at which the gradient is evaluated (a column vector in this
        notebook).

    Returns
    -------
    numpy.ndarray
        ``np.dot(A, x) - b``.
    """
    Ax = np.dot(A, x)
    return Ax - b

def hessian(x):
    """Return the Hessian of the quadratic objective.

    The Hessian of 0.5 * x^T A x - b^T x is the constant matrix ``A``, so the
    argument is ignored; it is kept only so the signature mirrors
    ``funct_quad`` and ``gradient``.

    Parameters
    ----------
    x : any
        Unused.

    Returns
    -------
    numpy.ndarray
        The notebook-level matrix ``A`` itself (not a copy).
    """
    constant_hessian = A
    return constant_hessian

### Define the Armijo (A) and Wolfe (W) rule functions

In [6]:
def check_armijo_rule(xk, ak, pk, c1=1e-4):
    """Test the Armijo (sufficient decrease) condition for a trial step.

    Checks  q(xk + ak * pk) <= q(xk) + c1 * ak * grad(xk)^T pk  using the
    notebook's ``funct_quad`` and ``gradient``.

    Parameters
    ----------
    xk : numpy.ndarray
        Current iterate.
    ak : float
        Trial step length.
    pk : numpy.ndarray
        Search direction.
    c1 : float, optional
        Sufficient-decrease constant.

    Returns
    -------
    numpy.ndarray of bool
        Result of the comparison; it has the shape of ``funct_quad``'s output
        (a single-element array for column-vector inputs).
    """
    trial_value = funct_quad(xk + ak * pk)
    directional_derivative = np.dot(gradient(xk).T, pk)
    decrease_bound = funct_quad(xk) + ak * c1 * directional_derivative
    return trial_value <= decrease_bound

def check_wolfe_rule(xk, ak, pk, c2=0.9):
    """Test the Wolfe curvature condition for a trial step.

    Checks  grad(xk + ak * pk)^T pk >= c2 * grad(xk)^T pk  using the
    notebook's ``gradient``. This helper is not called by the other cells
    shown in this notebook.

    Parameters
    ----------
    xk : numpy.ndarray
        Current iterate.
    ak : float
        Trial step length.
    pk : numpy.ndarray
        Search direction.
    c2 : float, optional
        Curvature constant.

    Returns
    -------
    numpy.ndarray of bool
        Result of the comparison (a single-element array for column-vector
        inputs).
    """
    slope_at_trial = np.dot(gradient(xk + ak * pk).T, pk)
    slope_at_start = np.dot(gradient(xk).T, pk)
    return slope_at_trial >= c2 * slope_at_start

### Define hessian inverse

In [7]:
def inverse_hessian():
    """Return the inverse of the Hessian, computed with ``np.linalg.inv``.

    The Hessian is obtained from ``hessian(x=None)``; the notebook's
    ``hessian`` ignores its argument, so no evaluation point is needed.

    Returns
    -------
    numpy.ndarray
        Inverse of the matrix returned by ``hessian``.
    """
    hess = hessian(x=None)
    return np.linalg.inv(hess)

# Invert the Hessian once at notebook level; get_inverse_hessian() further down returns this array.
A_inv = inverse_hessian()

In [8]:
# Show the inverse Hessian with 3-decimal print precision.
np.set_printoptions(precision=3)
print(A_inv)

[[ 39.787 -15.215 -38.835  49.502 -19.421  -5.356  -2.284]
 [-15.215   8.676  18.894 -26.827   6.445   1.801   2.574]
 [-38.835  18.894  52.396 -71.373  17.225   4.286   7.593]
 [ 49.502 -26.827 -71.373 101.707 -21.192  -6.182 -12.513]
 [-19.421   6.445  17.225 -21.192  10.765   2.701   0.289]
 [ -5.356   1.801   4.286  -6.182   2.701   2.376  -0.422]
 [ -2.284   2.574   7.593 -12.513   0.289  -0.422   3.684]]


In [10]:
# Sanity check of the inversion: A times A_inv should print as the identity
# matrix up to floating-point round-off.
np.set_printoptions(precision=3)
I = A.dot(A_inv)
print(I)

[[ 1.000e+00  3.553e-15 -1.155e-14  2.309e-14  3.386e-15 -1.499e-15
  -3.553e-15]
 [-1.910e-14  1.000e+00  3.553e-15 -1.776e-14  6.106e-15  1.554e-15
   4.441e-15]
 [-6.217e-15  0.000e+00  1.000e+00  0.000e+00  5.551e-16  9.992e-16
   0.000e+00]
 [-1.599e-14  1.332e-14 -1.776e-15  1.000e+00  4.885e-15 -3.331e-16
   0.000e+00]
 [-3.064e-14  1.288e-14  1.155e-14 -4.086e-14  1.000e+00  1.277e-15
   1.332e-15]
 [ 5.329e-15  7.994e-15 -3.553e-15  1.776e-14 -7.772e-16  1.000e+00
  -1.776e-15]
 [ 2.043e-14  2.665e-15 -1.776e-14 -1.066e-14  7.883e-15 -1.332e-15
   1.000e+00]]


### Update functions for the direction $p_k$ and the step length $\alpha_k$

In [11]:
def get_inverse_hessian():
    """Return the precomputed inverse Hessian.

    Returns
    -------
    numpy.ndarray
        The notebook-level array ``A_inv`` itself (not a copy); nothing is
        recomputed here.
    """
    cached_inverse = A_inv
    return cached_inverse

def newtown_update_pk(xk):
    """Compute the Newton search direction at ``xk``.

    The direction is  p_k = -B^{-1} grad(xk), where ``B^{-1}`` comes from
    ``get_inverse_hessian()`` and the gradient from ``gradient``.

    Parameters
    ----------
    xk : numpy.ndarray
        Current iterate.

    Returns
    -------
    numpy.ndarray
        The Newton direction, with the same shape as ``gradient(xk)`` for a
        square inverse Hessian.
    """
    scaled_gradient = np.dot(get_inverse_hessian(), gradient(xk))
    return -scaled_gradient

In [13]:
def newtown_update_ak(xk, a0, pk, c1=1e-4, b_max=20, y=0.98):
    """Backtrack from ``a0`` until the Armijo rule accepts the step length.

    The trial step starts at ``a0`` and is multiplied by ``y`` after every
    rejected trial, for at most ``b_max`` trials.

    Parameters
    ----------
    xk : numpy.ndarray
        Current iterate, forwarded to ``check_armijo_rule``.
    a0 : float
        First trial step length.
    pk : numpy.ndarray
        Search direction, forwarded to ``check_armijo_rule``.
    c1 : float, optional
        Armijo constant, forwarded to ``check_armijo_rule``.
    b_max : int, optional
        Maximum number of trial step lengths.
    y : float, optional
        Contraction factor applied to the step after each rejected trial.

    Returns
    -------
    tuple
        ``(ak, IND)``. ``IND`` is 1 when a trial step was accepted, and ``ak``
        is that step. ``IND`` is -1 when all ``b_max`` trials were rejected; in
        that case ``ak`` equals ``a0 * y**b_max``, a value that was never
        tested itself.
    """
    ak = a0
    # The trial counter is unused; `_` avoids reusing the name `b`, which at
    # notebook level denotes the linear-term vector of the objective.
    for _ in range(b_max):
        if check_armijo_rule(xk, ak, pk, c1=c1):
            return ak, 1
        # Rebind rather than `*=` so an array-valued a0 is never modified in place.
        ak = ak * y
    return ak, -1

In [14]:
def print_perf(f, g, h, flag):
    """Print the termination status and the evaluation counters of a run.

    Parameters
    ----------
    f : int
        Value reported as the number of function evaluations.
    g : int
        Value reported as the number of gradient evaluations.
    h : int
        Value reported as the number of Hessian evaluations.
    flag : int
        Termination code: 1 prints the success message, -1 prints the
        step-length failure message, any other value prints "No convergence".

    Returns
    -------
    None
        Output goes to stdout, one line for the status and one per counter.
    """
    if flag == 1:
        status = "Optimization terminated successfully"
    else:
        status = "Fail to compute alpha_k" if flag == -1 else "No convergence"
    print(status)

    counter_lines = (
        f"Function evaluations : {f}",
        f"Gradient evaluations : {g}",
        f"Hessian  evaluations : {h}",
    )
    for line in counter_lines:
        print(line)

def line_search_back_tracking(x0, a0, p0, c1=1e-4, b_max=20, y=0.98, c2=0.9, k_max=50, eps=1e-3):
    """Minimise the notebook's quadratic with Newton steps and backtracking.

    Each outer iteration computes the Newton direction with
    ``newtown_update_pk``, finds a step length with ``newtown_update_ak``
    (Armijo backtracking) and moves the iterate. The run stops when the
    gradient norm drops below ``eps``, when no step length is accepted, or
    after ``k_max`` iterations. A summary is printed with ``print_perf``
    before returning.

    Parameters
    ----------
    x0 : array_like
        Starting point. It is copied, so the caller's array is left untouched.
    a0 : float
        Initial trial step length; every outer iteration restarts its
        backtracking from this value.
    p0 : any
        Unused: the direction is recomputed from the gradient before first
        use. Kept for interface compatibility.
    c1 : float, optional
        Armijo constant, forwarded to ``newtown_update_ak``.
    b_max : int, optional
        Maximum number of backtracking trials per outer iteration.
    y : float, optional
        Step contraction factor used while backtracking.
    c2 : float, optional
        Unused: no curvature (Wolfe) check is performed here. Kept for
        interface compatibility.
    k_max : int, optional
        Maximum number of outer iterations.
    eps : float, optional
        Convergence threshold on the Euclidean norm of the gradient.

    Returns
    -------
    numpy.ndarray
        The last iterate. The termination status (1 success, -1 step-length
        failure, -2 iteration budget exhausted) is only printed, not returned.

    Notes
    -----
    The printed counters are nominal per-iteration tallies (2 function,
    3 gradient and 1 Hessian evaluation per outer iteration). They are not
    increased for additional backtracking trials.
    """
    # Private float copy: aliasing x0 and updating it with `+=` would silently
    # overwrite the caller's starting point (and fail on integer arrays).
    xk = np.array(x0, dtype=float)
    count_grad = 0
    count_func = 0
    count_hessian = 0
    for _ in range(k_max):
        count_grad += 3
        count_func += 2
        count_hessian += 1
        pk = newtown_update_pk(xk)
        # Always restart from a0: feeding the previously shrunken step back in
        # would make the step length non-increasing over the whole run.
        ak, IND = newtown_update_ak(xk, a0, pk, c1=c1, b_max=b_max, y=y)
        if IND != 1:
            # Step-length failure; the convergence-test gradient was not
            # evaluated in this iteration, hence the -1.
            print_perf(count_func, count_grad - 1, count_hessian, -1)
            return xk
        xk = xk + ak * pk
        if np.linalg.norm(gradient(xk)) < eps:
            print_perf(count_func, count_grad, count_hessian, 1)  # success
            return xk
    # Iteration budget exhausted without meeting the gradient tolerance.
    print_perf(count_func, count_grad, count_hessian, -2)
    return xk

### Initialise hyperparameters

In [15]:
# p0 is passed to line_search_back_tracking, which replaces it with the Newton direction before any use.
p0 = np.random.rand(matrixSize, 1)
# Initial trial step length for the backtracking search.
a0 = 0.9
# Random starting point, a (matrixSize, 1) column vector.
x0 = np.random.rand(matrixSize, 1)

In [16]:
# Run the Newton iteration with backtracking from x0; the status and evaluation counters are printed by print_perf.
xk = line_search_back_tracking(x0, a0, p0)

Optimization terminated successfully
Function evaluations : 10
Gradient evaluations : 15
Hessian  evaluations : 5


Print the gradient norm at the solution, i.e. the residual $\|A x_k - b\|$

In [17]:
# gradient(xk) is A @ xk - b, so this is the residual norm of the linear system A x = b.
np.linalg.norm(gradient(xk))

0.0001325846413670121

Print function value

In [20]:
# funct_quad returns a (1, 1) array for a column vector; drop the singleton axes for display.
funct_quad(xk).squeeze()

array(-15.844)

In [21]:
# Show the point returned by the line search.
print(xk)

[[ 33.725]
 [-14.552]
 [-38.784]
 [ 50.519]
 [-15.784]
 [ -3.523]
 [ -3.543]]
