In [25]:
import numpy as np
import numpy.linalg as la
import autograd.numpy as np
from autograd import grad, jacobian
from scipy.optimize import line_search

## Matrix definiteness

- positive definite (positive semi-definite): all eigenvalues are positive (non-negative)
- negative definite (negative semi-definite): all eigenvalues are negative (non-positive)
- indefinite: both positive and negative eigenvalues occur


In [26]:
def positive_definite(A):
    """
    Check if the real square matrix A is positive definite.

    Definiteness is a property of the quadratic form ``x.T @ A @ x``, which
    only depends on the symmetric part ``(A + A.T) / 2``. The eigenvalues of
    that symmetric part are therefore tested; for a symmetric ``A`` this is
    simply the eigenvalues of ``A`` itself.

    Parameters
    ----------
    A : array_like
        Square matrix (or a stack of square matrices in the last two axes).

    Returns
    -------
    bool
        True when every eigenvalue of the symmetric part is strictly positive.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the last two dimensions of ``A`` are not square.
    """
    A = np.asarray(A)
    if A.ndim < 2 or A.shape[-1] != A.shape[-2]:
        raise la.LinAlgError("Last 2 dimensions of the array must be square")
    # eigvalsh is meant for symmetric input and always returns real eigenvalues,
    # so the sign test below is well defined.
    symmetric_part = (A + np.swapaxes(A, -1, -2)) / 2
    return np.all(la.eigvalsh(symmetric_part) > 0)


def negative_definite(A):
    """
    Check if the real square matrix A is negative definite.

    Definiteness is a property of the quadratic form ``x.T @ A @ x``, which
    only depends on the symmetric part ``(A + A.T) / 2``. The eigenvalues of
    that symmetric part are therefore tested; for a symmetric ``A`` this is
    simply the eigenvalues of ``A`` itself.

    Parameters
    ----------
    A : array_like
        Square matrix (or a stack of square matrices in the last two axes).

    Returns
    -------
    bool
        True when every eigenvalue of the symmetric part is strictly negative.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the last two dimensions of ``A`` are not square.
    """
    A = np.asarray(A)
    if A.ndim < 2 or A.shape[-1] != A.shape[-2]:
        raise la.LinAlgError("Last 2 dimensions of the array must be square")
    # eigvalsh is meant for symmetric input and always returns real eigenvalues,
    # so the sign test below is well defined.
    symmetric_part = (A + np.swapaxes(A, -1, -2)) / 2
    return np.all(la.eigvalsh(symmetric_part) < 0)


def semi_positive_definite(A):
    """
    Check if the real square matrix A is positive semi-definite.

    Definiteness is a property of the quadratic form ``x.T @ A @ x``, which
    only depends on the symmetric part ``(A + A.T) / 2``. The eigenvalues of
    that symmetric part are therefore tested; for a symmetric ``A`` this is
    simply the eigenvalues of ``A`` itself.

    Parameters
    ----------
    A : array_like
        Square matrix (or a stack of square matrices in the last two axes).

    Returns
    -------
    bool
        True when every eigenvalue of the symmetric part is non-negative.
        The comparison is exact (no tolerance is applied).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the last two dimensions of ``A`` are not square.
    """
    A = np.asarray(A)
    if A.ndim < 2 or A.shape[-1] != A.shape[-2]:
        raise la.LinAlgError("Last 2 dimensions of the array must be square")
    # eigvalsh is meant for symmetric input and always returns real eigenvalues,
    # so the sign test below is well defined.
    symmetric_part = (A + np.swapaxes(A, -1, -2)) / 2
    return np.all(la.eigvalsh(symmetric_part) >= 0)


def semi_negative_definite(A):
    """
    Check if the real square matrix A is negative semi-definite.

    Definiteness is a property of the quadratic form ``x.T @ A @ x``, which
    only depends on the symmetric part ``(A + A.T) / 2``. The eigenvalues of
    that symmetric part are therefore tested; for a symmetric ``A`` this is
    simply the eigenvalues of ``A`` itself.

    Parameters
    ----------
    A : array_like
        Square matrix (or a stack of square matrices in the last two axes).

    Returns
    -------
    bool
        True when every eigenvalue of the symmetric part is non-positive.
        The comparison is exact (no tolerance is applied).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the last two dimensions of ``A`` are not square.
    """
    A = np.asarray(A)
    if A.ndim < 2 or A.shape[-1] != A.shape[-2]:
        raise la.LinAlgError("Last 2 dimensions of the array must be square")
    # eigvalsh is meant for symmetric input and always returns real eigenvalues,
    # so the sign test below is well defined.
    symmetric_part = (A + np.swapaxes(A, -1, -2)) / 2
    return np.all(la.eigvalsh(symmetric_part) <= 0)


# Symmetric 3x3 example matrix used to try out the definiteness checks.
A = np.array([[3, 1, 5], [1, 4, 2], [5, 2, 1]])


# Last expression of the cell, so its value is displayed as the cell output.
positive_definite(A)  # Check if A is positive definite

np.False_

## Matrix symmetry


In [27]:
def symetry(A: np.ndarray):
    """
    Check if the matrix A is symmetric.

    Parameters
    ----------
    A : np.ndarray
        Array to test.

    Returns
    -------
    bool
        False when ``A`` is not 2-dimensional or not square; otherwise whether
        ``A`` equals its transpose within the default tolerances of
        ``np.allclose``.
    """
    # A non-matrix or non-square input cannot be symmetric: report that
    # instead of raising (``raise False`` is itself a TypeError).
    if len(A.shape) != 2:
        return False
    if A.shape[0] != A.shape[1]:
        return False
    return np.allclose(A, A.T)


# Example matrix that equals its own transpose.
A = np.array([[3, 1, 5], [1, 4, 2], [5, 2, 1]])
symetry(A)  # Check if A is symmetric

True

# Matrix inverse

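A square matrix is invertible if any one of the following equivalent conditions holds:

- its determinant is not zero
- all of its singular values are non-zero (SVD)
- zero is not one of its eigenvalues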


In [28]:
# Determinant of the example matrix A; a non-zero value means A is invertible.
la.det(A)

np.float64(-80.99999999999996)

# Auto-differentiation


In [29]:
def f(x):
    """
    Quadratic test function.

    Returns the sum of the squared components of ``x`` plus
    ``x[0] * x[1] - x[0] - 2 * x[1]``.
    """
    first, second = x[0], x[1]
    squared_sum = np.sum(x**2)
    cross_term = first * second
    return squared_sum + cross_term - first - 2 * second


# Build the gradient function of f with autograd.
grad_f = grad(f)
# The Hessian is obtained as the Jacobian of the gradient function.
hessen_f = jacobian(grad_f)
# Point at which the derivatives are evaluated.
x = np.array([1.0, -1.0])
# Show gradient, Hessian and the determinant of the Hessian at x.
print(
    " gradient = ",
    grad_f(x),
    "\n Hessian = ",
    hessen_f(x),
    "\n determinant = ",
    la.det(hessen_f(x)),
)

 gradient =  [ 0. -3.] 
 Hessian =  [[2. 1.]
 [1. 2.]] 
 determinant =  2.9999999999999996


# Steepest Descent Method


In [30]:
def steepest_descent(
    fun, x0: np.ndarray, jac, ls=line_search, maxiter=100, amax=1000.0, tol=1.0e-8
):
    """
    Minimise ``fun`` by steepest descent, printing a trace of every iteration.

    Each iteration performs a line search along the negative gradient and
    stops as soon as one of the stopping tests is met: line-search failure,
    negligible step, negligible change of ``fun``, small gradient norm, or
    ``maxiter`` iterations performed.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : np.ndarray
        Starting point; it is copied, never modified.
    jac : callable
        Gradient of ``fun``, called as ``jac(x)``.
    ls : callable, optional
        Line search called as ``ls(fun, jac, x, d, amax=amax)``. It must
        return a 6-tuple whose first item is the step length and whose last
        item is ``None`` when the search did not converge (the convention of
        ``scipy.optimize.line_search``).
    maxiter : int, optional
        Maximum number of iterations (line searches) to perform.
    amax : float, optional
        Upper bound on the step length, forwarded to ``ls``.
    tol : float, optional
        Shared tolerance for the step-length, objective-change and
        gradient-norm stopping tests.

    Returns
    -------
    np.ndarray
        The last accepted iterate (a copy of ``x0`` if its gradient norm is
        already below ``tol``).
    """
    x_eps = tol  # tolerence for convergence on delta x
    f_eps = tol  # tolerence for convergence on delta f
    g_eps = tol  # tolerence for convergence on norm of gradient
    x_k = x0.copy()
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per iterate and reused below
    if la.norm(g_k) < g_eps:
        print("norm of gradient is within tolerence")
        return x_k
    d_k = -g_k
    nit = 1
    while True:
        print("Interation: ", nit)
        print(f"x_{nit} = ", x_k)
        print(f"f_{nit} = ", f_k)
        print(f"g_{nit} = ", g_k)
        print(f"d_{nit} = ", d_k)
        alpha_k, _, _, _, _, success = ls(fun, jac, x_k, d_k, amax=amax)
        if success is None or alpha_k is None:
            print("Line search fail")
            break

        print("alpha_k = ", alpha_k)
        # The step actually taken is alpha_k * d_k.
        if abs(alpha_k * la.norm(d_k)) < x_eps:
            print("change of x is within tolerence")
            break

        x_next = x_k + alpha_k * d_k
        f_next = fun(x_next)
        g_next = jac(x_next)
        f_change = abs(f_k - f_next)

        # Accept the step before testing for convergence so that the value
        # returned is always the newest iterate.
        x_k, f_k, g_k = x_next, f_next, g_next
        d_k = -g_k

        if f_change < f_eps:
            print("change of fun is within tolerence")
            break
        if la.norm(g_k) < g_eps:
            print("norm of gradient is within tolerence")
            break

        # nit iterations have been completed at this point.
        if nit >= maxiter:
            print("Max iter reached")
            break

        nit += 1
    return x_k

# Fletcher-Reeves Method

In [31]:
def fletcher_reeves(
    fun, x0: np.ndarray, jac, ls=line_search, maxiter=100, amax=1000.0, tol=1.0e-8
):
    """
    Minimise ``fun`` with the Fletcher-Reeves conjugate-gradient method,
    printing a trace of every iteration.

    The first direction is the negative gradient; afterwards
    ``d_{k+1} = -g_{k+1} + (||g_{k+1}||^2 / ||g_k||^2) * d_k``. Iteration
    stops on line-search failure, negligible step, negligible change of
    ``fun``, small gradient norm, or after ``maxiter`` iterations.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : np.ndarray
        Starting point; it is copied, never modified.
    jac : callable
        Gradient of ``fun``, called as ``jac(x)``.
    ls : callable, optional
        Line search called as ``ls(fun, jac, x, d, amax=amax)``. It must
        return a 6-tuple whose first item is the step length and whose last
        item is ``None`` when the search did not converge (the convention of
        ``scipy.optimize.line_search``).
    maxiter : int, optional
        Maximum number of iterations (line searches) to perform.
    amax : float, optional
        Upper bound on the step length, forwarded to ``ls``.
    tol : float, optional
        Shared tolerance for the step-length, objective-change and
        gradient-norm stopping tests.

    Returns
    -------
    np.ndarray
        The last accepted iterate (a copy of ``x0`` if its gradient norm is
        already below ``tol``).
    """
    x_eps = tol  # tolerence for convergence on delta x
    f_eps = tol  # tolerence for convergence on delta f
    g_eps = tol  # tolerence for convergence on norm of gradient
    x_k = x0.copy()
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per iterate and reused below
    if la.norm(g_k) < g_eps:
        print("norm of gradient is within tolerence")
        return x_k
    d_k = -g_k
    nit = 1
    while True:
        print("Interation: ", nit)
        print(f"x_{nit} = ", x_k)
        print(f"f_{nit} = ", f_k)
        print(f"g_{nit} = ", g_k)
        print(f"d_{nit} = ", d_k)
        alpha_k, _, _, _, _, success = ls(fun, jac, x_k, d_k, amax=amax)
        if success is None or alpha_k is None:
            print("Line search fail")
            break

        print("alpha_k = ", alpha_k)
        # The step is alpha_k * d_k; after the first iteration d_k is no
        # longer -g_k, so the step length must be measured with d_k.
        if abs(alpha_k * la.norm(d_k)) < x_eps:
            print("change of x is within tolerence")
            break

        x_next = x_k + alpha_k * d_k
        f_next = fun(x_next)
        g_next = jac(x_next)
        # Fletcher-Reeves coefficient: ratio of squared gradient norms.
        beta_k = la.norm(g_next) ** 2 / la.norm(g_k) ** 2
        d_next = -g_next + beta_k * d_k
        f_change = abs(f_k - f_next)

        # Accept the step before testing for convergence so that the value
        # returned is always the newest iterate.
        x_k, f_k, g_k, d_k = x_next, f_next, g_next, d_next

        if f_change < f_eps:
            print("change of fun is within tolerence")
            break
        if la.norm(g_k) < g_eps:
            print("norm of gradient is within tolerence")
            break

        # nit iterations have been completed at this point.
        if nit >= maxiter:
            print("Max iter reached")
            break

        nit += 1
    return x_k

# Fletcher-Reeves Reset Method

In [32]:
def fletcher_reeves_reset(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
    reset=100,
):
    """
    Minimise ``fun`` with the Fletcher-Reeves conjugate-gradient method with
    periodic restarts, printing a trace of every iteration.

    The direction is updated as
    ``d_{k+1} = -g_{k+1} + (||g_{k+1}||^2 / ||g_k||^2) * d_k`` except when the
    iteration counter is a multiple of ``reset``, in which case the next
    direction is reset to the negative gradient. Iteration stops on
    line-search failure, negligible step, negligible change of ``fun``, small
    gradient norm, or after ``maxiter`` iterations.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : np.ndarray
        Starting point; it is copied, never modified.
    jac : callable
        Gradient of ``fun``, called as ``jac(x)``.
    ls : callable, optional
        Line search called as ``ls(fun, jac, x, d, amax=amax)``. It must
        return a 6-tuple whose first item is the step length and whose last
        item is ``None`` when the search did not converge (the convention of
        ``scipy.optimize.line_search``).
    maxiter : int, optional
        Maximum number of iterations (line searches) to perform.
    amax : float, optional
        Upper bound on the step length, forwarded to ``ls``.
    tol : float, optional
        Shared tolerance for the step-length, objective-change and
        gradient-norm stopping tests.
    reset : int, optional
        Restart period: after every ``reset``-th iteration the search
        direction falls back to steepest descent. Must be non-zero.

    Returns
    -------
    np.ndarray
        The last accepted iterate (a copy of ``x0`` if its gradient norm is
        already below ``tol``).
    """
    x_eps = tol  # tolerence for convergence on delta x
    f_eps = tol  # tolerence for convergence on delta f
    g_eps = tol  # tolerence for convergence on norm of gradient
    x_k = x0.copy()
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per iterate and reused below
    if la.norm(g_k) < g_eps:
        print("norm of gradient is within tolerence")
        return x_k
    d_k = -g_k
    nit = 1
    while True:
        print("Interation:", nit)
        print(f"x_{nit} = ", x_k)
        print(f"f_{nit} = ", f_k)
        print(f"g_{nit} = ", g_k)
        print(f"d_{nit} = ", d_k)
        alpha_k, _, _, _, _, success = ls(fun, jac, x_k, d_k, amax=amax)
        if success is None or alpha_k is None:
            print("Line search fail")
            break

        print("alpha_k = ", alpha_k)
        # The step is alpha_k * d_k; d_k is generally not -g_k, so the step
        # length must be measured with d_k.
        if abs(alpha_k * la.norm(d_k)) < x_eps:
            print("change of x is within tolerence")
            break

        x_next = x_k + alpha_k * d_k
        f_next = fun(x_next)
        g_next = jac(x_next)
        if nit % reset == 0:
            # Periodic restart: drop the accumulated conjugate direction.
            d_next = -g_next
        else:
            # Fletcher-Reeves coefficient: ratio of squared gradient norms.
            beta_k = la.norm(g_next) ** 2 / la.norm(g_k) ** 2
            d_next = -g_next + beta_k * d_k
        f_change = abs(f_k - f_next)

        # Accept the step before testing for convergence so that the value
        # returned is always the newest iterate.
        x_k, f_k, g_k, d_k = x_next, f_next, g_next, d_next

        if f_change < f_eps:
            print("change of fun is within tolerence")
            break
        if la.norm(g_k) < g_eps:
            print("norm of gradient is within tolerence")
            break

        # nit iterations have been completed at this point.
        if nit >= maxiter:
            print("Max iter reached")
            break
        nit += 1
    return x_k

# Newton Method

In [33]:
def newton(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
):
    """
    Minimise ``fun`` with the pure Newton method (unit step), printing a trace
    of every iteration.

    The Hessian is built as ``jacobian(jac)``, so ``jac`` must be
    differentiable by autograd. Each iteration takes the full step
    ``d = -H(x)^{-1} g(x)`` and stops on negligible change of ``fun``, small
    gradient norm, or after ``maxiter`` iterations.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : np.ndarray
        Starting point; it is copied, never modified.
    jac : callable
        Gradient of ``fun``, called as ``jac(x)``.
    ls : callable, optional
        Unused; accepted so the signature matches the other optimisers.
    maxiter : int, optional
        Maximum number of Newton steps to perform.
    amax : float, optional
        Unused; accepted so the signature matches the other optimisers.
    tol : float, optional
        Shared tolerance for the objective-change and gradient-norm
        stopping tests.

    Returns
    -------
    np.ndarray
        The last iterate (a copy of ``x0`` if its gradient norm is already
        below ``tol``).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian is singular at an iterate.
    """
    f_eps = tol  # tolerence for convergence on delta f
    g_eps = tol  # tolerence for convergence on norm of gradient
    hessian = jacobian(jac)
    x_k = x0.copy()
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per iterate and reused below
    if la.norm(g_k) < g_eps:
        print("norm of gradient is within tolerence")
        return x_k
    # Solve H d = -g instead of forming the explicit inverse.
    d_k = -la.solve(hessian(x_k), g_k)
    nit = 1
    while True:
        print("Interation:", nit)
        print(f"x_{nit} = ", x_k)
        print(f"f_{nit} = ", f_k)
        print(f"g_{nit} = ", g_k)
        print(f"d_{nit} = ", d_k)
        x_next = x_k + d_k
        f_next = fun(x_next)
        g_next = jac(x_next)
        f_change = abs(f_k - f_next)

        # Accept the step before testing for convergence so that the value
        # returned is always the newest iterate.
        x_k, f_k, g_k = x_next, f_next, g_next

        if f_change < f_eps:
            print("change of fun is within tolerence")
            break
        if la.norm(g_k) < g_eps:
            print("norm of gradient is within tolerence")
            break

        # nit iterations have been completed at this point.
        if nit >= maxiter:
            print("Max iter reached")
            break

        # The Newton direction must use the Hessian at the NEW point, paired
        # with the gradient at that same point.
        d_k = -la.solve(hessian(x_k), g_k)
        nit += 1
    return x_k

# Modified Newton Method

In [34]:
def newton_modified(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
):
    """
    Minimise ``fun`` with the Newton method combined with a line search
    (damped / modified Newton), printing a trace of every iteration.

    The Hessian is built as ``jacobian(jac)``, so ``jac`` must be
    differentiable by autograd. Each iteration searches along
    ``d = -H(x)^{-1} g(x)`` and stops on line-search failure, negligible step,
    negligible change of ``fun``, small gradient norm, or after ``maxiter``
    iterations.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : np.ndarray
        Starting point; it is copied, never modified.
    jac : callable
        Gradient of ``fun``, called as ``jac(x)``.
    ls : callable, optional
        Line search called as ``ls(fun, jac, x, d, amax=amax)``. It must
        return a 6-tuple whose first item is the step length and whose last
        item is ``None`` when the search did not converge (the convention of
        ``scipy.optimize.line_search``).
    maxiter : int, optional
        Maximum number of iterations (line searches) to perform.
    amax : float, optional
        Upper bound on the step length, forwarded to ``ls``.
    tol : float, optional
        Shared tolerance for the step-length, objective-change and
        gradient-norm stopping tests.

    Returns
    -------
    np.ndarray
        The last accepted iterate (a copy of ``x0`` if its gradient norm is
        already below ``tol``).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian is singular at an iterate.
    """
    x_eps = tol  # tolerence for convergence on delta x
    f_eps = tol  # tolerence for convergence on delta f
    g_eps = tol  # tolerence for convergence on norm of gradient
    hessian = jacobian(jac)
    x_k = x0.copy()
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per iterate and reused below
    if la.norm(g_k) < g_eps:
        print("norm of gradient is within tolerence")
        return x_k
    # Solve H d = -g instead of forming the explicit inverse.
    d_k = -la.solve(hessian(x_k), g_k)
    nit = 1
    while True:
        print("Interation:", nit)
        print(f"x_{nit} = ", x_k)
        print(f"f_{nit} = ", f_k)
        print(f"g_{nit} = ", g_k)
        print(f"d_{nit} = ", d_k)
        alpha_k, _, _, _, _, success = ls(fun, jac, x_k, d_k, amax=amax)
        if success is None or alpha_k is None:
            print("Line search fail")
            break

        print("alpha_k = ", alpha_k)
        if abs(alpha_k * la.norm(d_k)) < x_eps:
            print("change of x is within tolerence")
            break
        x_next = x_k + alpha_k * d_k
        f_next = fun(x_next)
        g_next = jac(x_next)
        f_change = abs(f_k - f_next)

        # Accept the step before testing for convergence so that the value
        # returned is always the newest iterate.
        x_k, f_k, g_k = x_next, f_next, g_next

        if f_change < f_eps:
            print("change of fun is within tolerence")
            break
        if la.norm(g_k) < g_eps:
            print("norm of gradient is within tolerence")
            break

        # nit iterations have been completed at this point.
        if nit >= maxiter:
            print("Max iter reached")
            break

        # The Newton direction must use the Hessian at the NEW point, paired
        # with the gradient at that same point.
        d_k = -la.solve(hessian(x_k), g_k)
        nit += 1
    return x_k

# Davidon-Fletcher-Powell Method

In [50]:
def davidon_fletcher_powell(
    fun,
    x0: np.ndarray,
    jac,
    H1: np.ndarray,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
):
    """
    Minimise ``fun`` with the Davidon-Fletcher-Powell quasi-Newton method,
    printing a trace of every iteration.

    ``H_k`` approximates the inverse Hessian. Each iteration searches along
    ``d_k = -H_k g_k`` and then applies the DFP rank-two update

        H_{k+1} = H_k + p p^T / (p^T q) - (H_k q)(H_k q)^T / (q^T H_k q)

    with ``p = alpha_k d_k`` and ``q = g_{k+1} - g_k``. Iteration stops on
    line-search failure, negligible step, negligible change of ``fun``, small
    gradient norm, or after ``maxiter`` iterations.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : np.ndarray
        Starting point; it is copied, never modified.
    jac : callable
        Gradient of ``fun``, called as ``jac(x)``.
    H1 : np.ndarray
        Initial inverse-Hessian approximation; must be symmetric positive
        definite. It is copied, never modified.
    ls : callable, optional
        Line search called as ``ls(fun, jac, x, d, amax=amax)``. It must
        return a 6-tuple whose first item is the step length and whose last
        item is ``None`` when the search did not converge (the convention of
        ``scipy.optimize.line_search``).
    maxiter : int, optional
        Maximum number of iterations (line searches) to perform.
    amax : float, optional
        Upper bound on the step length, forwarded to ``ls``.
    tol : float, optional
        Shared tolerance for the step-length, objective-change and
        gradient-norm stopping tests.

    Returns
    -------
    np.ndarray
        The last accepted iterate (a copy of ``x0`` if its gradient norm is
        already below ``tol``).

    Raises
    ------
    ValueError
        If ``H1`` is not a symmetric positive definite matrix.
    """
    if not symetry(H1) or not positive_definite(H1):
        raise ValueError("H1 must be a symetric positive definite matrix")
    x_eps = tol  # tolerence for convergence on delta x
    f_eps = tol  # tolerence for convergence on delta f
    g_eps = tol  # tolerence for convergence on norm of gradient
    x_k = x0.copy()
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per iterate and reused below
    H_k = H1.copy()
    if la.norm(g_k) < g_eps:
        print("norm of gradient is within tolerence")
        return x_k
    d_k = -H_k @ g_k
    nit = 1
    while True:
        print("Interation:", nit)
        print(f"x_{nit} = ", x_k)
        print(f"f_{nit} = ", f_k)
        print(f"g_{nit} = ", g_k)
        print(f"d_{nit} = ", d_k)
        print(f"H_{nit} = ", H_k)
        # The step length always comes from the line search; it is never
        # overridden with a problem-specific constant.
        alpha_k, _, _, _, _, success = ls(fun, jac, x_k, d_k, amax=amax)
        if success is None or alpha_k is None:
            print("Line search fail")
            break

        print(f"alpha_{nit} = ", alpha_k)
        if abs(alpha_k * la.norm(d_k)) < x_eps:
            print("change of x is within tolerence")
            break
        x_next = x_k + alpha_k * d_k
        f_next = fun(x_next)
        g_next = jac(x_next)
        p_k = alpha_k * d_k  # step taken
        q_k = g_next - g_k  # change of gradient along the step
        Hq_k = H_k @ q_k
        # DFP rank-two update of the inverse-Hessian approximation.
        H_next = (
            H_k
            + np.outer(p_k, p_k) / (p_k @ q_k)
            - np.outer(Hq_k, Hq_k) / (q_k @ Hq_k)
        )
        d_next = -H_next @ g_next
        f_change = abs(f_k - f_next)

        # Accept the step before testing for convergence so that the value
        # returned is always the newest iterate.
        x_k, f_k, g_k = x_next, f_next, g_next
        d_k, H_k = d_next, H_next

        if f_change < f_eps:
            print("change of fun is within tolerence")
            break
        if la.norm(g_k) < g_eps:
            print("norm of gradient is within tolerence")
            break

        # nit iterations have been completed at this point.
        if nit >= maxiter:
            print("Max iter reached")
            break
        print(f"p_{nit}", p_k)
        print(f"q_{nit}", q_k)
        nit += 1
    return x_k