In [2]:
import numpy as np
import numpy.linalg as la
import autograd.numpy as np
from autograd import grad, jacobian
from scipy.optimize import OptimizeResult, line_search

## Matrix definiteness

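- positive definite (positive semi-definite): all eigenvalues are positive (non-negative)
- negative definite (negative semi-definite): all eigenvalues are negative (non-positive)
- indefinite: the matrix has both positive and negative eigenvalues

These eigenvalue tests characterise definiteness for symmetric matrices.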


In [26]:
def positive_definite(A):
    """
    Return whether every eigenvalue of A is strictly positive.

    For a symmetric matrix this is the eigenvalue test for positive
    definiteness.
    NOTE(review): symmetry of A is not checked here -- presumably callers
    pass symmetric matrices; confirm.
    """
    eigenvalues = la.eigvals(A)
    strictly_positive = eigenvalues > 0
    return np.all(strictly_positive)


def negative_definite(A):
    """
    Return whether every eigenvalue of A is strictly negative.

    For a symmetric matrix this is the eigenvalue test for negative
    definiteness.
    NOTE(review): symmetry of A is not checked here -- presumably callers
    pass symmetric matrices; confirm.
    """
    eigenvalues = la.eigvals(A)
    strictly_negative = eigenvalues < 0
    return np.all(strictly_negative)


def semi_positive_definite(A):
    """
    Return whether every eigenvalue of A is non-negative.

    For a symmetric matrix this is the eigenvalue test for positive
    semi-definiteness.
    NOTE(review): symmetry of A is not checked here -- presumably callers
    pass symmetric matrices; confirm.
    """
    eigenvalues = la.eigvals(A)
    non_negative = eigenvalues >= 0
    return np.all(non_negative)


def semi_negative_definite(A):
    """
    Return whether every eigenvalue of A is non-positive.

    For a symmetric matrix this is the eigenvalue test for negative
    semi-definiteness.
    NOTE(review): symmetry of A is not checked here -- presumably callers
    pass symmetric matrices; confirm.
    """
    eigenvalues = la.eigvals(A)
    non_positive = eigenvalues <= 0
    return np.all(non_positive)


# Symmetric 3x3 example matrix, written one row per line.
A = np.array(
    [
        [3, 1, 5],
        [1, 4, 2],
        [5, 2, 1],
    ]
)

# Last expression of the cell: shows whether A is positive definite.
positive_definite(A)

np.False_

## Matrix symmetry


In [27]:
def symetry(A: np.ndarray):
    """
    Check whether A is a symmetric matrix.

    Parameters
    ----------
    A : np.ndarray
        Array to test.

    Returns
    -------
    bool
        True if A is two-dimensional, square and equal to its transpose
        within the default tolerances of ``np.allclose``; False otherwise.
    """
    # The previous version used `raise False` here, which raises TypeError
    # (only exceptions can be raised); a non-square or non-2D array is simply
    # not symmetric, so report False.
    if A.ndim != 2:
        return False
    n_rows, n_cols = A.shape
    if n_rows != n_cols:
        return False
    return bool(np.allclose(A, A.T))


# Same 3x3 example matrix, written one row per line.
A = np.array(
    [
        [3, 1, 5],
        [1, 4, 2],
        [5, 2, 1],
    ]
)
symetry(A)  # Check if A is symmetric

True

# Matrix inverse

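A square matrix is invertible if and only if any one of the following equivalent conditions holds:

- its determinant is not zero
- all of its singular values (from the SVD) are non-zero
- zero is not one of its eigenvalues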


In [28]:
# Determinant of the example matrix A; a non-zero value means A is invertible.
la.det(A)

np.float64(-80.99999999999996)

# Auto-differentiation


In [29]:
def f(x):
    """
    Quadratic test objective of a vector x with at least two components:
    sum(x**2) + x[0]*x[1] - x[0] - 2*x[1].
    """
    first, second = x[0], x[1]
    squared_norm = np.sum(x**2)
    return squared_norm + first * second - first - 2 * second


# Gradient of f by automatic differentiation, and its Hessian obtained as the
# Jacobian of that gradient.
grad_f = grad(f)
hessen_f = jacobian(grad_f)
x = np.array([1.0, -1.0])
# Re-enabled: the output recorded under this cell (gradient, Hessian and
# determinant) is produced by this print, so it must run for the notebook to
# reproduce on Restart & Run All.
print(
    " gradient = ",
    grad_f(x),
    "\n Hessian = ",
    hessen_f(x),
    "\n determinant = ",
    la.det(hessen_f(x)),
)

 gradient =  [ 0. -3.] 
 Hessian =  [[2. 1.]
 [1. 2.]] 
 determinant =  2.9999999999999996


# Steepest Descent Method


In [30]:
def steepest_descent(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
    logs=True,
):
    """
    Minimise an unconstrained function with the steepest-descent method.

    Each iteration searches along d_k = -jac(x_k) with the line-search
    routine ``ls``.

    Parameters
    ----------
    fun : callable
        Objective function, fun(x) -> float.

    x0 : array_like
        Initial guess (converted to a float array; the input is not modified).

    jac : callable
        Gradient of the objective, jac(x) -> array_like.

    ls : callable, optional
        Line-search routine (default: scipy.optimize.line_search). It is
        called as ``ls(fun, jac, x_k, d_k, amax=amax)`` and must return a
        6-tuple whose first element is the step length, whose second and
        third elements are evaluation counts, and whose last element is None
        when the search did not converge.

    maxiter : int, optional
        Maximum number of iterations (default: 100).

    amax : float, optional
        Maximum step size in the line search (default: 1000.).

    tol : float, optional
        Tolerance used for all three stopping tests: step length, change of
        the function value and gradient norm (default: 1.e-8).

    logs : bool, optional
        Print the iterates and the termination reason (default: True).

    Returns
    -------
    res : scipy.optimize.OptimizeResult
        Fields: x, fun, nit, nfev, njev, success, status (0 on convergence,
        1 otherwise) and message.
    """
    x_eps = tol  # tolerance for convergence on delta x
    f_eps = tol  # tolerance for convergence on delta f
    g_eps = tol  # tolerance for convergence on norm of gradient

    x_k = np.array(x0, dtype=float)
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per point and reused below
    nfev, njev, nit = 1, 1, 0

    def _finish(x, f, success, message):
        # Build the result from the counters as they stand at termination.
        if logs:
            print(message)
        return OptimizeResult(
            x=x,
            fun=f,
            nit=nit,
            nfev=nfev,
            njev=njev,
            success=success,
            status=0 if success else 1,
            message=message,
        )

    if la.norm(g_k) < g_eps:
        return _finish(x_k, f_k, True, "norm of gradient is within tolerence")

    # `nit < maxiter` performs at most `maxiter` iterations (the previous
    # `nit > maxiter` test at the end of the body allowed one extra).
    while nit < maxiter:
        nit += 1
        d_k = -g_k
        if logs:
            print("Interation: ", nit)
            print(f"x_{nit} = ", x_k)
            print(f"f_{nit} = ", f_k)
            print(f"g_{nit} = ", g_k)
            print(f"d_{nit} = ", d_k)

        alpha_k, fc_k, gc_k, _, _, new_slope = ls(fun, jac, x_k, d_k, amax=amax)
        nfev += fc_k or 0
        njev += gc_k or 0
        # scipy's line_search reports failure through None in the step length
        # and/or the new slope.
        if alpha_k is None or new_slope is None:
            return _finish(x_k, f_k, False, "Line search fail")
        if logs:
            print("alpha_k = ", alpha_k)

        x_k1 = x_k + alpha_k * d_k
        f_k1 = fun(x_k1)
        nfev += 1

        if abs(alpha_k) * la.norm(d_k) < x_eps:
            return _finish(x_k1, f_k1, True, "change of x is within tolerence")
        if abs(f_k - f_k1) < f_eps:
            return _finish(x_k1, f_k1, True, "change of fun is within tolerence")

        g_k1 = jac(x_k1)
        njev += 1
        if la.norm(g_k1) < g_eps:
            return _finish(x_k1, f_k1, True, "norm of gradient is within tolerence")

        x_k, f_k, g_k = x_k1, f_k1, g_k1

    return _finish(x_k, f_k, False, "Max iter reached")

# Fletcher-Reeves Method

In [20]:
def fletcher_reeves(fun, x0, jac, ls=line_search, maxiter=100, amax=1000., tol=1.e-8):
    """
    Minimise an unconstrained nonlinear function with the Fletcher-Reeves
    nonlinear conjugate-gradient iteration.

    Parameters
    ----------
    fun : callable
        Objective to minimise, fun(x) -> float.

    x0 : 1D array
        Starting point.

    jac : callable
        Gradient of the objective, jac(x) -> array_like.

    ls : callable, optional
        Line-search routine (default: scipy.optimize.line_search). It is
        called as ``ls(fun, jac, x, d, gfk=g, amax=amax)``.

    maxiter : int, optional
        Upper bound on the number of iterations (default: 100).

    amax : float, optional
        Largest step length allowed in the line search (default: 1000.).

    tol : float, optional
        Common tolerance for the step, function-change and gradient-norm
        stopping tests (default: 1.e-8).

    Returns
    -------
    res : scipy.optimize.OptimizeResult
        Result object with fields:
        - x:       final point
        - fun:     objective value at that point
        - nit:     iterations performed
        - nfev:    objective evaluations
        - njev:    gradient evaluations
        - success: True when one of the stopping tests was met
        - status:  0 on success, 1 otherwise
        - message: reason for termination
    """
    # One tolerance drives all three stopping tests.
    step_tol = tol
    value_tol = tol
    grad_tol = tol

    # Starting state
    current_x = np.array(x0, dtype=float).copy()
    current_f = fun(current_x)
    current_g = jac(current_x)
    fun_calls, grad_calls, iterations = 1, 1, 0

    report = OptimizeResult()

    def _finalize(x_final, f_final, converged, reason):
        # Fill the shared result object using the counters at exit time.
        report.x = x_final
        report.fun = f_final
        report.nit = iterations
        report.nfev = fun_calls
        report.njev = grad_calls
        report.success = converged
        report.status = 0 if converged else 1
        report.message = reason
        return report

    # Nothing to do when the start is already stationary.
    if la.norm(current_g) < grad_tol:
        return _finalize(
            current_x, current_f, True, "Initial gradient norm below tolerance"
        )

    # First search direction is the steepest-descent direction.
    direction = -current_g

    while iterations < maxiter:
        iterations += 1

        # Step length along the current direction
        step, ls_fun_calls, ls_grad_calls, trial_f, _, _ = ls(
            fun, jac, current_x, direction, gfk=current_g, amax=amax
        )
        fun_calls += ls_fun_calls
        grad_calls += ls_grad_calls

        if step is None or trial_f is None:
            return _finalize(current_x, current_f, False, "Line search failed")

        trial_x = current_x + step * direction

        # Stopping test on the step length
        if la.norm(step * direction) < step_tol:
            return _finalize(trial_x, trial_f, True, "Step size below tolerance")

        # Stopping test on the decrease of the objective
        if abs(current_f - trial_f) < value_tol:
            return _finalize(
                trial_x, trial_f, True, "Function change below tolerance"
            )

        # Stopping test on the gradient at the trial point
        trial_g = jac(trial_x)
        grad_calls += 1
        if la.norm(trial_g) < grad_tol:
            return _finalize(trial_x, trial_f, True, "Gradient norm below tolerance")

        # Fletcher-Reeves coefficient and the new conjugate direction
        beta = np.dot(trial_g, trial_g) / np.dot(current_g, current_g)
        direction = -trial_g + beta * direction

        # Accept the trial point
        current_x = trial_x
        current_f = trial_f
        current_g = trial_g

    # Iteration budget exhausted without meeting a stopping test
    return _finalize(
        current_x, current_f, False, "Maximum number of iterations reached"
    )


# TOFIX: Fletcher-Reeves Reset Method

In [32]:
def fletcher_reeves_reset(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
    reset=100,
    logs=True,
):
    """
    Fletcher-Reeves conjugate-gradient method with periodic restarts.

    The search direction is d_{k+1} = -g_{k+1} + beta * d_k with
    beta = (g_{k+1} . g_{k+1}) / (g_k . g_k); on every iteration whose
    1-based index is a multiple of ``reset`` the next direction is restarted
    to the steepest-descent direction -g_{k+1}.

    Parameters
    ----------
    fun : callable
        Objective function, fun(x) -> float.

    x0 : array_like
        Initial guess (converted to a float array; the input is not modified).

    jac : callable
        Gradient of the objective, jac(x) -> array_like.

    ls : callable, optional
        Line-search routine (default: scipy.optimize.line_search). It is
        called as ``ls(fun, jac, x_k, d_k, amax=amax)`` and must return a
        6-tuple whose first element is the step length, whose second and
        third elements are evaluation counts, and whose last element is None
        when the search did not converge.

    maxiter : int, optional
        Maximum number of iterations (default: 100).

    amax : float, optional
        Maximum step size in the line search (default: 1000.).

    tol : float, optional
        Tolerance used for all three stopping tests (default: 1.e-8).

    reset : int, optional
        Restart period in iterations; must be non-zero (default: 100).

    logs : bool, optional
        Print the iterates and the termination reason (default: True).

    Returns
    -------
    res : scipy.optimize.OptimizeResult
        Fields: x, fun, nit, nfev, njev, success, status (0 on convergence,
        1 otherwise) and message.
    """
    x_eps = tol  # tolerance for convergence on delta x
    f_eps = tol  # tolerance for convergence on delta f
    g_eps = tol  # tolerance for convergence on norm of gradient

    x_k = np.array(x0, dtype=float)
    f_k = fun(x_k)
    g_k = jac(x_k)  # evaluated once per point and reused below
    nfev, njev, nit = 1, 1, 0

    def _finish(x, f, success, message):
        # Build the result from the counters as they stand at termination.
        if logs:
            print(message)
        return OptimizeResult(
            x=x,
            fun=f,
            nit=nit,
            nfev=nfev,
            njev=njev,
            success=success,
            status=0 if success else 1,
            message=message,
        )

    if la.norm(g_k) < g_eps:
        return _finish(x_k, f_k, True, "norm of gradient is within tolerence")

    d_k = -g_k
    # `nit < maxiter` performs at most `maxiter` iterations (the previous
    # `nit > maxiter` test at the end of the body allowed one extra).
    while nit < maxiter:
        nit += 1
        if logs:
            print("Interation:", nit)
            print(f"x_{nit} = ", x_k)
            print(f"f_{nit} = ", f_k)
            print(f"g_{nit} = ", g_k)
            print(f"d_{nit} = ", d_k)

        alpha_k, fc_k, gc_k, _, _, new_slope = ls(fun, jac, x_k, d_k, amax=amax)
        nfev += fc_k or 0
        njev += gc_k or 0
        # scipy's line_search reports failure through None in the step length
        # and/or the new slope.
        if alpha_k is None or new_slope is None:
            return _finish(x_k, f_k, False, "Line search fail")
        if logs:
            print("alpha_k = ", alpha_k)

        x_k1 = x_k + alpha_k * d_k
        f_k1 = fun(x_k1)
        nfev += 1

        # The step is alpha_k * d_k; for a conjugate direction d_k differs
        # from -g_k, so the step length must be measured on d_k.
        if la.norm(alpha_k * d_k) < x_eps:
            return _finish(x_k1, f_k1, True, "change of x is within tolerence")
        if abs(f_k - f_k1) < f_eps:
            return _finish(x_k1, f_k1, True, "change of fun is within tolerence")

        g_k1 = jac(x_k1)
        njev += 1
        if la.norm(g_k1) < g_eps:
            return _finish(x_k1, f_k1, True, "norm of gradient is within tolerence")

        if nit % reset == 0:
            # Periodic restart along the steepest-descent direction
            d_k = -g_k1
        else:
            beta_k1 = np.dot(g_k1, g_k1) / np.dot(g_k, g_k)
            d_k = -g_k1 + beta_k1 * d_k

        x_k, f_k, g_k = x_k1, f_k1, g_k1

    return _finish(x_k, f_k, False, "Max iter reached")

# Newton Method

In [33]:
def newton(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
    hess=None,
    logs=True,
):
    """
    Minimise an unconstrained function with the pure Newton method.

    Every iteration takes the full step d_k obtained by solving
    H(x_k) d_k = -g(x_k); no line search is performed.

    Parameters
    ----------
    fun : callable
        Objective function, fun(x) -> float.

    x0 : array_like
        Initial guess (converted to a float array; the input is not modified).

    jac : callable
        Gradient of the objective, jac(x) -> array_like.

    ls : callable, optional
        Unused; kept so the signature matches the other solvers.

    maxiter : int, optional
        Maximum number of iterations (default: 100).

    amax : float, optional
        Unused; kept so the signature matches the other solvers.

    tol : float, optional
        Tolerance used for all three stopping tests: step length, change of
        the function value and gradient norm (default: 1.e-8).

    hess : callable, optional
        Hessian of the objective, hess(x) -> (n, n) array. When omitted the
        Hessian is built as ``jacobian(jac)``, as in the previous version.

    logs : bool, optional
        Print the iterates and the termination reason (default: True).

    Returns
    -------
    res : scipy.optimize.OptimizeResult
        Fields: x, fun, nit, nfev, njev, nhev, success, status (0 on
        convergence, 1 otherwise) and message.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian is singular at an iterate.
    """
    x_eps = tol  # tolerance for convergence on delta x
    f_eps = tol  # tolerance for convergence on delta f
    g_eps = tol  # tolerance for convergence on norm of gradient
    hessian = hess if hess is not None else jacobian(jac)

    x_k = np.array(x0, dtype=float)
    f_k = fun(x_k)
    g_k = jac(x_k)
    nfev, njev, nhev, nit = 1, 1, 0, 0

    def _finish(x, f, success, message):
        # Build the result from the counters as they stand at termination.
        if logs:
            print(message)
        return OptimizeResult(
            x=x,
            fun=f,
            nit=nit,
            nfev=nfev,
            njev=njev,
            nhev=nhev,
            success=success,
            status=0 if success else 1,
            message=message,
        )

    # Checked before any Hessian work, which is wasted at a stationary point.
    if la.norm(g_k) < g_eps:
        return _finish(x_k, f_k, True, "norm of gradient is within tolerence")

    # `nit < maxiter` performs at most `maxiter` iterations (the previous
    # `nit > maxiter` test at the end of the body allowed one extra).
    while nit < maxiter:
        nit += 1
        # Hessian and gradient are both taken at the current point x_k; the
        # previous version paired the Hessian of the old point with the
        # gradient of the new one. Solving the linear system avoids forming
        # the explicit inverse.
        d_k = -la.solve(hessian(x_k), g_k)
        nhev += 1
        if logs:
            print("Interation:", nit)
            print(f"x_{nit} = ", x_k)
            print(f"f_{nit} = ", f_k)
            print(f"g_{nit} = ", g_k)
            print(f"d_{nit} = ", d_k)

        x_k1 = x_k + d_k
        f_k1 = fun(x_k1)
        nfev += 1

        if la.norm(d_k) < x_eps:
            return _finish(x_k1, f_k1, True, "change of x is within tolerence")
        if abs(f_k - f_k1) < f_eps:
            return _finish(x_k1, f_k1, True, "change of fun is within tolerence")

        g_k1 = jac(x_k1)
        njev += 1
        if la.norm(g_k1) < g_eps:
            return _finish(x_k1, f_k1, True, "norm of gradient is within tolerence")

        x_k, f_k, g_k = x_k1, f_k1, g_k1

    return _finish(x_k, f_k, False, "Max iter reached")

# Modified Newton Method

In [34]:
def newton_modified(
    fun,
    x0: np.ndarray,
    jac,
    ls=line_search,
    maxiter=100,
    amax=1000.0,
    tol=1.0e-8,
    hess=None,
    logs=True,
):
    """
    Minimise an unconstrained function with Newton's method and a line search.

    Every iteration solves H(x_k) d_k = -g(x_k) for the Newton direction and
    then chooses the step length along d_k with the line-search routine
    ``ls``.

    Parameters
    ----------
    fun : callable
        Objective function, fun(x) -> float.

    x0 : array_like
        Initial guess (converted to a float array; the input is not modified).

    jac : callable
        Gradient of the objective, jac(x) -> array_like.

    ls : callable, optional
        Line-search routine (default: scipy.optimize.line_search). It is
        called as ``ls(fun, jac, x_k, d_k, amax=amax)`` and must return a
        6-tuple whose first element is the step length, whose second and
        third elements are evaluation counts, and whose last element is None
        when the search did not converge.

    maxiter : int, optional
        Maximum number of iterations (default: 100).

    amax : float, optional
        Maximum step size in the line search (default: 1000.).

    tol : float, optional
        Tolerance used for all three stopping tests: step length, change of
        the function value and gradient norm (default: 1.e-8).

    hess : callable, optional
        Hessian of the objective, hess(x) -> (n, n) array. When omitted the
        Hessian is built as ``jacobian(jac)``, as in the previous version.

    logs : bool, optional
        Print the iterates and the termination reason (default: True).

    Returns
    -------
    res : scipy.optimize.OptimizeResult
        Fields: x, fun, nit, nfev, njev, nhev, success, status (0 on
        convergence, 1 otherwise) and message.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian is singular at an iterate.
    """
    x_eps = tol  # tolerance for convergence on delta x
    f_eps = tol  # tolerance for convergence on delta f
    g_eps = tol  # tolerance for convergence on norm of gradient
    hessian = hess if hess is not None else jacobian(jac)

    x_k = np.array(x0, dtype=float)
    f_k = fun(x_k)
    g_k = jac(x_k)
    nfev, njev, nhev, nit = 1, 1, 0, 0

    def _finish(x, f, success, message):
        # Build the result from the counters as they stand at termination.
        if logs:
            print(message)
        return OptimizeResult(
            x=x,
            fun=f,
            nit=nit,
            nfev=nfev,
            njev=njev,
            nhev=nhev,
            success=success,
            status=0 if success else 1,
            message=message,
        )

    # Checked before any Hessian work, which is wasted at a stationary point.
    if la.norm(g_k) < g_eps:
        return _finish(x_k, f_k, True, "norm of gradient is within tolerence")

    # `nit < maxiter` performs at most `maxiter` iterations (the previous
    # `nit > maxiter` test at the end of the body allowed one extra).
    while nit < maxiter:
        nit += 1
        # Hessian and gradient are both taken at the current point x_k; the
        # previous version paired the Hessian of the old point with the
        # gradient of the new one.
        d_k = -la.solve(hessian(x_k), g_k)
        nhev += 1
        if logs:
            print("Interation:", nit)
            print(f"x_{nit} = ", x_k)
            print(f"f_{nit} = ", f_k)
            print(f"g_{nit} = ", g_k)
            print(f"d_{nit} = ", d_k)

        alpha_k, fc_k, gc_k, _, _, new_slope = ls(fun, jac, x_k, d_k, amax=amax)
        nfev += fc_k or 0
        njev += gc_k or 0
        # scipy's line_search reports failure through None in the step length
        # and/or the new slope.
        if alpha_k is None or new_slope is None:
            return _finish(x_k, f_k, False, "Line search fail")
        if logs:
            print("alpha_k = ", alpha_k)

        x_k1 = x_k + alpha_k * d_k
        f_k1 = fun(x_k1)
        nfev += 1

        if abs(alpha_k) * la.norm(d_k) < x_eps:
            return _finish(x_k1, f_k1, True, "change of x is within tolerence")
        if abs(f_k - f_k1) < f_eps:
            return _finish(x_k1, f_k1, True, "change of fun is within tolerence")

        g_k1 = jac(x_k1)
        njev += 1
        if la.norm(g_k1) < g_eps:
            return _finish(x_k1, f_k1, True, "norm of gradient is within tolerence")

        x_k, f_k, g_k = x_k1, f_k1, g_k1

    return _finish(x_k, f_k, False, "Max iter reached")

# Davidon-Fletcher-Powell Method

In [19]:
def dfp(fun, x0, jac, H_k=None, ls=line_search, maxiter=100, amax=1000., tol=1.e-8, logs=True):
    """
    Simple implementation of the DFP (Davidon-Fletcher-Powell) quasi-Newton
    method for minimising an unconstrained nonlinear function.

    Parameters:
    -----------
    fun : callable
        The scalar function to be minimized: fun(x) -> float

    x0 : array_like
        Initial guess (converted to a float array; the input is not modified)

    jac : callable
        The gradient function: jac(x) -> array_like

    H_k : array_like, optional
        Initial approximation of the inverse Hessian (default: identity)

    ls : callable, optional
        A line-search routine, default is scipy.optimize.line_search. It is
        called as ``ls(fun, jac, x_k, d_k, gfk=g_k, amax=amax)``.

    maxiter : int, optional
        Maximum number of iterations (default: 100)

    amax : float, optional
        Maximum step size in the line search (default: 1000.)

    tol : float, optional
        Tolerance for stopping criteria (default: 1.e-8)

    logs : boolean, optional
        Option to print the parameters calculated for each step

    Returns:
    --------
    res : scipy.optimize.OptimizeResult
        The result of the optimization, with fields:
        - x: the final point
        - fun: the final function value
        - nfev: number of function evaluations
        - njev: number of gradient evaluations
        - nit: number of iterations
        - success: boolean indicating if convergence criteria met
        - status: 0 on success, 1 otherwise
        - message: termination description
    """
    # Tolerances for convergence
    x_eps = tol  # Tolerance on step size
    f_eps = tol  # Tolerance on function change
    g_eps = tol  # Tolerance on gradient norm
    curvature_eps = 1e-14  # Smallest denominator accepted in the update

    # np.array accepts lists/tuples as well as arrays and always copies.
    x_k = np.array(x0, dtype=float)
    f_k = fun(x_k)
    nfev = 1
    g_k = jac(x_k)
    njev = 1
    nit = 0

    # Initialize H as the identity matrix unless the caller supplied one
    n = x_k.size
    H_k = np.eye(n) if H_k is None else np.array(H_k, dtype=float)

    def _finish(x, f, success, message):
        # Build the result from the counters as they stand at termination.
        return OptimizeResult(
            x=x,
            fun=f,
            success=success,
            status=0 if success else 1,
            message=message,
            nfev=nfev,
            njev=njev,
            nit=nit,
        )

    # Check initial gradient norm
    if la.norm(g_k) < g_eps:
        return _finish(x_k, f_k, True, "Initial gradient norm is within tolerance")

    # Bounding the loop directly means maxiter=0 performs no iteration and no
    # unreachable fallback is needed after the loop.
    while nit < maxiter:
        nit += 1

        # Compute search direction d_k = -H_k * g_k
        d_k = -H_k.dot(g_k)
        if logs:
            print(f"Direction ({nit}): {d_k}")
            print(f"Grad ({nit}): {g_k}")

        # Line search to find alpha; the last two outputs are not used here
        alpha_k, fc_k, gc_k, f_k_new, _, _ = ls(
            fun, jac, x_k, d_k, gfk=g_k, amax=amax
        )
        nfev += fc_k
        njev += gc_k
        if logs:
            print(f"Alpha ({nit}): {alpha_k}")

        # If line search fails
        if alpha_k is None or f_k_new is None:
            return _finish(x_k, f_k, False, "Line search failed")

        # Candidate new point
        x_k1 = x_k + alpha_k * d_k
        if logs:
            print(f"x_({nit}+1): {x_k1}")

        # Check step size
        if la.norm(alpha_k * d_k) < x_eps:
            return _finish(x_k1, f_k_new, True, "Step size below tolerance")

        # Check function change
        if abs(f_k - f_k_new) < f_eps:
            return _finish(x_k1, f_k_new, True, "Function change below tolerance")

        # Compute new gradient
        g_k1 = jac(x_k1)
        njev += 1
        if la.norm(g_k1) < g_eps:
            return _finish(x_k1, f_k_new, True, "Gradient norm below tolerance")

        # Update H_k using the DFP formula
        s_k = x_k1 - x_k       # = alpha_k * d_k
        y_k = g_k1 - g_k
        if logs:
            print(f"s_{nit} = {s_k}, y_{nit} = {y_k}")

        sTy = np.dot(s_k, y_k)
        Hy_k = H_k.dot(y_k)
        yH_y = y_k.dot(Hy_k)
        # H + s s^T / (s^T y) - H y y^T H / (y^T H y) keeps H positive definite
        # only when s^T y > 0. Applying just one of the two rank-one terms, or
        # using a negative s^T y, would corrupt H, so the whole update is
        # skipped unless both denominators are safely positive.
        if sTy > curvature_eps and yH_y > curvature_eps:
            H_k = H_k + np.outer(s_k, s_k) / sTy - np.outer(Hy_k, Hy_k) / yH_y
        elif logs:
            print("DFP update skipped (curvature condition not satisfied)")
        if logs:
            print(f"H_{nit} = {H_k}")

        # Prepare for next iteration
        x_k = x_k1
        f_k = f_k_new
        g_k = g_k1

    # Iteration limit reached without meeting a stopping criterion
    return _finish(x_k, f_k, False, "Max iterations reached")