In [1]:
import numpy as np
from scipy.optimize import minimize_scalar

In [2]:
def steepest_descent(f, grad_f, x0, tol=1e-6, max_iter=100):
    """
    Steepest Descent Method with Exact Line Search

    Parameters:
    - f: function f(x), where x is a numpy array
    - grad_f: gradient function grad_f(x) returning numpy array
    - x0: initial point (numpy array)
    - tol: tolerance for stopping criterion (norm of gradient)
    - max_iter: maximum number of iterations

    Returns:
    - x: final point (minimum estimate)
    - history: list of iterates
    """
    x = x0.copy()
    history = [x.copy()]

    for i in range(max_iter):
        grad = grad_f(x)
        grad_norm = np.linalg.norm(grad)

        if grad_norm < tol:
            print(f"Converged in {i} iterations.")
            break

        d = -grad  # Steepest descent direction

        # Define function for line search: f_alpha(α) = f(x + αd)
        f_alpha = lambda alpha: f(x + alpha * d)
        res = minimize_scalar(f_alpha)

        alpha = res.x  # Best step size
        x = x + alpha * d
        history.append(x.copy())

        print(f"Iter {i+1}: alpha = {alpha:.6f}, x = {x}, f(x) = {f(x):.6f}, ||grad|| = {grad_norm:.6f}")

    return x, history


In [3]:

def f(x):
    x1, x2 = x
    return (x1 - 2)**2 + (x2 + 3)**2


def grad_f(x):
    x1, x2 = x
    return np.array([2*(x1 - 2), 2*(x2 + 3)])


x0 = np.array([0.0, 0.0])


x_min, history = steepest_descent(f, grad_f, x0)

print("\nMinimum found at:", x_min)
print("f(min) =", f(x_min))


Iter 1: alpha = 0.500000, x = [ 2. -3.], f(x) = 0.000000, ||grad|| = 7.211103
Converged in 1 iterations.

Minimum found at: [ 2. -3.]
f(min) = 0.0


In [5]:
def conjugate_gradient(A, b, x0, tol=1e-6, max_iter=10):
    x = x0
    r = b - A @ x
    p = r
    while True:#for i in range(max_iter):
        Ap = A @ p
        alpha = r.T @ r / (p.T @ Ap)
        x = x + alpha * p
        r_new = r - alpha * Ap
        if np.linalg.norm(r_new) < tol:
            break
        beta = (r_new.T @ r_new) / (r.T @ r)
        p = r_new + beta * p
        r = r_new
    return x


A = np.array([[2, 0], [0, 4]])
b = np.array([-1, 1])
x0 = np.array([0, 0])
x_min = conjugate_gradient(A, b, x0)
print("Minimum at:", x_min)


Minimum at: [-0.5   0.25]


In [6]:
history

[array([0., 0.]), array([ 2., -3.])]

In [7]:
from scipy.optimize import minimize

In [10]:
def quadratic_function(x):
    Q = np.array([[2, -3], [-3, 5]])  
    b = np.array([0, -1])                              
    return 0.5 * np.dot(x, np.dot(Q, x)) + np.dot(b, x)

# Gradient of quadratic function
def gradient(x):
    Q = np.array([[2, -3], [-3, 5]])  
    b = np.array([0, -1])
    return np.dot(Q, x) + b

x0 = np.array([0.0, 0.0])

# Minimize 
result = minimize(quadratic_function, x0, method='BFGS', jac=gradient)

In [11]:
print("Optimal solution:", result.x)
print("Function value at optimum:", result.fun)
print("Number of iterations:", result.nit)

Optimal solution: [3. 2.]
Function value at optimum: -0.9999999999999989
Number of iterations: 5


In [18]:
def BFGS(f, grad_f, Q, x0, tol=1e-6, max_iter=100):
    x = x0.copy()
    history = [x.copy()]
    H = -np.linalg.inv(Q)
    B = np.array([[1, 0], [0, 1]])
    while True:
        grad = grad_f(x)
        grad_norm = np.linalg.norm(grad)
        
        if grad_norm < tol:
            print(f"Converged in {i} iterations.")
            break
        d = -(H @ grad)
        f_alpha = lambda alpha: f(x + alpha * d)
        res = minimize_scalar(f_alpha)
        alpha = res.x
        
        del_x = alpha * d
        B = B + (grad_f(x) @ grad_f(x).T)/(grad_f(x).T @ grad_f(x)) - (B @ del_x @ del_x.T @ B)/(del_x.T @ B @ del_x)
        H = np.linalg.inv(B)
        
        x = x - alpha * (H @ grad)
        print(f"Iter {i+1}: alpha = {alpha:.6f}, x = {x}, f(x) = {f(x):.6f}, ||grad|| = {grad_norm:.6f}")
    return x, history

In [19]:

x0 = np.array([0.0, 0.0])
Q = np.array([[2, -3], [-3, 5]])
x_min, history = BFGS(quadratic_function, gradient, Q, x0)

print("\nMinimum found at:", x_min)
print("f(min) =", f(x_min))

ValueError: matmul: Input operand 0 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)