In [1]:
import numpy as np
from numpy.linalg import inv, norm

# 12.30

In [17]:
def BFGS(f, df, x0, A0, max_iter=40, tol=1e-8):
    '''Minimize f using BFGS, given the derivative df, an
    initial guess x0, and an initial approx A0 of D^2f(x0).
    '''

    done = False
    iters = 0
    A_inv = inv(A0)
    x = x0 - A_inv @ df(x0)
    s = x - x0
    
    while not done:
        y = df(x) - df(x0)
        sy = s@y
        Ay = A_inv @ y

        A_inv = (A_inv + ((sy + y @ Ay)/sy**2) * np.outer(s,s)
                 - (np.outer(Ay, s) + np.outer(s,Ay))/sy)

        x0 = x
        x = x0 - A_inv @ df(x0)
        s = x - x0
        iters += 1

        done = ((norm(s) < tol) or
                (norm(df(x)) < tol) or
                (np.abs(f(x) - f(x0)) < tol) or
                (iters >= max_iter))
    
    return x, iters

def f(xy):
    '''xy: ndarray (2,)'''
    x, y = xy
    return x**3 - 3*x**2 + y**2

def df(xy):
    x, y = xy
    return np.array([3*x**2 - 6*x, 2*y])

# d2f = [[6x - 6, 0],
#        [0,      2]]

## 12.30(a)

In [19]:
x0 = np.array([4, 4])
A0 = np.array([[18, 0], [0, 2]])

result = BFGS(f, df, x0, A0)
print('minimizer:', tuple(result[0]), '\nnum iters:', result[1])

minimizer: (1.9999999666942383, 3.3275430212687627e-07) 
num iters: 6


## 12.30(b)

In [22]:
x0 = np.array([4, 4])
A0 = np.array([[1, 0], [0, 1]])

result = BFGS(f, df, x0, A0)
print('minimizer:', tuple(result[0]), '\nnum iters:', result[1])

minimizer: (1.9999997063849033, -9.366073702609691e-07) 
num iters: 14


## 12.30(c)

In [24]:
x0 = np.array([10, 10])
A0 = np.array([[54, 0], [0, 2]])

result = BFGS(f, df, x0, A0)
print('minimizer:', tuple(result[0]), '\nnum iters:', result[1])

minimizer: (2.000000005087349, -2.945072736340697e-09) 
num iters: 8


## 12.30(d)

In [26]:
x0 = np.array([10, 10])
A0 = np.array([[1, 0], [0, 1]])

result = BFGS(f, df, x0, A0)
print('minimizer:', tuple(result[0]), '\nnum iters:', result[1])

minimizer: (2.000000032260174, -7.826883487596503e-08) 
num iters: 18


## 12.30(e)

In [29]:
x0 = np.array([0, 0])
A0 = np.array([[1, 0], [0, 1]])

result = BFGS(f, df, x0, A0)
print('minimizer:', tuple(result[0]), '\nnum iters:', result[1])

minimizer: (nan, nan) 
num iters: 40


  A_inv = (A_inv + ((sy + y @ Ay)/sy**2) * np.outer(s,s)
  - (np.outer(Ay, s) + np.outer(s,Ay))/sy)


When $x_0 = (0,0)$, $Df(x_0) = [0\quad0]$, so whatever $A_0$ is, $x_{k+1} = x_k - A_k^{-1} [0\quad0]^\intercal = x_k = \cdots = x_0 \quad \forall k$