In [6]:
f = lambda x,y:x**2+y**2
result = minimize(f, [3,4], method = 'BFGS', precision = 1e-5, max_iter = 1000)
result.val_rec

array([[3.00000000e+00, 4.00000000e+00],
       [2.40913746e+00, 3.21218328e+00],
       [1.96271942e+00, 2.61695923e+00],
       [1.59902355e+00, 2.13203140e+00],
       [1.30272126e+00, 1.73696168e+00],
       [1.06132438e+00, 1.41509917e+00],
       [8.64658830e-01, 1.15287844e+00],
       [7.04435803e-01, 9.39247737e-01],
       [5.73902427e-01, 7.65203235e-01],
       [4.67557148e-01, 6.23409531e-01],
       [3.80917865e-01, 5.07890486e-01],
       [3.10333015e-01, 4.13777354e-01],
       [2.52827681e-01, 3.37103575e-01],
       [2.05978201e-01, 2.74637602e-01],
       [1.67810025e-01, 2.23746700e-01],
       [1.36714488e-01, 1.82285985e-01],
       [1.11381018e-01, 1.48508024e-01],
       [9.07418903e-02, 1.20989187e-01],
       [7.39272348e-02, 9.85696464e-02],
       [6.02283689e-02, 8.03044919e-02],
       [4.90679306e-02, 6.54239074e-02],
       [3.99755440e-02, 5.33007253e-02],
       [3.25679950e-02, 4.34239933e-02],
       [2.65330798e-02, 3.53774397e-02],
       [2.161644

In [2]:
def get_grad(fn, x, var_names):
    variables = [Variable(var_names[idx], x_n) for idx, x_n in enumerate(x)]
    out = fn(*variables)
    jacobian = out.jacobian()
    grad = np.array([jacobian[name] for name in var_names])
    return grad

def line_search(fn, x, search_direction, grad, beta = 0.9, c = 0.9, alpha_init = 1):
    """approximately minimizes f along search_direction
    https://en.wikipedia.org/wiki/Backtracking_line_search
    """
    m = search_direction.T.dot(grad)
    alpha = alpha_init
    print(-c*alpha*m)
    while (fn(*(x)) - fn(*(x+alpha*search_direction))) < -c*alpha*m:
        alpha = alpha * beta
    return alpha

def update_hessian(approx_hessian, d_grad, step):
    return (approx_hessian 
            + 1/(d_grad.T.dot(step))*d_grad.dot(d_grad.T) 
            - 1/(step.T.dot(approx_hessian).dot(step))*(approx_hessian.dot(step).dot(step.T).dot(approx_hessian.T))
           )

def min_BFGS(fn, x0, precision, max_iter, lr=0.01):
    approx_hessian = np.identity(len(x0))
    x = np.array(x0).reshape(-1,1)
    var_names = ['x'+str(idx) for idx in range(len(x))]
    new_grad = get_grad(fn, x, var_names)
    for _ in range(max_iter):
        # get new x values
        grad = new_grad
        search_direction = -np.linalg.pinv(approx_hessian).dot(grad)
        stepsize = line_search(fn, x, search_direction, grad)
        print('stepsize:',stepsize)
        print('search_direction:',search_direction)
        step = stepsize * search_direction
        x = x + step
        print(x)
        # update hessian approximation
        new_grad = get_grad(fn, x, var_names)
        d_grad = new_grad - grad
        approx_hessian = update_hessian(approx_hessian, d_grad, step)

In [72]:
fn = lambda x,y: x**2+y**2
x0 = [100,1]
precision = 0.1
max_iter = 100
min_BFGS(fn, x0, precision, max_iter, lr=0.01)

[[36003.6]]
stepsize: 0.0984770902183612
search_direction: [[-200.]
 [  -2.]]
[[80.30458196]
 [ 0.80304582]]
[[11609.04737838]]
stepsize: 0.1853020188851842
search_direction: [[-80.30458196]
 [ -0.80304582]]
[[65.42398079]
 [ 0.65423981]]
[[7705.30552681]]
stepsize: 0.1853020188851842
search_direction: [[-65.42398079]
 [ -0.65423981]]
[[53.30078507]
 [ 0.53300785]]
[[5114.2640155]]
stepsize: 0.1853020188851842
search_direction: [[-53.30078507]
 [ -0.53300785]]
[[43.42404199]
 [ 0.43424042]]
[[3394.50477716]]
stepsize: 0.1853020188851842
search_direction: [[-43.42404199]
 [ -0.43424042]]
[[35.37747934]
 [ 0.35377479]]
[[2253.04416182]]
stepsize: 0.1853020188851842
search_direction: [[-35.37747934]
 [ -0.35377479]]
[[28.82196099]
 [ 0.28821961]]
[[1495.41931103]]
stepsize: 0.1853020188851842
search_direction: [[-28.82196099]
 [ -0.28821961]]
[[23.48119343]
 [ 0.23481193]]
[[992.55884713]]
stepsize: 0.1853020188851842
search_direction: [[-23.48119343]
 [ -0.23481193]]
[[19.13008088]
 [ 0.

In [12]:
# get new x values
grad = get_grad(fn, x, var_names) ## replace this
search_direction = -np.linalg.pinv(approx_hessian).dot(grad)

In [14]:
f = lambda x: fn(*x)
myfprime = lambda x: get_grad(fn, x, var_names).T
step_distance = line_search(f, myfprime, x, search_direction)

In [15]:
step_distance

(array([[0.5]]), 3, 1, array([0.]), array([325.]), array([[0., 0., 0., 0.]]))

In [71]:
step = step_distance * search_direction
x = x + step



TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'

In [44]:
fn = lambda x,y,z:x**2+y**2+z**2
x0 = [10,10,10]
x = np.array(x0).reshape(-1,1)
var_names = ['x'+str(idx) for idx in range(len(x))]

In [49]:
grad = get_grad(fn, x, var_names) ## replace this
search_direction = np.linalg.pinv(approx_hessian).dot(grad)
step_distance = line_search(fn, grad, x, search_direction)
step = step_distance * search_direction
x = x + step

NameError: name 'approx_hessian' is not defined