In [1]:
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

from sympy import *
import numpy as np

# BFGS
----------

In [13]:
def BFGSUpdate(xk, Bk, deriv1_xk, Q):
    """
        xk:        Current value 
        Bk:        Approximated 2nd order matrix
        deriv1_xk: First order derivative of function f(xk)
        Q :        Q formed when we convert the function in standard form
                   Also equivallent to Hessian matrix, Computation is needed only once
                   No update is required
        
        pk (Search direction) = - inv(Bk).deriv1_xk 
    """
    print('')
    print('')
    print(Bk)
    print('')
    print(deriv1_xk)
    print('')
    print('')
    pk = -1 * np.dot(np.linalg.inv(Bk),deriv1_xk)
    alpha = np.dot(np.transpose(pk), deriv1_xk) / np.dot(np.dot(np.transpose(pk), Q), pk)
    xk1 = xk - alpha*pk
    return xk1

def updateBk(step, deriv_1, xk, xk1, gradient_xk, Bk, numVariables):
    """
        step:             Step Num, int
        deriv_1           1st order derivative of the input function 
        xk:               previous solution
        xk1:              New Solution
        gradient_xk:      deriv_1 evaluated at xk1
        Bk:               Approximated 2nd order matrix
    """
    if numVariables == 3:
        gradient_xk1 =  np.array([eq.evalf(subs={x1:xk1[0], x2:xk1[1], x3:xk1[2]}) 
                                  for eq in deriv_1], dtype='float32')
    elif numVariables == 2:
        gradient_xk1 =  np.array([eq.evalf(subs={x1:xk1[0], x2:xk1[1]}) 
                                  for eq in deriv_1], dtype='float32')
    else:
        raise ValueError('Num Features doesnt match the hadled condition')
        
    print ('Gradient at xk%s: '%str(step), gradient_xk1)
    Sk = xk1 - xk
    print ('Sk at xk%s: \n'%str(step), Sk)
    Yk = gradient_xk1 - gradient_xk
    print ('Yk at xk%s: \n'%str(step), Yk)
    numerator = np.dot(np.dot(Bk,Sk),np.transpose(np.dot(Bk,Sk)))
    print ('numerator at xk%s: \n'%str(step), numerator)
    denominator = np.dot(np.transpose(Sk),np.dot(Bk,Sk))
    print ('denominator at xk%s: \n'%str(step), denominator)
    RHS = np.dot(Yk, np.transpose(Yk))/np.dot(np.transpose(Yk), Yk)
    print ('RHS at xk%s: \n'%str(step), RHS)
    Bk1 = Bk - (numerator/denominator) + RHS
    print ('Bk at xk%s: \n'%str(step), Bk1)
    return Bk1

def iterate(xk, deriv_1, Q, Bk, numFeatures, numSteps):
    for step in range(numSteps):
        print('')
        print ('##########################################################')
        if numFeatures == 3:
            gradient_xk = np.array([eq.evalf(subs={x1:xk[0], x2:xk[1], x3:xk[2]}) 
                                 for eq in deriv_1], dtype='float32')
        elif numFeatures == 2:
            gradient_xk = np.array([eq.evalf(subs={x1:xk[0], x2:xk[1]}) for eq in deriv_1], dtype='float32')
        else:
            raise ValueError('Num Features doesnt match the hadled condition')
        
        print ('Gradient at xk%s is: '%str(step), gradient_xk)
        gradientNorm = np.linalg.norm(gradient_xk)
        print ("####### Gradient Norm: ", gradientNorm)
        if gradientNorm == 0:
            break
            
        print ('xk at step %s: \n'%str(step), xk)
        xk1 = BFGSUpdate(xk=xk, Bk=Bk, deriv1_xk=gradient_xk, Q=Q)
        print ('xk1 at step %s: \n'%str(step), xk1)
        
        Bk1 = updateBk(step+1, deriv_1, xk, xk1, gradient_xk, Bk, numVariables=numFeatures)

        # Parameter UPdate
        xk = xk1
        Bk = Bk1

        print('The updated Value xk%s is \n'%(step+1), xk1)

#### Solution d(1)

In [14]:
x1 = Symbol('x1')
x2 = Symbol('x2')
x3 = Symbol('x3')

f = pow(x1,2) + pow(x2,2) + pow(x3,2)
deriv_f_x1 = f.diff(x1)
deriv_f_x2 = f.diff(x2)
deriv_f_x3 = f.diff(x3)

f = pow(x1,2) + pow(x2,2) + pow(x3,2)
deriv_1 = np.array([f.diff(x1), f.diff(x2), f.diff(x3)])
print ('First order Derivative: ', deriv_1)
Q = np.array([[2,0,0],[0,2,0],[0,0,2]])  # Q is obtained by converting the funciton into quadratic form, which is equivallent to 2nd order derivative in all our case

# Initialize BK
Bk = np.eye(3)

# Initialize Xk
xk = np.array([1,1,1])

iterate(xk, deriv_1, Q, Bk, numFeatures=3, numSteps=5)


First order Derivative:  [2*x1 2*x2 2*x3]

##########################################################
Gradient at xk0 is:  [ 2.  2.  2.]
####### Gradient Norm:  3.4641
xk at step 0: 
 [1 1 1]


[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]

[ 2.  2.  2.]


xk1 at step 0: 
 [ 0.  0.  0.]
Gradient at xk1:  [ 0.  0.  0.]
Sk at xk1: 
 [-1. -1. -1.]
Yk at xk1: 
 [-2. -2. -2.]
numerator at xk1: 
 3.0
denominator at xk1: 
 3.0
RHS at xk1: 
 1.0
Bk at xk1: 
 [[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]
The updated Value xk1 is 
 [ 0.  0.  0.]

##########################################################
Gradient at xk1 is:  [ 0.  0.  0.]
####### Gradient Norm:  0.0


#### Solution d(2)

In [16]:
x1 = Symbol('x1')
x2 = Symbol('x2')
f = pow(x1,2) + 2*pow(x2,2) - 2*x1*x2 - 2*x2
deriv_1 = np.array([f.diff(x1), f.diff(x2)])
Q = np.array([[deriv_1[0].diff(x1),deriv_1[0].diff(x2)],
              [deriv_1[1].diff(x1),deriv_1[1].diff(x2)]])
print ('Q: \n', Q)

#Initialize BK
Bk = np.eye(2)

# Initialize Xk
xk = np.array([0,0], dtype='float32')

iterate(xk, deriv_1, Q, Bk, numFeatures=2, numSteps=5)

Q: 
 [[2 -2]
 [-2 4]]

##########################################################
Gradient at xk0 is:  [ 0. -2.]
####### Gradient Norm:  2.0
xk at step 0: 
 [ 0.  0.]


[[ 1.  0.]
 [ 0.  1.]]

[ 0. -2.]


xk1 at step 0: 
 [0.0 0.500000000000000]
Gradient at xk1:  [-1.  0.]
Sk at xk1: 
 [0 0.500000000000000]
Yk at xk1: 
 [-1.  2.]
numerator at xk1: 
 0.250000000000000
denominator at xk1: 
 0.250000000000000
RHS at xk1: 
 1.0
Bk at xk1: 
 [[1.00000000000000 0]
 [0 1.00000000000000]]
The updated Value xk1 is 
 [0.0 0.500000000000000]

##########################################################
Gradient at xk1 is:  [-1.  0.]
####### Gradient Norm:  1.0
xk at step 1: 
 [0.0 0.500000000000000]


[[1.00000000000000 0]
 [0 1.00000000000000]]

[-1.  0.]




TypeError: No loop matching the specified signature and casting
was found for ufunc inv

#### Solution d(2)

In [24]:
x1 = Symbol('x1')
x2 = Symbol('x2')
f = 100*pow(x2-pow(x1,2),2) + pow(1-x1,2)
deriv_1 = np.array([f.diff(x1), f.diff(x2)])
Q = np.array([[deriv_1[0].diff(x1),deriv_1[0].diff(x2)],
              [deriv_1[1].diff(x1),deriv_1[1].diff(x2)]])
print ('Q: \n', Q)

#Initialize BK
Bk = np.eye(2)

# Initialize Xk
xk = np.array([0,0], dtype='float32')

iterate(xk, deriv_1, Q, Bk, numFeatures=2, numSteps=5)

Q: 
 [[1200*x1**2 - 400*x2 + 2 -400*x1]
 [-400*x1 200]]

##########################################################
Gradient at xk0 is:  [-2.  0.]
####### Gradient Norm:  2.0
xk at step 0: 
 [ 0.  0.]


[[ 1.  0.]
 [ 0.  1.]]

[-2.  0.]


xk1 at step 0: 
 [8.0/(4800.0*x1**2 - 1600.0*x2 + 8.0) 0.0]


RecursionError: maximum recursion depth exceeded in comparison

In [23]:
a = np.array([[1.00000000000000, 0], [0, 1.00000000000000]], float)
np.dot(np.linalg.inv(a), np.array([-1.,  0]))

array([-1.,  0.])