In [1]:
import tensorflow as tf
import numpy as np
from pitrain.wolfe_line_search import Wolfe_Line_Search

## Prepare `f`, `df` 
`f` will be a tensorflow function of several tensor arguments.

In [2]:
floatX = np.float32
sess = tf.InteractiveSession()

In [3]:
u = tf.Variable([1, 1], name = "u", dtype = floatX)
v = tf.Variable([1, 1], name = "v", dtype = floatX)
sess.run([u.initializer, v.initializer])

[None, None]

In [4]:
f = tf.reduce_sum(u**2) + tf.reduce_sum(v**2)

In [5]:
df = tf.gradients(f, [u, v])

In [6]:
sess.run([f, df])

[4.0, [array([2., 2.], dtype=float32), array([2., 2.], dtype=float32)]]

## Ordinary python function of one vector variable
To turn `f` into a function of one numpy vector variable we use an axilliary class `Splitter`.

In [14]:
from pitrain.splitter import Splitter

In [8]:
splitter = Splitter([u, v])

In [11]:
def F(x):
    return sess.run(f, feed_dict = splitter.split_dic(x))

def dF(x):
    grads = sess.run(
        df, 
        feed_dict = splitter.split_dic(x)
    )
    return splitter.join(grads)

In [15]:
x0 = splitter.current_x()
print(x0)
print(F(x0))
print(dF(x0))

[1. 1. 1. 1.]
4.0
[2. 2. 2. 2.]


## Optimization

In [31]:
from importlib import reload
import pitrain.misc
reload(pitrain.misc)
import pitrain.wolfe_line_search
reload(pitrain.wolfe_line_search)

from pitrain.wolfe_line_search import Wolfe_Line_Search, BFGS_update_B
import numpy.linalg as la
#from pitrain.misc import BFGS_update_B


In [26]:
wolfe_lin_srch = Wolfe_Line_Search(
    f = F, df = dF,
    
    c1=0.01, c2=0.4,
    
    max_noof_bracketing_steps=20, 
    max_noof_zooming_steps=40,
)

In [29]:
# initialization
X = x0 = splitter.current_x()
F_X = F(X)
dF_X = dF(X)

In [32]:
## take an initial guess on B.
learning_rate_2 = 0.000001
H = learning_rate_2 * np.eye(len(X))
B = la.inv(H)

## print
print("X = {}".format(X))
print("F_X = {}".format(F_X))
print("dF_X = {}".format(dF_X))
print("B= {}".format(B))

X = [1. 1. 1. 1.]
F_X = 4.0
dF_X = [2. 2. 2. 2.]
B= [[1000000.       0.       0.       0.]
 [      0. 1000000.       0.       0.]
 [      0.       0. 1000000.       0.]
 [      0.       0.       0. 1000000.]]


In [33]:
for n in range(400):
    # BFGS Method
    ## find the direction `p`
    P = - la.inv(B) @ dF_X

    ## perform the line search ; 
    ## `lsr` = line search results  
    lsr = wolfe_lin_srch.search(
        x = X, p = P,
        f_x = F_X, df_x = dF_X,
        alpha_init = 1
    )

    ## calculate BFGS update
    B = BFGS_update_B(
        x0 = X,
        df_x0 = dF_X,
        B0= B, 
        x1 = lsr.x_new,
        df_x1 = lsr.df_x_new 
    )

    ## update `x, f_x, df_x`
    X, F_X, dF_X = lsr.x_new, lsr.f_x_new, lsr.df_x_new
    
    ## assign the value of `X` to corresponding tensors if you want 
    # splitter.assign_tensors(X)

    ## plotting and printing
    #clear_output(True)
    print("x ={}; f(x) = {}".format(X, F_X))
    

New bracketing step with a_prev=0; a=1
Our a=1 seems not to be large enough. Try with larger a =2
New bracketing step with a_prev=1; a=2
Our a=2 seems not to be large enough. Try with larger a =4
New bracketing step with a_prev=2; a=4
Our a=4 seems not to be large enough. Try with larger a =8
New bracketing step with a_prev=4; a=8
Our a=8 seems not to be large enough. Try with larger a =16
New bracketing step with a_prev=8; a=16
Our a=16 seems not to be large enough. Try with larger a =32
New bracketing step with a_prev=16; a=32
Our a=32 seems not to be large enough. Try with larger a =64
New bracketing step with a_prev=32; a=64
Our a=64 seems not to be large enough. Try with larger a =128
New bracketing step with a_prev=64; a=128
Our a=128 seems not to be large enough. Try with larger a =256
New bracketing step with a_prev=128; a=256
Our a=256 seems not to be large enough. Try with larger a =512
New bracketing step with a_prev=256; a=512
Our a=512 seems not to be large enough. Try wit

Exception: Too many steps in zooming phase.

In [35]:
F(X), dF(X)

(0.0, array([-3.7512805e-24, -3.7512805e-24, -4.1648708e-24, -4.1648708e-24],
       dtype=float32))

In [40]:
for var, grad in splitter.split_dic(dF(X)).items():
    print(f"grad f wrt {var.name} is {grad}")

grad f wrt u:0 is [-3.7512805e-24 -3.7512805e-24]
grad f wrt v:0 is [-4.164871e-24 -4.164871e-24]
