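# Rosenbrock problem

Recover the Rosenbrock parameters $a$ and $b$ from sampled function values by minimising the mean squared error, comparing gradient descent, Adam, and Newton's method.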

In [1]:
import numpy as np
import tensorflow as tf
import math

def rosenbrock(x, y, a, b):
    """Evaluate the Rosenbrock function ``(a - x)**2 + b*(y - x**2)**2``.

    Only arithmetic operators are used, so the arguments may be plain
    numbers, NumPy values or TensorFlow tensors (the notebook uses all three).

    Args:
        x, y: coordinates at which to evaluate the function.
        a, b: the two shape parameters of the function.

    Returns:
        The function value, in whatever type the operands produce.
    """
    offset = a - x          # distance from x to the parameter a
    valley = y - x**2       # deviation from the parabola y = x**2
    return offset**2 + b*valley**2

In [2]:
# Sample the target surface (true parameters a = b = 2.5) on a square grid.
# Each row of data_points is [x, y, f(x, y)]; m is the number of samples.
axis_values = np.arange(-2, 2.1, 2)  # -2, 0, 2: the 2.1 stop keeps the endpoint 2
data_points = np.array([
    [gx, gy, rosenbrock(gx, gy, 2.5, 2.5)]
    for gx in axis_values
    for gy in axis_values
])
m = len(data_points)

In [7]:
# Placeholders for the m samples: x holds the two input columns, y the
# observed function values.
x = tf.placeholder(tf.float64, shape=[m, 2])
y = tf.placeholder(tf.float64, shape=[m])
# Trainable parameter vector; p[0] is passed as a and p[1] as b below.
# The initial guess is (5, 5).
p = tf.Variable([5., 5.], dtype=tf.float64)
# Model prediction: the Rosenbrock function evaluated with the current p.
y_hat = rosenbrock(x[:, 0], x[:, 1], p[0], p[1])
# Residuals and the mean-squared-error objective used by every optimiser cell.
r = y - y_hat
loss = tf.reduce_mean(r**2)

Test guess

In [8]:
# Feed every sample on each run: the first two columns of data_points are the
# inputs, the third column is the target value.
feed_dict = {x: data_points[:,0:2], y: data_points[:,2]}
# A single session is shared by all later cells; train() reads it as a global.
session = tf.Session()
# Sets p to its initial guess.
session.run(tf.global_variables_initializer())
# Loss at the initial guess, as a sanity check before any training.
current_loss = session.run(loss, feed_dict)
print(current_loss)

3868.2291666666665


In [9]:
def train(target_loss, max_steps, loss_tensor, train_step_op, inputs, sess=None):
    """Run ``train_step_op`` until the loss is small enough or steps run out.

    The loop stops as soon as the loss is no longer greater than
    ``target_loss`` or after ``max_steps`` steps, whichever comes first.
    Progress is printed at steps 1, 2, 4, 8, ...; the loss shown on such a
    line is the value *before* that step's update is applied. A summary line
    is printed at the end.

    Args:
        target_loss: loss threshold at which training stops.
        max_steps: upper bound on the number of training steps.
        loss_tensor: fetch evaluated to obtain the current loss.
        train_step_op: fetch run once per step to update the parameters.
        inputs: feed dict passed to every ``run`` call.
        sess: object exposing ``run(fetches, feed_dict)``. Defaults to the
            notebook-level ``session`` when omitted.

    Returns:
        None. Results are reported through ``print``.
    """
    runner = session if sess is None else sess
    step = 0
    current_loss = runner.run(loss_tensor, inputs)
    while current_loss > target_loss and step < max_steps:
        step += 1
        # Log on powers of two. The integer bit test is exact, whereas
        # math.log(step, 2) goes through floating point and can miss a power
        # of two (for example 2**29), silently dropping a progress line.
        if step & (step - 1) == 0:
            print(f'step: {step}, current loss: {current_loss}')
        # Update and re-evaluate in two separate runs so the reported loss
        # always reflects the parameters after the update.
        runner.run(train_step_op, inputs)
        current_loss = runner.run(loss_tensor, inputs)
    print(f'ENDED ON STEP: {step}, FINAL LOSS: {current_loss}')

SGD (here plain gradient descent: every step uses the full data set)

In [10]:
# Gradient of the loss with respect to p; reused later for the Newton step.
grad = tf.gradients(loss, p)[0]
learning_rate = 0.0005
# `opt` is a unit-step updater: whatever "gradient" it is handed is subtracted
# from p unchanged. The real step size is applied by scaling the gradient
# before it is passed in.
opt = tf.train.GradientDescentOptimizer(learning_rate=1)
sgd = opt.apply_gradients([(learning_rate*grad, p)])
# Reset p to the initial guess so this run starts from scratch.
session.run(tf.global_variables_initializer())
# Stop at a loss of 1e-10 or after 40000 steps, whichever comes first.
train(1e-10, 40000, loss, sgd, feed_dict)
print('PARAMETERS:', session.run(p))

step: 1, current loss: 3868.2291666666665
step: 2, current loss: 1381.5379689135807
step: 4, current loss: 224.7373049641391
step: 8, current loss: 39.36606191164495
step: 16, current loss: 21.251396378934388
step: 32, current loss: 8.262024313710544
step: 64, current loss: 1.5494658076417605
step: 128, current loss: 0.07505392682364925
step: 256, current loss: 0.00022995372615102207
step: 512, current loss: 2.3476189945407435e-09
ENDED ON STEP: 582, FINAL LOSS: 9.698531013308539e-11
PARAMETERS: [2.50000205 2.49999959]


Adam

In [11]:
# Adam with a learning rate of 15. The recorded output shows the loss
# jumping around during the first few dozen steps before it settles.
adm = tf.train.AdamOptimizer(15).minimize(loss)
# Must run after minimize(): it initialises the optimiser's own variables and
# resets p to the initial guess.
session.run(tf.global_variables_initializer())
# Same stopping rule as the other optimisers: loss <= 1e-10 or 40000 steps.
train(1e-10, 40000, loss, adm, feed_dict)
print('PARAMETERS:', session.run(p))

step: 1, current loss: 3868.2291666666665
step: 2, current loss: 34205.72916492337
step: 4, current loss: 30529.109726416245
step: 8, current loss: 6066.123962286118
step: 16, current loss: 30745.555871420638
step: 32, current loss: 489.359011928145
step: 64, current loss: 114.83695692773432
step: 128, current loss: 0.1833651319270783
step: 256, current loss: 4.580126253823154e-07
ENDED ON STEP: 317, FINAL LOSS: 2.424142717541396e-12
PARAMETERS: [2.49999969 2.50000008]


Newton's method

In [12]:
# Newton step: solve H @ dp = grad for dp, then update p <- p - dp.
hess = tf.hessians(loss, p)[0]
grad_col = tf.expand_dims(grad, -1)  # column vector for the linear solve
# Solve the linear system directly rather than forming the explicit inverse
# and multiplying; this is the better-conditioned way to get the same step.
dp = tf.linalg.solve(hess, grad_col)
dp = tf.squeeze(dp, axis=-1)  # back to the same shape as p
# Subtract the full step from p directly, so this cell does not depend on how
# an optimiser object created in another cell happens to be configured.
newton = tf.assign_sub(p, dp)
# Reset p to the initial guess so this run starts from scratch.
session.run(tf.global_variables_initializer())
train(1e-10, 40000, loss, newton, feed_dict)
print('PARAMETERS:', session.run(p))

step: 1, current loss: 3868.2291666666665
step: 2, current loss: 105.04357496954214
step: 4, current loss: 0.3307135454853347
ENDED ON STEP: 6, FINAL LOSS: 5.882202372519996e-20
PARAMETERS: [2.5 2.5]
