In [131]:
# 2.1 gradient descent

# Tiago Moreira Trocoli da Cunha
# numero: 226078

import scipy.optimize as optimize
import numpy as np

# function to be minimize
def f(params):
    x1, x2, x3 = params
    return -(100*(x1 - x2**2) - (x1 - 1)**2 + 90*(x2 - x3**2) - (x2 - 1)**2)

# gradient of the function
def gradient(params):
    x1, x2, x3 = params
    grad       = np.zeros((3,1))
    grad[0][0] = 2*(params[0] - 51)
    grad[1][0] = 202*params[1] - 92
    grad[2][0] = 180*params[2]
    return grad

# convergence test
def stopping_criteria(x_new, x_old, tol):
    rel1 = np.absolute((x_new[0][0] - x_old[0][0])/x_old[0][0])
    rel2 = np.absolute((x_new[1][0] - x_old[1][0])/x_old[1][0])
    rel3 = np.absolute((x_new[2][0] - x_old[2][0])/x_old[2][0])
    
    if rel1 < tol and rel2 < tol and rel3 < tol:
        return True
    return False

# gradient descent
def gradient_descent(function, alpha, tol):
    
    # initialize x equal to (0.5,0.5,0.5)
    x = np.full((3,1), 0.5)
    
    while True:
        grad = gradient(x)
        x_new = x - alpha*grad
        
        if(stopping_criteria(x_new,x,tol)):
            return x, f(x)
        x = x_new
    
(argmin, mini) = gradient_descent(f,10**(-6),10**(-5))
print 'x = (', argmin.T, ')'
print 'min ', mini

x = ( [[5.09857609e+001 4.55445545e-001 1.37202030e-320]] )
min  [-2619.9502923]


In [130]:
# 2.2 Descida do gradiente com busca em linha
from scipy.optimize import line_search

# gradient of the function
def grad(params):
    x1, x2, x3 = params
    return [2*(x1 - 51), 202*x2 - 92, 180*x3]

line_search(f,grad,xk = np.array([0.5,0.5, 0.5]),pk = np.array([1,0.2,0.2]))

(1.0, 2, 1, -119.56, -47.0, [-99.0, 49.39999999999998, 125.99999999999999])

In [10]:
# 2.3 L-BFGS

result = optimize.minimize(f, [0.5,0.5,0.5], method = 'L-BFGS-B')
print 'x = (', result.x, ')'
print 'min ', f(result.x)

x = ( [ 5.10000087e+01  4.55445748e-01 -5.81136440e-07] )
min  -2619.9504950493942


In [11]:
# 2.4 Nelder-Mead

bnds = [[-10,10], [-10,10], [-10,10]]
result = optimize.minimize(f, [0.5,0.5,0.5], method = 'Nelder-Mead')
print 'x = (', result.x, ')'
print 'min ', f(result.x)

x = ( [5.09999326e+01 4.55441931e-01 5.11734839e-06] )
min  -2619.9504950412893


In [9]:
# 2.5 BOBYQA

# Define the starting point
import pybobyqa

# staring point
x0 = np.array([0.5, 0.5, 0.5])

# Define constraints (lower <= x <= upper)
lower = np.array([-10.0, -10.0, -10.0])
upper = np.array([10.0, 10.0, 10.0])

# Minimize
soln = pybobyqa.solve(f, x0, bounds=(lower,upper))

# Display output
print soln

****** Py-BOBYQA Results ******
Solution xmin = [1.00000000e+01 4.55445538e-01 8.28930770e-09]
Objective value f(xmin) = -938.950495
Needed 50 objective evaluations (at 50 points)
Approximate gradient = [-8.20000124e+01  5.35714737e-06 -5.04696502e-06]
Approximate Hessian = [[-281.97844229   72.97080512   45.35139857]
 [  72.97080512  717.70251729 -347.43191852]
 [  45.35139857 -347.43191852  416.09160274]]
Exit flag = 0
Success: rho has reached rhoend
******************************



In [81]:
# Obs: por algum motivo, o tensorflow maximiza em vez de minimizar
# quando ha multiplicação de minimizar quando ha multiplicação de constantes,
# no caso, 100 e 90. tirando estas constantes, ele consegue minimizar.
# provavelemte, teria que inicializar as constantes, procurei
# na internet e não encontrei nada muito claro a respesto. Infelizmente,
# não tenho dominio de TensorFlow.

import tensorflow as tf

x1 = tf.Variable(0.5, name='x1', dtype=tf.float32)
x2 = tf.Variable(0.5, name='x2', dtype=tf.float32)
x3 = tf.Variable(0.5, name='x3', dtype=tf.float32)

fx = -(100*(x1 - x2*x2) - (x1 - 1)*(x1 - 1) + 90*(x2 - x3*x3) - (x2 - 1)*(x2 - 1))

loss = fx
opt  = tf.train.GradientDescentOptimizer(0.1).minimize(fx)

init = tf.global_variables_initializer()
with tf.Session() as sess:
   sess.run(tf.global_variables_initializer())
   for i in range(10):
     print(sess.run([x1,x2,x3,loss]))
     sess.run(opt)

[0.5, 0.5, 0.5, -47.0]
[10.6, -0.40000004, -8.5, 5588.62]
[18.68, 16.880001, 144.5, 1904893.4]
[25.144001, -314.89603, -2456.5, 553137500.0]
[30.3152, 6055.204, 41760.5, 160657200000.0]
[34.45216, -116250.73, -709928.5, 46724810000000.0]
[37.761726, 2232023.2, 12068784.0, 1.3612174e+16]
[40.40938, -42854836.0, -205169340.0, 3.9739915e+18]
[42.527504, 822812860.0, 3487879000.0, 1.1632561e+21]
[44.222004, -15798007000.0, -59293946000.0, 3.4162675e+23]


In [87]:
# sem as constantes 100 e 90, o tensorflow encontra o minimo global, -1.3749988,
# que de acordo com o Wolphram Alpha também é -11/8 = -1.3749988.

import tensorflow as tf

x1 = tf.Variable(0.5, name='x1', dtype=tf.float32)
x2 = tf.Variable(0.5, name='x2', dtype=tf.float32)
x3 = tf.Variable(0.5, name='x3', dtype=tf.float32)

fx = -((x1 - x2*x2) - (x1 - 1)*(x1 - 1) + (x2 - x3*x3) - (x2 - 1)*(x2 - 1))

loss = fx
opt  = tf.train.GradientDescentOptimizer(0.1).minimize(fx)

init = tf.global_variables_initializer()
with tf.Session() as sess:
   sess.run(tf.global_variables_initializer())
   for i in range(20):
     print(sess.run([x1,x2,x3,loss]))
     sess.run(opt)

[0.5, 0.5, 0.5, -0.0]
[0.7, 0.6, 0.4, -0.53]
[0.86, 0.66, 0.32, -0.84679997]
[0.98800004, 0.696, 0.25599998, -1.0414879]
[1.0904, 0.7176, 0.20479998, -1.1631852]
[1.17232, 0.73056, 0.16383998, -1.2400265]
[1.237856, 0.73833597, 0.13107198, -1.2888286]
[1.2902849, 0.7430016, 0.10485759, -1.3199265]
[1.332228, 0.745801, 0.08388607, -1.3397803]
[1.3657824, 0.7474806, 0.067108855, -1.3524694]
[1.3926259, 0.7484883, 0.053687084, -1.3605839]
[1.4141008, 0.749093, 0.04294967, -1.365775]
[1.4312806, 0.7494558, 0.034359735, -1.3690964]
[1.4450245, 0.7496735, 0.027487788, -1.3712219]
[1.4560196, 0.7498041, 0.02199023, -1.3725821]
[1.4648157, 0.74988246, 0.017592184, -1.3734525]
[1.4718525, 0.7499295, 0.014073747, -1.3740096]
[1.4774821, 0.7499577, 0.011258998, -1.374366]
[1.4819857, 0.7499746, 0.009007199, -1.3745944]
[1.4855886, 0.74998474, 0.007205759, -1.3747404]
