# Newton's Method

In [50]:
using LinearAlgebra, Printf
using Plots, ForwardDiff

In [51]:
"""
    pure_newton(fObj, gObj, HObj, x0; ϵ=1e-5, maxits=500)

Minimize `fObj` with the pure Newton iteration (full unit step, no line search),
starting from `x0`.

# Arguments
- `fObj`: objective; only called to report the value in the per-iteration log line.
- `gObj`: callable returning the gradient at a point.
- `HObj`: callable returning the Hessian at a point.
- `x0`: starting point (copied, never mutated).

# Keywords
- `ϵ`: the iteration stops once the gradient norm is no larger than `ϵ`.
- `maxits`: upper bound on the number of Newton steps.

# Returns
The final iterate. If `x0` already satisfies the gradient tolerance, a copy of `x0`
is returned and nothing is printed.

One line is printed per step with the step counter, the objective value, the gradient
norm and the smallest Hessian eigenvalue, all evaluated at the new iterate.
"""
function pure_newton(fObj, gObj, HObj, x0; ϵ=1e-5, maxits=500)
    x = copy(x0)
    g, H = gObj(x), HObj(x)
    k = 0
    while ( norm(g)>ϵ && k < maxits )
        d = - H \ g # Newton equations
        x = x + d   # pure Newton: always take the full step
        g, H = gObj(x), HObj(x)
        # Diagnostics are evaluated at the *new* iterate; `minimum` assumes the
        # eigenvalues are real, i.e. that HObj returns a symmetric matrix.
        @printf "k = %3d | f = %10.2e | g = %10.2e | λmin = %10.2e\n" k fObj(x) norm(g) minimum(eigvals(H))
        k += 1
    end
    # Hand the minimizer back to the caller (previously the result was discarded).
    return x
end;

In [52]:
"""
    grad_method_backtracking(fObj, gObj, x0; ϵ=1e-6, μ=1e-5, maxits=1000)

Gradient method with backtracking line search.

Starting from `x0`, repeatedly step along the negative gradient. The step length
starts at 1 and is halved until the sufficient-decrease test
`f(x) - f(x - α∇f) ≥ μ α ‖∇f‖²` holds.

# Arguments
- `fObj`: objective function.
- `gObj`: callable returning the gradient at a point.
- `x0`: starting point (copied, never mutated).

# Keywords
- `ϵ`: the iteration stops once the gradient norm is no larger than `ϵ`.
- `μ`: sufficient-decrease parameter of the line search.
- `maxits`: upper bound on the number of outer iterations.

# Returns
A tuple `(x, xtrace)`: the final iterate and the iterates collected column by column
(starting point first). If no iteration is performed, `xtrace` is just the starting
vector.

One line is printed per outer iteration with the counter, the gradient norm and the
objective value at the new iterate.
"""
function grad_method_backtracking(fObj, gObj, x0; ϵ=1e-6, μ=1e-5, maxits = 1000)
    x = copy(x0)
    f = fObj(x)
    ∇f = gObj(x)
    k = 0
    xtrace = x
    while norm(∇f) > ϵ && k < maxits
        # Squared gradient norm is constant during the line search, so compute it once.
        gg = dot(∇f, ∇f)
        α = 1.0
        xnew = x - α*∇f
        fnew = fObj(xnew)
        while ( f - fnew < μ*α*gg )
            α /= 2
            xnew = x - α*∇f
            fnew = fObj(xnew)
        end
        # Reuse the accepted trial point and its objective value instead of
        # re-evaluating fObj at the same point.
        x, f = xnew, fnew
        ∇f = gObj(x)
        @printf "it = %3d | |∇f| = %8.2e | f = %8.2e\n" k norm(∇f) f
        k += 1; xtrace = hcat(xtrace,x)
    end
    return x, xtrace
end;

In [53]:
# Test problem: a separable quartic. Its Hessian is diag(1200*x[1]^2, 0.12*x[2]^2),
# which vanishes at the minimizer x = 0.
function f(x)
    return 100 * x[1]^4 + 0.01 * x[2]^4
end

# Gradient and Hessian of f via automatic differentiation.
function g(x)
    return ForwardDiff.gradient(f, x)
end

function H(x)
    return ForwardDiff.hessian(f, x)
end

x0 = [1, 1]

pure_newton(f, g, H, x0)

k =   0 | f =   1.98e+01 | g =   1.19e+02 | λmin =   5.33e-02
k =   1 | f =   3.90e+00 | g =   3.51e+01 | λmin =   2.37e-02
k =   2 | f =   7.71e-01 | g =   1.04e+01 | λmin =   1.05e-02
k =   3 | f =   1.52e-01 | g =   3.08e+00 | λmin =   4.68e-03
k =   4 | f =   3.01e-02 | g =   9.13e-01 | λmin =   2.08e-03
k =   5 | f =   5.94e-03 | g =   2.71e-01 | λmin =   9.25e-04
k =   6 | f =   1.17e-03 | g =   8.02e-02 | λmin =   4.11e-04
k =   7 | f =   2.32e-04 | g =   2.38e-02 | λmin =   1.83e-04
k =   8 | f =   4.58e-05 | g =   7.04e-03 | λmin =   8.12e-05
k =   9 | f =   9.04e-06 | g =   2.09e-03 | λmin =   3.61e-05
k =  10 | f =   1.79e-06 | g =   6.18e-04 | λmin =   1.60e-05
k =  11 | f =   3.53e-07 | g =   1.83e-04 | λmin =   7.13e-06
k =  12 | f =   6.97e-08 | g =   5.43e-05 | λmin =   3.17e-06
k =  13 | f =   1.38e-08 | g =   1.61e-05 | λmin =   1.41e-06
k =  14 | f =   2.72e-09 | g =   4.76e-06 | λmin =   6.26e-07


In [54]:
# Run the backtracking gradient method on the current globals f, g and x0;
# the trailing `;` keeps the notebook from echoing the returned (x, xtrace) tuple.
grad_method_backtracking(f, g, x0);

it =   0 | |∇f| = 7.12e+01 | f = 1.00e+01
it =   1 | |∇f| = 6.65e+01 | f = 9.15e+00
it =   2 | |∇f| = 4.68e+01 | f = 5.74e+00
it =   3 | |∇f| = 5.71e+00 | f = 3.56e-01
it =   4 | |∇f| = 5.98e-01 | f = 2.69e-02
it =   5 | |∇f| = 4.19e-02 | f = 9.59e-03
it =   6 | |∇f| = 3.41e-02 | f = 8.06e-03
it =   7 | |∇f| = 3.05e-02 | f = 6.96e-03
it =   8 | |∇f| = 2.75e-02 | f = 6.08e-03
it =   9 | |∇f| = 2.50e-02 | f = 5.35e-03
it =  10 | |∇f| = 2.29e-02 | f = 4.75e-03
it =  11 | |∇f| = 2.11e-02 | f = 4.25e-03
it =  12 | |∇f| = 1.95e-02 | f = 3.82e-03
it =  13 | |∇f| = 1.80e-02 | f = 3.46e-03
it =  14 | |∇f| = 1.68e-02 | f = 3.15e-03
it =  15 | |∇f| = 1.57e-02 | f = 2.87e-03
it =  16 | |∇f| = 1.47e-02 | f = 2.63e-03
it =  17 | |∇f| = 1.38e-02 | f = 2.42e-03
it =  18 | |∇f| = 1.30e-02 | f = 2.24e-03
it =  19 | |∇f| = 1.23e-02 | f = 2.07e-03
it =  20 | |∇f| = 1.16e-02 | f = 1.93e-03
it =  21 | |∇f| = 1.10e-02 | f = 1.80e-03
it =  22 | |∇f| = 1.05e-02 | f = 1.68e-03
it =  23 | |∇f| = 9.97e-03 | f = 1

it = 232 | |∇f| = 4.69e-04 | f = 2.67e-05
it = 233 | |∇f| = 4.66e-04 | f = 2.64e-05
it = 234 | |∇f| = 4.64e-04 | f = 2.62e-05
it = 235 | |∇f| = 4.61e-04 | f = 2.60e-05
it = 236 | |∇f| = 4.58e-04 | f = 2.58e-05
it = 237 | |∇f| = 4.55e-04 | f = 2.56e-05
it = 238 | |∇f| = 4.52e-04 | f = 2.54e-05
it = 239 | |∇f| = 4.50e-04 | f = 2.52e-05
it = 240 | |∇f| = 4.47e-04 | f = 2.50e-05
it = 241 | |∇f| = 4.44e-04 | f = 2.48e-05
it = 242 | |∇f| = 4.42e-04 | f = 2.46e-05
it = 243 | |∇f| = 4.39e-04 | f = 2.44e-05
it = 244 | |∇f| = 4.36e-04 | f = 2.42e-05
it = 245 | |∇f| = 4.34e-04 | f = 2.40e-05
it = 246 | |∇f| = 4.31e-04 | f = 2.38e-05
it = 247 | |∇f| = 4.29e-04 | f = 2.36e-05
it = 248 | |∇f| = 4.26e-04 | f = 2.35e-05
it = 249 | |∇f| = 4.24e-04 | f = 2.33e-05
it = 250 | |∇f| = 4.21e-04 | f = 2.31e-05
it = 251 | |∇f| = 4.19e-04 | f = 2.29e-05
it = 252 | |∇f| = 4.17e-04 | f = 2.27e-05
it = 253 | |∇f| = 4.14e-04 | f = 2.26e-05
it = 254 | |∇f| = 4.12e-04 | f = 2.24e-05
it = 255 | |∇f| = 4.09e-04 | f = 2

it = 427 | |∇f| = 1.93e-04 | f = 8.16e-06
it = 428 | |∇f| = 1.93e-04 | f = 8.13e-06
it = 429 | |∇f| = 1.92e-04 | f = 8.09e-06
it = 430 | |∇f| = 1.91e-04 | f = 8.05e-06
it = 431 | |∇f| = 1.91e-04 | f = 8.02e-06
it = 432 | |∇f| = 1.90e-04 | f = 7.98e-06
it = 433 | |∇f| = 1.89e-04 | f = 7.94e-06
it = 434 | |∇f| = 1.89e-04 | f = 7.91e-06
it = 435 | |∇f| = 1.88e-04 | f = 7.87e-06
it = 436 | |∇f| = 1.87e-04 | f = 7.84e-06
it = 437 | |∇f| = 1.87e-04 | f = 7.80e-06
it = 438 | |∇f| = 1.86e-04 | f = 7.77e-06
it = 439 | |∇f| = 1.85e-04 | f = 7.73e-06
it = 440 | |∇f| = 1.85e-04 | f = 7.70e-06
it = 441 | |∇f| = 1.84e-04 | f = 7.66e-06
it = 442 | |∇f| = 1.84e-04 | f = 7.63e-06
it = 443 | |∇f| = 1.83e-04 | f = 7.60e-06
it = 444 | |∇f| = 1.82e-04 | f = 7.56e-06
it = 445 | |∇f| = 1.82e-04 | f = 7.53e-06
it = 446 | |∇f| = 1.81e-04 | f = 7.50e-06
it = 447 | |∇f| = 1.81e-04 | f = 7.46e-06
it = 448 | |∇f| = 1.80e-04 | f = 7.43e-06
it = 449 | |∇f| = 1.79e-04 | f = 7.40e-06
it = 450 | |∇f| = 1.79e-04 | f = 7

it = 660 | |∇f| = 1.02e-04 | f = 3.47e-06
it = 661 | |∇f| = 1.02e-04 | f = 3.46e-06
it = 662 | |∇f| = 1.01e-04 | f = 3.45e-06
it = 663 | |∇f| = 1.01e-04 | f = 3.44e-06
it = 664 | |∇f| = 1.01e-04 | f = 3.43e-06
it = 665 | |∇f| = 1.01e-04 | f = 3.42e-06
it = 666 | |∇f| = 1.00e-04 | f = 3.41e-06
it = 667 | |∇f| = 1.00e-04 | f = 3.40e-06
it = 668 | |∇f| = 9.99e-05 | f = 3.39e-06
it = 669 | |∇f| = 9.97e-05 | f = 3.38e-06
it = 670 | |∇f| = 9.95e-05 | f = 3.37e-06
it = 671 | |∇f| = 9.93e-05 | f = 3.36e-06
it = 672 | |∇f| = 9.91e-05 | f = 3.35e-06
it = 673 | |∇f| = 9.89e-05 | f = 3.34e-06
it = 674 | |∇f| = 9.86e-05 | f = 3.33e-06
it = 675 | |∇f| = 9.84e-05 | f = 3.32e-06
it = 676 | |∇f| = 9.82e-05 | f = 3.31e-06
it = 677 | |∇f| = 9.80e-05 | f = 3.30e-06
it = 678 | |∇f| = 9.78e-05 | f = 3.29e-06
it = 679 | |∇f| = 9.76e-05 | f = 3.28e-06
it = 680 | |∇f| = 9.74e-05 | f = 3.27e-06
it = 681 | |∇f| = 9.71e-05 | f = 3.26e-06
it = 682 | |∇f| = 9.69e-05 | f = 3.25e-06
it = 683 | |∇f| = 9.67e-05 | f = 3

Example of a problem where pure Newton may fail to outperform the gradient method.

In [55]:
# Test problem: sum of two smooth absolute-value-like terms, minimized at x = 0
# where f = 2.
function f(x)
    return sqrt(x[1]^2 + 1) + sqrt(x[2]^2 + 1)
end

# Gradient and Hessian of f via automatic differentiation.
function g(x)
    return ForwardDiff.gradient(f, x)
end

function H(x)
    return ForwardDiff.hessian(f, x)
end

x0 = [.9, .9]
pure_newton(f, g, H, x0)

k =   0 | f =   2.48e+00 | g =   8.33e-01 | λmin =   5.28e-01
k =   1 | f =   2.14e+00 | g =   5.11e-01 | λmin =   8.11e-01
k =   2 | f =   2.00e+00 | g =   8.21e-02 | λmin =   9.95e-01
k =   3 | f =   2.00e+00 | g =   2.78e-04 | λmin =   1.00e+00
k =   4 | f =   2.00e+00 | g =   1.08e-11 | λmin =   1.00e+00


In [56]:
# Run the backtracking gradient method on the current globals f, g and x0;
# the trailing `;` keeps the notebook from echoing the returned (x, xtrace) tuple.
grad_method_backtracking(f, g, x0);

it =   0 | |∇f| = 3.18e-01 | f = 2.05e+00
it =   1 | |∇f| = 8.39e-03 | f = 2.00e+00
it =   2 | |∇f| = 1.47e-07 | f = 2.00e+00


In [57]:
# Test problem: the two-dimensional Rosenbrock function, minimized at x = (1, 1).
function f(x)
    return 100 * (x[2] - x[1]^2)^2 + (1 - x[1])^2
end

# Gradient and Hessian of f via automatic differentiation.
function g(x)
    return ForwardDiff.gradient(f, x)
end

function H(x)
    return ForwardDiff.hessian(f, x)
end

x0 = [2, 5]
pure_newton(f, g, H, x0);

k =   0 | f =   1.01e+00 | g =   2.03e+00 | λmin =   1.18e-01
k =   1 | f =   1.00e+02 | g =   4.49e+02 | λmin =   5.95e+01
k =   2 | f =   2.53e-05 | g =   1.01e-02 | λmin =   3.96e-01
k =   3 | f =   6.38e-08 | g =   1.13e-02 | λmin =   4.01e-01
k =   4 | f =   1.02e-23 | g =   6.38e-12 | λmin =   3.99e-01


In [49]:
# Run the backtracking gradient method on the current globals f, g and x0;
# the trailing `;` keeps the notebook from echoing the returned (x, xtrace) tuple.
grad_method_backtracking(f, g, x0);

it =   0 | |∇f| = 7.92e+02 | f = 6.72e+01
it =   1 | |∇f| = 8.80e+01 | f = 2.44e+00
it =   2 | |∇f| = 5.41e-01 | f = 1.49e+00
it =   3 | |∇f| = 4.56e+00 | f = 1.49e+00
it =   4 | |∇f| = 4.66e+00 | f = 1.49e+00
it =   5 | |∇f| = 4.76e+00 | f = 1.49e+00
it =   6 | |∇f| = 4.86e+00 | f = 1.49e+00
it =   7 | |∇f| = 4.97e+00 | f = 1.49e+00
it =   8 | |∇f| = 5.07e+00 | f = 1.49e+00
it =   9 | |∇f| = 5.18e+00 | f = 1.49e+00
it =  10 | |∇f| = 5.39e-01 | f = 1.49e+00
it =  11 | |∇f| = 7.74e+00 | f = 1.49e+00
it =  12 | |∇f| = 5.39e-01 | f = 1.48e+00
it =  13 | |∇f| = 4.36e+00 | f = 1.48e+00
it =  14 | |∇f| = 4.42e+00 | f = 1.48e+00
it =  15 | |∇f| = 4.47e+00 | f = 1.48e+00
it =  16 | |∇f| = 4.52e+00 | f = 1.48e+00
it =  17 | |∇f| = 4.57e+00 | f = 1.48e+00
it =  18 | |∇f| = 4.62e+00 | f = 1.48e+00
it =  19 | |∇f| = 4.68e+00 | f = 1.48e+00
it =  20 | |∇f| = 4.73e+00 | f = 1.48e+00
it =  21 | |∇f| = 4.78e+00 | f = 1.48e+00
it =  22 | |∇f| = 4.84e+00 | f = 1.47e+00
it =  23 | |∇f| = 4.89e+00 | f = 1

it = 232 | |∇f| = 5.63e+00 | f = 1.45e+00
it = 233 | |∇f| = 5.58e+00 | f = 1.45e+00
it = 234 | |∇f| = 5.52e+00 | f = 1.45e+00
it = 235 | |∇f| = 5.47e+00 | f = 1.45e+00
it = 236 | |∇f| = 5.42e+00 | f = 1.45e+00
it = 237 | |∇f| = 5.36e+00 | f = 1.45e+00
it = 238 | |∇f| = 5.31e+00 | f = 1.45e+00
it = 239 | |∇f| = 5.25e+00 | f = 1.45e+00
it = 240 | |∇f| = 5.20e+00 | f = 1.45e+00
it = 241 | |∇f| = 5.14e+00 | f = 1.44e+00
it = 242 | |∇f| = 5.09e+00 | f = 1.44e+00
it = 243 | |∇f| = 5.04e+00 | f = 1.44e+00
it = 244 | |∇f| = 4.98e+00 | f = 1.44e+00
it = 245 | |∇f| = 4.93e+00 | f = 1.44e+00
it = 246 | |∇f| = 4.88e+00 | f = 1.44e+00
it = 247 | |∇f| = 4.82e+00 | f = 1.44e+00
it = 248 | |∇f| = 4.77e+00 | f = 1.44e+00
it = 249 | |∇f| = 4.72e+00 | f = 1.44e+00
it = 250 | |∇f| = 4.66e+00 | f = 1.44e+00
it = 251 | |∇f| = 4.61e+00 | f = 1.44e+00
it = 252 | |∇f| = 4.56e+00 | f = 1.44e+00
it = 253 | |∇f| = 4.50e+00 | f = 1.44e+00
it = 254 | |∇f| = 4.45e+00 | f = 1.44e+00
it = 255 | |∇f| = 4.40e+00 | f = 1

it = 612 | |∇f| = 8.20e-01 | f = 1.39e+00
it = 613 | |∇f| = 7.97e-01 | f = 1.39e+00
it = 614 | |∇f| = 7.76e-01 | f = 1.39e+00
it = 615 | |∇f| = 7.56e-01 | f = 1.39e+00
it = 616 | |∇f| = 7.38e-01 | f = 1.39e+00
it = 617 | |∇f| = 1.59e+00 | f = 1.39e+00
it = 618 | |∇f| = 1.52e+00 | f = 1.39e+00
it = 619 | |∇f| = 1.45e+00 | f = 1.39e+00
it = 620 | |∇f| = 1.39e+00 | f = 1.39e+00
it = 621 | |∇f| = 1.34e+00 | f = 1.39e+00
it = 622 | |∇f| = 1.28e+00 | f = 1.39e+00
it = 623 | |∇f| = 1.23e+00 | f = 1.39e+00
it = 624 | |∇f| = 1.18e+00 | f = 1.39e+00
it = 625 | |∇f| = 1.13e+00 | f = 1.39e+00
it = 626 | |∇f| = 1.09e+00 | f = 1.39e+00
it = 627 | |∇f| = 1.05e+00 | f = 1.39e+00
it = 628 | |∇f| = 1.01e+00 | f = 1.39e+00
it = 629 | |∇f| = 9.74e-01 | f = 1.39e+00
it = 630 | |∇f| = 9.40e-01 | f = 1.39e+00
it = 631 | |∇f| = 9.08e-01 | f = 1.39e+00
it = 632 | |∇f| = 8.78e-01 | f = 1.39e+00
it = 633 | |∇f| = 8.50e-01 | f = 1.39e+00
it = 634 | |∇f| = 8.25e-01 | f = 1.39e+00
it = 635 | |∇f| = 8.00e-01 | f = 1