# Scaled Gradient Method

In [1]:
using Plots, ForwardDiff

## Gradient method with backtracking

In [2]:
"""
    grad_method_backtracking(fObj, gObj, x0; ϵ=1e-6, μ=1e-5, maxits=1000)

Minimize `fObj` by the gradient (steepest-descent) method with a backtracking
(Armijo) line search.

Every iteration starts from the unit step `α = 1` and halves `α` until the
sufficient-decrease test `f(x) - f(x - α∇f) ≥ μ α dot(∇f, ∇f)` holds. One progress
line is printed per iteration.

# Arguments
- `fObj`: objective function, called as `fObj(x)`.
- `gObj`: gradient of the objective, called as `gObj(x)`.
- `x0`: starting point; it is copied, never modified.

# Keywords
- `ϵ`: the iteration stops once `norm(∇f) ≤ ϵ`.
- `μ`: sufficient-decrease parameter of the line search.
- `maxits`: maximum number of iterations.

# Returns
`(x, xtrace)`: the final iterate, and the iterates `x0, x1, …` concatenated
column-wise with `hcat` (just the copy of `x0` when no iteration is taken).
"""
function grad_method_backtracking(fObj, gObj, x0; ϵ=1e-6, μ=1e-5, maxits = 1000)
    x = copy(x0)
    f = fObj(x)
    ∇f = gObj(x)
    k = 0
    xtrace = x
    while norm(∇f) > ϵ && k < maxits
        # The squared gradient norm does not change during the line search.
        slope = dot(∇f, ∇f)

        # Keep the trial point and its objective value so that the accepted
        # step does not have to be rebuilt and re-evaluated afterwards.
        α = 1.0
        xnew = x - α*∇f
        fnew = fObj(xnew)
        while f - fnew < μ*α*slope
            α /= 2
            xnew = x - α*∇f
            fnew = fObj(xnew)
        end

        x, f = xnew, fnew
        ∇f = gObj(x)
        @printf "it = %3d | |∇f| = %8.2e | f = %8.2e\n" k norm(∇f) f
        k += 1; xtrace = hcat(xtrace,x)
    end
    return x, xtrace
end;

### Ill-conditioned quadratic

## Rosenbrock function
$$
f(x) = 100(x_2 - x_1^2)^2 + (1-x_1)^2
$$

## Scaling

In [3]:
"""
    grad_method_scaled_backtracking(fObj, gObj, Dobj, x0; ϵ=1e-6, μ=1e-5, maxits=1000)

Minimize `fObj` by the scaled gradient method with a backtracking (Armijo) line
search.

Each iteration steps along `-d` with `d = Dobj(x) * ∇f`. The step starts at
`α = 1` and is halved until the sufficient-decrease test
`f(x) - f(x - αd) ≥ μ α dot(∇f, d)` holds. One progress line is printed per
iteration.

`d` is a descent direction only when `dot(∇f, d) > 0` (for example when the
scaling matrix is positive definite); this is not checked here.

# Arguments
- `fObj`: objective function, called as `fObj(x)`.
- `gObj`: gradient of the objective, called as `gObj(x)`.
- `Dobj`: scaling, called as `Dobj(x)`; its result is multiplied with the gradient.
- `x0`: starting point; it is copied, never modified.

# Keywords
- `ϵ`: the iteration stops once `norm(∇f) ≤ ϵ`.
- `μ`: sufficient-decrease parameter of the line search.
- `maxits`: maximum number of iterations.

# Returns
`(x, xtrace)`: the final iterate, and the iterates `x0, x1, …` concatenated
column-wise with `hcat` (just the copy of `x0` when no iteration is taken).
"""
function grad_method_scaled_backtracking(fObj, gObj, Dobj, x0; ϵ=1e-6, μ=1e-5, maxits = 1000)
    x = copy(x0)
    f = fObj(x)
    ∇f = gObj(x)
    k = 0
    xtrace = x
    while norm(∇f) > ϵ && k < maxits
        # Build the scaling only where a direction is needed, so it is not
        # evaluated at a point where the loop has already terminated.
        d = Dobj(x)*∇f
        # The directional slope does not change during the line search.
        slope = dot(∇f, d)

        # Keep the trial point and its objective value so that the accepted
        # step does not have to be rebuilt and re-evaluated afterwards.
        α = 1.0
        xnew = x - α*d
        fnew = fObj(xnew)
        while f - fnew < μ*α*slope
            α /= 2
            xnew = x - α*d
            fnew = fObj(xnew)
        end

        x, f = xnew, fnew
        ∇f = gObj(x)
        @printf "it = %3d | |∇f| = %8.2e | f = %8.2e\n" k norm(∇f) f
        k += 1; xtrace = hcat(xtrace,x)
    end
    return x, xtrace
end;

In [4]:
# Rosenbrock objective; its derivatives are obtained from ForwardDiff.
function f(x)
    a, b = x[1], x[2]
    return 100(b - a^2)^2 + (1 - a)^2
end

function ∇f(x)
    return ForwardDiff.gradient(f, x)
end

# Scaling matrix: inverse of the Hessian shifted by 1e-3 times the identity.
function D(x)
    H = ForwardDiff.hessian(f, x)
    return inv(H + 1e-3*eye(2))
end

x0 = [2,5]
x, xtrace = grad_method_scaled_backtracking(f, ∇f, D, x0);

it =   0 | |∇f| = 2.03e+00 | f = 1.01e+00
it =   1 | |∇f| = 4.65e+01 | f = 9.52e-01
it =   2 | |∇f| = 3.65e+00 | f = 4.91e-01
it =   3 | |∇f| = 3.01e+01 | f = 4.55e-01
it =   4 | |∇f| = 2.22e+00 | f = 1.95e-01
it =   5 | |∇f| = 1.39e+01 | f = 1.42e-01
it =   6 | |∇f| = 1.77e+00 | f = 5.80e-02
it =   7 | |∇f| = 4.15e+00 | f = 3.13e-02
it =   8 | |∇f| = 2.09e+00 | f = 1.09e-02
it =   9 | |∇f| = 1.41e+00 | f = 2.66e-03
it =  10 | |∇f| = 3.61e-01 | f = 3.00e-04
it =  11 | |∇f| = 8.70e-02 | f = 7.61e-06
it =  12 | |∇f| = 1.85e-03 | f = 7.48e-09
it =  13 | |∇f| = 2.79e-06 | f = 6.45e-14
it =  14 | |∇f| = 5.51e-10 | f = 3.79e-19


Let's try the same problem with the unscaled gradient, i.e. with the scaling matrix $D = I$, and cap the run at 100 iterations.

In [15]:
# Same Rosenbrock objective and ForwardDiff gradient as in the scaled run.
function f(x)
    a, b = x[1], x[2]
    return 100(b - a^2)^2 + (1 - a)^2
end

function ∇f(x)
    return ForwardDiff.gradient(f, x)
end

# Identity scaling: the search direction is the plain gradient.
function D(x)
    return eye(2)
end

x0 = [2,5]
x, xtrace = grad_method_scaled_backtracking(f, ∇f, D, x0, maxits=100);

it =   0 | |∇f| = 7.92e+02 | f = 6.72e+01
it =   1 | |∇f| = 8.80e+01 | f = 2.44e+00
it =   2 | |∇f| = 5.41e-01 | f = 1.49e+00
it =   3 | |∇f| = 4.56e+00 | f = 1.49e+00
it =   4 | |∇f| = 4.66e+00 | f = 1.49e+00
it =   5 | |∇f| = 4.76e+00 | f = 1.49e+00
it =   6 | |∇f| = 4.86e+00 | f = 1.49e+00
it =   7 | |∇f| = 4.97e+00 | f = 1.49e+00
it =   8 | |∇f| = 5.07e+00 | f = 1.49e+00
it =   9 | |∇f| = 5.18e+00 | f = 1.49e+00
it =  10 | |∇f| = 5.39e-01 | f = 1.49e+00
it =  11 | |∇f| = 7.74e+00 | f = 1.49e+00
it =  12 | |∇f| = 5.39e-01 | f = 1.48e+00
it =  13 | |∇f| = 4.36e+00 | f = 1.48e+00
it =  14 | |∇f| = 4.42e+00 | f = 1.48e+00
it =  15 | |∇f| = 4.47e+00 | f = 1.48e+00
it =  16 | |∇f| = 4.52e+00 | f = 1.48e+00
it =  17 | |∇f| = 4.57e+00 | f = 1.48e+00
it =  18 | |∇f| = 4.62e+00 | f = 1.48e+00
it =  19 | |∇f| = 4.68e+00 | f = 1.48e+00
it =  20 | |∇f| = 4.73e+00 | f = 1.48e+00
it =  21 | |∇f| = 4.78e+00 | f = 1.48e+00
it =  22 | |∇f| = 4.84e+00 | f = 1.47e+00
it =  23 | |∇f| = 4.89e+00 | f = 1