# gradient_descent

In [3]:
function gradient_descent(f,g,x0;
        ϵg=0.001,ϵx=0.001,ϵf=0.001,maxIteractions=128,verbose=true)
    x1=x0
    d=-g(x1...)
    f1=f(x1...)
    ng=norm(d)
    α=inexact_method(f,g,f1,-ng*ng,x1,d)
    x2=x1+α.*d
    f2=f(x2...)

    nx=α*ng
    ni=0
    println("step",ni,  "  d=",d)
    println("  x",x1,   "  f=",f1)
            
    while((ng>ϵg || nx>ϵx || abs(f2-f1)>ϵf )&& ni<maxIteractions )
        x1=x2
        d=-g(x1...)
        f1=f2
        α=inexact_method(f,g,f1,-ng*ng,x1,d)
        x2=x1+α.*d
        f2=f(x2...)
        ng=norm(d)
        nx=α*ng
        ni+=1
        if verbose
            println("step",ni,"  d=",d, "  α=",α)
            println( " x",ni,x2, "  f=",f2)
            
        end
    end
    if ni>=maxIteractions
        println("warning:Iteractions exceeds",maxIteractions)
    end
    return x2,f2
end


gradient_descent (generic function with 1 method)

In [2]:
#回溯线性搜索法
function inexact_method(f,g,ϕ0,ϕd0,xk,dk;τ=0.5,ϵ=0.5,ζ=2)
    α=1
    dα=norm(dk'*g((xk.+α.*dk)...))
    while dα<0
        α=ζ*α
        dα=norm(dk'*g((xk.+α.*dk)...))
    end
    ϕα=f((xk.+α.*dk)...)
    while(ϕα>ϕ0+ϵ*α*ϕd0)
        α=τ*α
        ϕα=f((xk.+α.*dk)...)
    end
    return α 
end


inexact_method (generic function with 1 method)

In [4]:
gradient_descent(
(x,y)->(1-x)^2+100*(y-x^2)^2,
(x,y)->[-2*(1-x)-400x*(y-x^2) 200(y-x^2)]',#vector value
    [-2 2]'
    )

step0  d=[1606; 400]
  x[-2; 2]  f=409
step1  d=[318.899; 97.5438]  α=8.470329472543003e-22
 x1[-1.60791; 2.09766]  f=30.58816017707636
step2  d=[318.899; 97.5438]  α=0.000244140625
 x2[-1.53005; 2.12147]  f=11.223344393803819
step3  d=[139.457; 43.9189]  α=1.3552527156068805e-20
 x3[-1.53005; 2.12147]  f=11.223344393803819
step4  d=[139.457; 43.9189]  α=0.000244140625
 x4[-1.49601; 2.13219]  f=7.350338545051078
step5  d=[68.3292; 21.1687]  α=2.710505431213761e-20
 x5[-1.49601; 2.13219]  f=7.350338545051078
step6  d=[68.3292; 21.1687]  α=0.00048828125
 x6[-1.46264; 2.14253]  f=6.065637822822506
step7  d=[3.05043; -0.640914]  α=1.0842021724855044e-19
 x7[-1.46264; 2.14253]  f=6.065637822822506
step8  d=[3.05043; -0.640914]  α=0.00048828125
 x8[-1.46115; 2.14222]  f=6.062528273210443
step9  d=[0.686993; -1.4493]  α=5.551115123125783e-17
 x9[-1.46115; 2.14222]  f=6.062528273210443
step10  d=[0.686993; -1.4493]  α=1
 x10[-0.77416; 0.692912]  f=4.023510605374899
step11  d=[-25.4324; -18.717

 x115[0.754564; 0.567872]  f=0.06046213126054303
step116  d=[0.0396724; 0.29898]  α=0.0078125
 x116[0.754874; 0.570208]  f=0.060100530420604696
step117  d=[0.602892; -0.0746089]  α=0.001953125
 x117[0.756052; 0.570063]  f=0.05975153491758068
step118  d=[0.0185917; 0.310365]  α=1.3877787807814457e-17
 x118[0.756052; 0.570063]  f=0.05975153491758068
step119  d=[0.0185917; 0.310365]  α=0.00390625
 x119[0.756124; 0.571275]  f=0.059495476017360556
step120  d=[0.351866; 0.0898564]  α=0.00390625
 x120[0.757499; 0.571626]  f=0.05928145938366711
step121  d=[-0.175149; 0.435744]  α=0.001953125
 x121[0.757157; 0.572477]  f=0.05903833879837228
step122  d=[0.240515; 0.161902]  α=4.440892098500626e-16
 x122[0.757157; 0.572477]  f=0.059038338798372225
step123  d=[0.240515; 0.161902]  α=0.0078125
 x123[0.759036; 0.573742]  f=0.0586366604076639
step124  d=[-0.244807; 0.478723]  α=0.001953125
 x124[0.758558; 0.574677]  f=0.058348105996713404
step125  d=[0.260479; 0.146598]  α=1.3877787807814457e-17
 x12

([0.760279; 0.576641], 0.05765745021480267)

In [4]:
gradient_descent(
(x1,x2)->5*x1^2+x2^2+4*x1*x2-14*x1-6*x2+20,
(x1,x2)->[10*x1+4*x2-14 2*x2-6]',#vector value
    [2 3]'
    )

step0  d=[-18; 0]
  x[2; 3]  f=27
step1  d=[-6.75; -0.0]  α=3.469446951953614e-18
 x1[0.875; 3.0]  f=13.078125
step2  d=[-6.75; -0.0]  α=0.0625
 x2[0.453125; 3.0]  f=11.120361328125
step3  d=[-2.53125; -0.0]  α=1.1102230246251565e-16
 x3[0.453125; 3.0]  f=11.120361328124998
step4  d=[-2.53125; -0.0]  α=0.0625
 x4[0.294922; 3.0]  f=10.845050811767578
step5  d=[-0.949219; -0.0]  α=2.220446049250313e-16
 x5[0.294922; 3.0]  f=10.845050811767578
step6  d=[-0.949219; -0.0]  α=0.0625
 x6[0.235596; 3.0]  f=10.806335270404816
step7  d=[-0.355957; -0.0]  α=1.7763568394002505e-15
 x7[0.235596; 3.0]  f=10.806335270404816
step8  d=[-0.355957; -0.0]  α=0.0625
 x8[0.213348; 3.0]  f=10.800890897400677
step9  d=[-0.133484; -0.0]  α=1.4210854715202004e-14
 x9[0.213348; 3.0]  f=10.800890897400675
step10  d=[-0.133484; -0.0]  α=0.0625
 x10[0.205006; 3.0]  f=10.80012528244697
step11  d=[-0.0500565; -0.0]  α=1.1368683772161603e-13
 x11[0.205006; 3.0]  f=10.800125282446968
step12  d=[-0.0500565; -0.0]  α=0.0

([0.200099; 3.0], 10.800000048993713)

In [40]:
#####  ####  ###### test   ######  ######  ######

In [144]:
gradient_descent(
(x1,x2)->5*x1^2+x2^2+4*x1*x2-14*x1-6*x2+20,
(x1,x2)->[10*x1+4*x2-14 2*x2-6]',#vector value
    [2 3]'
    )
#step1 是从x1开始算起的
#d f(x)梯度
#\arerfa d 都是上一步的值   

step0  d=[-18; 0]
  x[2; 3]  f=27
step1  d=[-6.75; -0.0]  α=3.469446951953614e-18
 x1[0.875; 3.0]  f=13.078125
step2  d=[-6.75; -0.0]  α=0.0625
 x2[0.453125; 3.0]  f=11.120361328125
step3  d=[-2.53125; -0.0]  α=1.1102230246251565e-16
 x3[0.453125; 3.0]  f=11.120361328124998
step4  d=[-2.53125; -0.0]  α=0.0625
 x4[0.294922; 3.0]  f=10.845050811767578
step5  d=[-0.949219; -0.0]  α=2.220446049250313e-16
 x5[0.294922; 3.0]  f=10.845050811767578
step6  d=[-0.949219; -0.0]  α=0.0625
 x6[0.235596; 3.0]  f=10.806335270404816
step7  d=[-0.355957; -0.0]  α=1.7763568394002505e-15
 x7[0.235596; 3.0]  f=10.806335270404816
step8  d=[-0.355957; -0.0]  α=0.0625
 x8[0.213348; 3.0]  f=10.800890897400677
step9  d=[-0.133484; -0.0]  α=1.4210854715202004e-14
 x9[0.213348; 3.0]  f=10.800890897400675
step10  d=[-0.133484; -0.0]  α=0.0625
 x10[0.205006; 3.0]  f=10.80012528244697
step11  d=[-0.0500565; -0.0]  α=1.1368683772161603e-13
 x11[0.205006; 3.0]  f=10.800125282446968
step12  d=[-0.0500565; -0.0]  α=0.0

([0.200099; 3.0], 10.800000048993713)

In [None]:
#####test 

# niu dun fa 

In [5]:
function niudunfa(f,g,h,x0;ϵ=0.01,maxstep=128) #原，一阶 hession 
f(x0...)
    i=1
    while abs(norm(g(x0...))) > ϵ && i < maxstep
    if det(h(x0...))==0                    ##判断黑塞矩阵是否可逆(行列式为0不可逆)；
                println("ERROR : H Matrix irreversible!","\r","Can't use Newton! ")
    else
    x1=x0-inv(h(x0...))*g(x0...)'
    x0=x1
             println("step = ",i,"   x = ",x0,"     g = ",g(x0...))
            i+=1
   

    end
    end
    return x0,f(x0...)

end


niudunfa (generic function with 1 method)

In [6]:
niudunfa(
    (x,y)->(1-x)^2+100*(y-x^2)^2,
    (x,y)->[-2*(1-x)-400x*(y-x^2) 200*(y-x^2)],
    (x,y)->[2+800*x^2-400*(y-x^2) -400*x
        -400*x 200],
    [-2 2]'
    )

step = 1   x = [-1.99252; 3.97007]     g = [-6.02965 -0.011194]
step = 2   x = [0.966873; -7.82315]     g = [3387.08 -1751.6]
step = 3   x = [0.966892; 0.934879]     g = [-0.0662167 -7.14555e-8]
step = 4   x = [1.0; 0.998904]     g = [0.438467 -0.219233]
step = 5   x = [1.0; 1.0]     g = [-8.50793e-10 0.0]


([1.0; 1.0], 1.809621786446634e-19)

In [7]:
niudunfa((x1,x2)->5*x1+x2^2+4*x1*x2-14*x1-6*x2+20,
    (x1,x2)->[5+4*x2-14 2*x2+4*x1-6],
    (x1,x2)->[0 4
              4 2],
    [1 2]')

step = 1   x = [0.375; 2.25]     g = [0.0 0.0]


([0.375; 2.25], 11.5625)

In [8]:
niudunfa((x1,x2,x3,x4)->(x1+10*x2)^2+5*(x3-x4)^2+(x2-2*x3)^4+10*(x1-x4)^4,
    (x1,x2,x3,x4)->[2*(x1+10*x2)+40*(x1-x4)^3 20*(x1+10*x2)+4*(x2-2*x3)^3  10*(x3-x4)-8*(x2-2*x3)^3 -10*(x3-x4)-40*(x1-x4)^3],
    (x1,x2,x3,x4)->[2+120*(x1-x4)^2 20 0 -120*(x1-x4)^2
                    20 200+12*(x2-2*x3)^2 -24*(x2-2*x3)^2 0
                    0 -24*(x2-2*x3)^2 10+48*(x2-2*x3)^2 -10
                    -120*(x1-x4)^2 0 -10 10+120*(x1-x4)^2],
                    [3 -1 0 1]')


step = 1   x = [1.5873; -0.15873; 0.253968; 0.253968]     g = [94.8148 -1.18519 2.37037 -94.8148]
step = 2   x = [1.0582; -0.10582; 0.169312; 0.169312]     g = [28.0933 -0.351166 0.702332 -28.0933]
step = 3   x = [0.705467; -0.0705467; 0.112875; 0.112875]     g = [8.32393 -0.104049 0.208098 -8.32393]
step = 4   x = [0.470312; -0.0470312; 0.0752499; 0.0752499]     g = [2.46635 -0.0308294 0.0616588 -2.46635]
step = 5   x = [0.313541; -0.0313541; 0.0501666; 0.0501666]     g = [0.730771 -0.00913463 0.0182693 -0.730771]
step = 6   x = [0.209027; -0.0209027; 0.0334444; 0.0334444]     g = [0.216525 -0.00270656 0.00541312 -0.216525]
step = 7   x = [0.139352; -0.0139352; 0.0222963; 0.0222963]     g = [0.0641554 -0.000801943 0.00160389 -0.0641554]
step = 8   x = [0.0929011; -0.00929011; 0.0148642; 0.0148642]     g = [0.019009 -0.000237613 0.000475226 -0.019009]
step = 9   x = [0.061934; -0.0061934; 0.00990945; 0.00990945]     g = [0.0056323 -7.04038e-5 0.000140808 -0.0056323]


([0.061934; -0.0061934; 0.00990945; 0.00990945], 7.371239973103289e-5)

# 步长固定梯度法

In [38]:
function Step_length_fixed_gradient(f,x0,Q,c;
    ϵ=0.01,ϵf=0.001,maxIteractions=128)
    gk=Q*x0-c
    f1=f(x0...)
    α=(gk'*gk)/(gk'*Q*gk)
    x1=x0'-α.*gk'
    f2=f(x1...)
    i=1
    println("g0",gk,"α",α)
    while abs(norm(f2)-norm(f1))>ϵf && i < maxIteractions
    gk=Q*x1'-c
    f1=f(x1...)
        println("step = ",i,  "  g",gk,   "α=",α,"   x1 = ",x1)
    α=(gk'*gk)/(gk'*Q*gk)
    x1=x1-α.*gk'
    f2=f(x1...)
        i+=1
       println("f = ",f2)
    end
    return x1,f2
end

Step_length_fixed_gradient (generic function with 1 method)

In [45]:
Step_length_fixed_gradient((x1,x2)->1/2*[x1 x2]*[20 5;5 2]*[x1 x2]'- [14 6]*[x1 x2]'+10,
    [2 2]',
    [20 5;5 2],
    [14 6]'
)

g0[36; 8]α[0.0470133]
step = 1  g[0.269912; -1.2146]α=[0.0470133]   x1 = [0.307522 1.62389]
f = [0.969769]
step = 2  g[1.19501; 0.265558]α=[1.37097]   x1 = [-0.0625178 3.28907]
f = [0.934543]
step = 3  g[0.00895965; -0.0403184]α=[0.0470133]   x1 = [-0.118699 3.27659]
f = [0.933373]
step = 4  g[0.0396681; 0.00881514]α=[1.37097]   x1 = [-0.130983 3.33186]
f = [0.933335]


([-0.132848 3.33145], [0.933335])

# Levenberg-Marquardt

In [73]:
function LM(f,g,h,x0;
        ϵ=eps(),maxstep=128)     
    i = 0
    if abs(norm(g(x0...))) <= ϵ
        return x0,f(x0...)
    else
        while abs(norm(g(x0...))) > ϵ && i < maxstep
            i += 1
            α=0.1
            μ=0.0001
            println("step = ",i,"\t","x = ",x0,"\t","derivattive = ",g(x0...))
            x0 = x0 -α*(inv(h(x0...)+μ*eye(size(h(x0...))[1])))*g(x0...)'
        end
        return x0,f(x0...),g(x0...)
    end
end

LM (generic function with 1 method)

In [74]:
LM((x,y)->(1-x)^2+100*(y-x^2)^2,
    (x,y)->[-2*(1-x)-400*x*(y-x^2) 200*(y-x^2)],
    (x,y)->[2+800*x^2-400*(y-x^2) -400*x;-400*x 200],
    [2 -2]'
)

step = 1	x = [2; -2]	derivattive = [4802 -1200]
step = 2	x = [1.99992; -1.40033]	derivattive = [4321.82 -1080.0]
step = 3	x = [1.99982; -0.860704]	derivattive = [3889.66 -972.0]
step = 4	x = [1.99972; -0.375116]	derivattive = [3500.71 -874.8]
step = 5	x = [1.99961; 0.061827]	derivattive = [3150.66 -787.32]
step = 6	x = [1.99948; 0.454979]	derivattive = [2835.61 -708.588]
step = 7	x = [1.99934; 0.80871]	derivattive = [2552.07 -637.729]
step = 8	x = [1.99918; 1.12695]	derivattive = [2296.89 -573.957]
step = 9	x = [1.99901; 1.41323]	derivattive = [2067.22 -516.561]
step = 10	x = [1.99882; 1.67074]	derivattive = [1860.52 -464.905]
step = 11	x = [1.9986; 1.90233]	derivattive = [1674.48 -418.414]
step = 12	x = [1.99836; 2.11059]	derivattive = [1507.06 -376.573]
step = 13	x = [1.9981; 2.29782]	derivattive = [1356.37 -338.916]
step = 14	x = [1.9978; 2.4661]	derivattive = [1220.75 -305.024]
step = 15	x = [1.99748; 2.61731]	derivattive = [1098.7 -274.522]
step = 16	x = [1.99712; 2.75313]	derivat

([1.00625; 1.01241], 4.046511870143458e-5, [0.0606363 -0.0239209])