In [1]:
# optimizer_gdx_rosenbrock.py
# Gradient descent w/momentum & adaptive lr optimization
"""
Dr. Juan R. Castro
FCQI, UABC Campus Tijuana
"""
# Importing Libraries
import numpy as np

In [2]:
# objective Extended Rosenbrock function
def objfcn(x):
    # Minima -> f=0 at (1,.....,1)
    n = len(x) # n par
    z = np.zeros((n,1))
    l2 = np.array(range(0,n,2)) # indice par
    l1 = np.array(range(1,n,2)) # indice impar
    z[l2]=10.0*(x[l1]-(x[l2])**2.0)
    z[l1]=1.0-x[l2]
    f = z.T @ z
    return f[0,0]

In [3]:
# Extended Rosenbrock gradient function
def objfcngrad(x):
    n = len(x) # n even
    Jz = np.zeros((n,n))
    z = np.zeros((n,1))
    l2 = np.array(range(0,n,2)) # indice par
    l1 = np.array(range(1,n,2)) # indice impar
    z[l2]=10.0*(x[l1]-(x[l2])**2.0)
    z[l1]=1.0-x[l2]

    for i in range(n//2):
        Jz[2*i,2*i]     = -20.0*x[2*i]
        Jz[2*i,2*i+1]   = 10.0
        Jz[2*i+1,2*i]   = -1.0

    gX = 2.0*Jz.T @ z
    return gX

In [4]:
def objfcnjac(x):
    # Extended Rosenbrock Jacobian Function
    n = len(x) # n even
    Jz = np.zeros((n,n))
    z = np.zeros((n,1))
    l2 = np.array(range(0,n,2)) # indice par
    l1 = np.array(range(1,n,2)) # indice impar
    z[l2]=10.0*(x[l1]-(x[l2])**2.0)
    z[l1]=1.0-x[l2]

    for i in range(n//2):
        Jz[2*i,2*i]     = -20.0*x[2*i]
        Jz[2*i,2*i+1]   = 10.0
        Jz[2*i+1,2*i]   = -1.0

    gX = 2.0*Jz.T @ z
    normgX = np.linalg.norm(gX)
    return z, Jz, normgX

In [5]:
# Gradient descent w/momentum & adaptive lr optimization
def traingdx(X,maxEpochs,goal,lr,mc,lr_inc,lr_dec,max_perf_inc,mingrad,show):
    # traingdx is a optimization function that updates variables
    # values according to gradient descent w/momentum & adaptive lr optimization
    this = "traingdx"
    stop = ""
    epochs = []
    perfs  = []
    # Performance and Gradient
    perf = objfcn(X)
    gX = objfcngrad(X)
    normgX = np.linalg.norm(gX)
    dX = lr*gX
    print("\n")
    # Train
    for epoch in range(maxEpochs+1):
        # Stopping criteria
        if perf <= goal:
            stop = "Performance goal met"
        elif epoch == maxEpochs:
            stop = "Maximum epoch reached, performance goal was not met"
        elif normgX < mingrad:
            stop = "Minimum gradient reached, performance goal was not met"
        # Progress
        if (np.fmod(epoch,show) == 0 or len(stop) != 0):
            print(this,end = ": ")
            if np.isfinite(maxEpochs):
                print("Epoch ",epoch, "/", maxEpochs,end = " ")
            if np.isfinite(goal):
                print(", Performance %8.3e" % perf, "/", goal, end = " ")
            if np.isfinite(mingrad):
                print(", Gradient %8.3e" % normgX, "/", mingrad)

            epochs = np.append(epochs,epoch)
            perfs = np.append(perfs,perf)
            if len(stop) != 0:
                print("\n",this,":",stop,"\n")
                break
        # Gradient Descent with Momentum and Adaptive Learning Rate
        dX = mc*dX - (1-mc)*lr*gX
        X2 = X + dX
        perf2 = objfcn(X2)

        if (perf2/perf) > max_perf_inc :
            lr = lr*lr_dec
            dX = lr*gX
        else:
            if (perf2 < perf):
                lr = lr*lr_inc
            X = X2
            perf = perf2
            gX   = objfcngrad(X)
            normgX = np.linalg.norm(gX)

    return X, perfs, epochs

In [6]:
# seed the pseudo random number generator
np.random.seed(1)
n = 10
x = 10*(np.random.randn(n,1))
print("\n",x,"\n")
# Performance goal met
goal = 1e-8
# define the total iterations
max_epochs = 5000
# rate learning
lr = 1e-3
# momentum
mc = 0.9
# rate learning increment
lr_inc = 1.05
# rate learning decrement
lr_dec = 0.70
# maximum performance increment
max_perf_inc = 1.04
# minimum gradient
min_grad = 1e-11
# show
show = 100
# perform the gradient descent
x, perfs, epochs = traingdx(x,max_epochs,goal,lr,mc,lr_inc,lr_dec,max_perf_inc,min_grad,show)
print(x)


 [[ 16.24345364]
 [ -6.11756414]
 [ -5.28171752]
 [-10.72968622]
 [  8.65407629]
 [-23.01538697]
 [ 17.44811764]
 [ -7.61206901]
 [  3.19039096]
 [ -2.49370375]] 



traingdx: Epoch  0 / 5000 , Performance 1.815e+07 / 1e-08 , Gradient 2.819e+06 / 1e-11
traingdx: Epoch  100 / 5000 , Performance 2.670e+05 / 1e-08 , Gradient 7.422e+04 / 1e-11
traingdx: Epoch  200 / 5000 , Performance 7.651e+03 / 1e-08 , Gradient 1.749e+03 / 1e-11
traingdx: Epoch  300 / 5000 , Performance 2.377e+00 / 1e-08 , Gradient 2.625e+00 / 1e-11
traingdx: Epoch  400 / 5000 , Performance 2.760e-01 / 1e-08 , Gradient 5.742e-01 / 1e-11
traingdx: Epoch  500 / 5000 , Performance 9.311e-02 / 1e-08 , Gradient 1.527e+00 / 1e-11
traingdx: Epoch  600 / 5000 , Performance 3.825e-02 / 1e-08 , Gradient 1.058e+00 / 1e-11
traingdx: Epoch  700 / 5000 , Performance 1.931e-02 / 1e-08 , Gradient 1.857e+00 / 1e-11
traingdx: Epoch  800 / 5000 , Performance 1.400e-02 / 1e-08 , Gradient 1.105e-01 / 1e-11
traingdx: Epoch  900 / 5000 , Perf

  Jz[2*i,2*i]     = -20.0*x[2*i]


traingdx: Epoch  2600 / 5000 , Performance 5.892e-07 / 1e-08 , Gradient 7.123e-03 / 1e-11
traingdx: Epoch  2700 / 5000 , Performance 4.818e-07 / 1e-08 , Gradient 6.209e-04 / 1e-11
traingdx: Epoch  2800 / 5000 , Performance 2.913e-07 / 1e-08 , Gradient 5.433e-04 / 1e-11
traingdx: Epoch  2900 / 5000 , Performance 1.463e-07 / 1e-08 , Gradient 1.969e-03 / 1e-11
traingdx: Epoch  3000 / 5000 , Performance 7.953e-08 / 1e-08 , Gradient 1.342e-03 / 1e-11
traingdx: Epoch  3100 / 5000 , Performance 4.760e-08 / 1e-08 , Gradient 2.392e-04 / 1e-11
traingdx: Epoch  3200 / 5000 , Performance 3.724e-08 / 1e-08 , Gradient 1.727e-04 / 1e-11
traingdx: Epoch  3300 / 5000 , Performance 2.174e-08 / 1e-08 , Gradient 1.482e-04 / 1e-11
traingdx: Epoch  3400 / 5000 , Performance 1.254e-08 / 1e-08 , Gradient 1.012e-04 / 1e-11
traingdx: Epoch  3444 / 5000 , Performance 9.983e-09 / 1e-08 , Gradient 8.933e-05 / 1e-11

 traingdx : Performance goal met 

[[0.99995538]
 [0.99991058]
 [0.99995535]
 [0.99991053]
 [0.9999