In [None]:
import numpy as np

# Algorithm implementation

In [None]:
def _print_iteration(number,d_k,s,x_k):
    """Print the search direction, step length and new iterate of one iteration."""
    print('=====Iteration{}====='.format(number))
    print('Search direction: {}.T'.format(d_k.reshape(1,-1)))
    print('Step length: {}'.format(s))
    print('New iterate: {}.T'.format(x_k.reshape(1,-1)))


def backtracking(function,gradient,initial_s,gamma,beta,initial_x,
                 max_iter=1000,stop_criteria=1E-5):
    """
    Description:
    Steepest-descent method with a backtracking line search to
    find a minimum of a multivariate function. Progress is reported
    with print(); nothing is returned.
    Parameters:
    function: objective function to be minimized, called as function(x)
    gradient: gradient of the objective function, called as gradient(x)
    initial_s: initial guess of the step length, tried first at every
    iteration
    gamma: real number strictly between 0 and 1 used to test whether the
    function is decreased sufficiently
    beta: real number strictly between 0 and 1 by which the step length
    is multiplied whenever the current one does not give
    sufficient decrease
    initial_x: initial point; must support reshape (the notebook passes
    NumPy column vectors of shape (n,1))
    max_iter: maximum number of iterations (default 1000)
    stop_criteria: the search stops as soon as
    ||gradient(x)|| / (1 + |function(x)|) is no longer greater than
    this value (default 1E-5)
    Raises:
    ValueError: if gamma or beta is not strictly between 0 and 1
    Output:
    Prints the initial point, the solution if one is found, and every
    iteration when fewer than 15 were performed; otherwise the first 10
    and the last 5 iterations. A warning is printed when max_iter
    iterations were performed without meeting the stop criteria.
    """
    # beta == 1 would never shrink the step, so the line search could not end
    if not 0 < gamma < 1:
        raise ValueError('gamma must be strictly between 0 and 1')
    if not 0 < beta < 1:
        raise ValueError('beta must be strictly between 0 and 1')
    # print initial point
    print('Initial point: {}.T'.format(initial_x.reshape(1,-1)))
    # set x to initial point
    x_k=initial_x
    # create lists to store result
    d_k_list=[]
    s_list=[]
    x_k_list=[]
    # remember whether the stop criteria was met (instead of guessing it
    # from the loop counter afterwards)
    converged=False
    # iterate until maximum iteration is reached
    for _ in range(max_iter):
        # calculate gradient value
        gradient_k=gradient(x_k)
        # calculate function value
        function_k=function(x_k)
        # check stop criteria; written as "not (... > ...)" so that a NaN
        # ratio also ends the search, exactly as the comparison always did
        if not ((np.linalg.norm(gradient_k)/(1+np.abs(function_k))) > stop_criteria):
            # print out the solution
            print('Solution found: {}.T'.format(x_k.reshape(1,-1)))
            converged=True
            break
        # descent direction
        d_k=-gradient_k
        # directional derivative along d_k; constant during the line search
        slope=np.dot(gradient_k.T,d_k)
        # every line search restarts from the initial step length
        s=initial_s
        # next point
        x_k_plus_one=x_k+s*d_k
        # shrink the step until the sufficient decrease condition is met;
        # the cached function value and the trial point are reused so no
        # function/gradient is evaluated twice at the same point
        while (function_k-function(x_k_plus_one)) < (-gamma*s*slope):
            # update step length
            s=beta*s
            # re-calculate next point
            x_k_plus_one=x_k+s*d_k
        # update x
        x_k=x_k_plus_one
        # put result in lists
        d_k_list.append(d_k)
        s_list.append(s)
        x_k_list.append(x_k)
    # print result
    # number of iterations that were actually performed
    n_iterations=len(x_k_list)
    # if fewer than 15 iterations print each iteration
    if n_iterations < 15:
        for i in range(n_iterations):
            _print_iteration(i+1,d_k_list[i],s_list[i],x_k_list[i])
    # otherwise print an excerpt
    else:
        # print first 10 iterations
        for i in range(10):
            _print_iteration(i+1,d_k_list[i],s_list[i],x_k_list[i])
        print('\n        ..........\n        ..........')
        # print last 5 iterations, labelled with their true iteration number
        for i in range(n_iterations-5,n_iterations):
            _print_iteration(i+1,d_k_list[i],s_list[i],x_k_list[i])
    # print warning if the iteration budget was used up without converging
    if not converged:
        print('====================')
        print('Maximum iteration is reached !!!')


# Test function 1: 
\begin{equation}
f_{1}(x)=x_{1}^{2}+x_{2}^{2}+x_{3}^{2}
\end{equation}

In [None]:
# define function
def function_1(x):
    """Evaluate test function 1: f(x) = x1^2 + x2^2 + x3^2.

    Only the entries x[0], x[1] and x[2] of ``x`` are read.
    """
    first, second, third = x[0], x[1], x[2]
    squares = (first**2, second**2, third**2)
    return squares[0] + squares[1] + squares[2]
# define gradient
def gradient_1(x):
    """Return the gradient of test function 1, (2*x1, 2*x2, 2*x3).

    The three partial derivatives are stacked and reshaped to (-1, 1),
    i.e. returned as a column.
    """
    partials = [2*x[i] for i in range(3)]
    return np.array(partials).reshape(-1, 1)

In [None]:
# start the search from the column vector (1,1,1).T
initial=np.array([[1],[1],[1]])
backtracking(function_1,gradient_1,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[1 1 1]].T
Solution found: [[0. 0. 0.]].T
=====Iteration1=====
Search direction: [[-2 -2 -2]].T
Step length: 0.5
New iterate: [[0. 0. 0.]].T


# Test function 2: 
\begin{equation}
f_{2}(x)=x_{1}^{2}+2 x_{2}^{2}-2 x_{1} x_{2}-2 x_{2}
\end{equation}

In [None]:
# define function
def function_2(x):
    """Evaluate test function 2: f(x) = x1^2 + 2*x2^2 - 2*x1*x2 - 2*x2.

    Only the entries x[0] and x[1] of ``x`` are read.
    """
    x1, x2 = x[0], x[1]
    squared_terms = x1**2 + 2*x2**2
    cross_term = 2*x1*x2
    linear_term = 2*x2
    return squared_terms - cross_term - linear_term
# define gradient
def gradient_2(x):
    """Return the gradient of test function 2 as a column.

    The components are (2*x1 - 2*x2, 4*x2 - 2*x1 - 2), reshaped to (-1, 1).
    """
    x1, x2 = x[0], x[1]
    d_dx1 = 2*x1 - 2*x2
    d_dx2 = 4*x2 - 2*x1 - 2
    return np.array([d_dx1, d_dx2]).reshape(-1, 1)

In [None]:
# start the search from the column vector (0,0).T
initial=np.array([[0],[0]])
backtracking(function_2,gradient_2,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[0 0]].T
Solution found: [[0.99998474 0.99999237]].T
=====Iteration1=====
Search direction: [[0 2]].T
Step length: 0.25
New iterate: [[0.  0.5]].T
=====Iteration2=====
Search direction: [[ 1. -0.]].T
Step length: 0.5
New iterate: [[0.5 0.5]].T
=====Iteration3=====
Search direction: [[-0.  1.]].T
Step length: 0.25
New iterate: [[0.5  0.75]].T
=====Iteration4=====
Search direction: [[ 0.5 -0. ]].T
Step length: 0.5
New iterate: [[0.75 0.75]].T
=====Iteration5=====
Search direction: [[-0.   0.5]].T
Step length: 0.25
New iterate: [[0.75  0.875]].T
=====Iteration6=====
Search direction: [[ 0.25 -0.  ]].T
Step length: 0.5
New iterate: [[0.875 0.875]].T
=====Iteration7=====
Search direction: [[-0.    0.25]].T
Step length: 0.25
New iterate: [[0.875  0.9375]].T
=====Iteration8=====
Search direction: [[ 0.125 -0.   ]].T
Step length: 0.5
New iterate: [[0.9375 0.9375]].T
=====Iteration9=====
Search direction: [[-0.     0.125]].T
Step length: 0.25
New iterate: [[0.9375  0.96875]].T
=

# Test function 3:
\begin{equation}
f_{3}(x)=100\left(x_{2}-x_{1}^{2}\right)^{2}+\left(1-x_{1}\right)^{2}
\end{equation}

In [None]:
# define function
def function_3(x):
    """Evaluate test function 3: f(x) = 100*(x2 - x1^2)^2 + (1 - x1)^2.

    Only the entries x[0] and x[1] of ``x`` are read.
    """
    x1, x2 = x[0], x[1]
    curve_gap = x2 - x1**2
    unit_gap = 1 - x1
    return 100*curve_gap**2 + unit_gap**2
# define gradient
def gradient_3(x):
    """Return the gradient of test function 3 as a column.

    The components are (-400*x1*(x2 - x1^2) - 2*(1 - x1), 200*(x2 - x1^2)),
    reshaped to (-1, 1).
    """
    x1, x2 = x[0], x[1]
    curve_gap = x2 - x1**2
    d_dx1 = -400*x1*curve_gap - 2*(1 - x1)
    d_dx2 = 200*curve_gap
    return np.array([d_dx1, d_dx2]).reshape(-1, 1)

In [None]:
# start the search from the column vector (-1.2,1).T
initial=np.array([[-1.2],[1]])
backtracking(function_3,gradient_3,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[-1.2  1. ]].T
Solution found: [[1.0000106  1.00002124]].T
=====Iteration1=====
Search direction: [[215.6  88. ]].T
Step length: 0.0009765625
New iterate: [[-0.98945312  1.0859375 ]].T
=====Iteration2=====
Search direction: [[-38.33803031 -21.38400269]].T
Step length: 0.0009765625
New iterate: [[-1.02689261  1.06505468]].T
=====Iteration3=====
Search direction: [[-0.27816415 -2.10925141]].T
Step length: 0.00390625
New iterate: [[-1.02797919  1.05681542]].T
=====Iteration4=====
Search direction: [[ 4.02544229 -0.01484275]].T
Step length: 0.5
New iterate: [[0.98474196 1.04939405]].T
=====Iteration5=====
Search direction: [[ 31.41515509 -15.93546343]].T
Step length: 0.0009765625
New iterate: [[1.01542082 1.03383207]].T
=====Iteration6=====
Search direction: [[ 1.08718639 -0.55052448]].T
Step length: 0.0009765625
New iterate: [[1.01648253 1.03329445]].T
=====Iteration7=====
Search direction: [[-0.00949734 -0.01154359]].T
Step length: 0.015625
New iterate: [[1.01633413 1.033

# Test function 4:
\begin{equation}
f_{4}(x)=\left(x_{1}+x_{2}\right)^{4}+x_{2}^{2}
\end{equation}

In [None]:
# define function
def function_4(x):
    """Evaluate test function 4: f(x) = (x1 + x2)^4 + x2^2.

    Only the entries x[0] and x[1] of ``x`` are read.
    """
    x1, x2 = x[0], x[1]
    total = x1 + x2
    return total**4 + x2**2
# define gradient
def gradient_4(x):
    """Return the gradient of test function 4 as a column.

    The components are (4*(x1 + x2)^3, 4*(x1 + x2)^3 + 2*x2),
    reshaped to (-1, 1).
    """
    x1, x2 = x[0], x[1]
    # both partial derivatives share the quartic term's derivative
    quartic_part = 4*(x1 + x2)**3
    return np.array([quartic_part, quartic_part + 2*x2]).reshape(-1, 1)

In [None]:
# start the search from the column vector (2,-2).T
initial=np.array([[2],[-2]])
backtracking(function_4,gradient_4,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[ 2 -2]].T
Solution found: [[ 1.35714901e-02 -4.90320909e-06]].T
=====Iteration1=====
Search direction: [[0 4]].T
Step length: 0.25
New iterate: [[ 2. -1.]].T
=====Iteration2=====
Search direction: [[-4. -2.]].T
Step length: 0.0625
New iterate: [[ 1.75  -1.125]].T
=====Iteration3=====
Search direction: [[-0.9765625  1.2734375]].T
Step length: 0.5
New iterate: [[ 1.26171875 -0.48828125]].T
=====Iteration4=====
Search direction: [[-1.85069847 -0.87413597]].T
Step length: 0.125
New iterate: [[ 1.03038144 -0.59754825]].T
=====Iteration5=====
Search direction: [[-0.3243558   0.87074069]].T
Step length: 0.25
New iterate: [[ 0.94929249 -0.37986307]].T
=====Iteration6=====
Search direction: [[-0.73854964  0.02117651]].T
Step length: 0.5
New iterate: [[ 0.58001767 -0.36927482]].T
=====Iteration7=====
Search direction: [[-0.03743851  0.70111112]].T
Step length: 0.25
New iterate: [[ 0.57065804 -0.19399704]].T
=====Iteration8=====
Search direction: [[-0.21375289  0.17424119]].T
Ste

# Test function 5:
\begin{equation}
f_{5}(x)=\left(x_{1}-1\right)^{2}+\left(x_{2}-1\right)^{2}+c\left(x_{1}^{2}+x_{2}^{2}-0.25\right)^{2}
\end{equation}

**c=1**

In [None]:
# define function
def function_5_1(x):
    """Evaluate test function 5 with c = 1.

    f(x) = (x1 - 1)^2 + (x2 - 1)^2 + (x1^2 + x2^2 - 0.25)^2.
    Only the entries x[0] and x[1] of ``x`` are read.
    """
    x1, x2 = x[0], x[1]
    distance_part = (x1 - 1)**2 + (x2 - 1)**2
    circle_gap = x1**2 + x2**2 - 0.25
    return distance_part + circle_gap**2
# define gradient
def gradient_5_1(x):
    """Return the gradient of test function 5 (c = 1) as a column.

    Component i is 2*(xi - 1) + 4*xi*(x1^2 + x2^2 - 0.25); the result is
    reshaped to (-1, 1).
    """
    x1, x2 = x[0], x[1]
    circle_gap = x1**2 + x2**2 - 0.25
    d_dx1 = 2*(x1 - 1) + 4*x1*circle_gap
    d_dx2 = 2*(x2 - 1) + 4*x2*circle_gap
    return np.array([d_dx1, d_dx2]).reshape(-1, 1)

In [None]:
# start the search from the column vector (1,-1).T
initial=np.array([[1],[-1]])
backtracking(function_5_1,gradient_5_1,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[ 1 -1]].T
Solution found: [[0.56408669 0.56408569]].T
=====Iteration1=====
Search direction: [[-7. 11.]].T
Step length: 0.0625
New iterate: [[ 0.5625 -0.3125]].T
=====Iteration2=====
Search direction: [[0.50585938 2.83007812]].T
Step length: 0.25
New iterate: [[0.68896484 0.39501953]].T
=====Iteration3=====
Search direction: [[-0.42712114  0.60840468]].T
Step length: 0.25
New iterate: [[0.58218456 0.5471207 ]].T
=====Iteration4=====
Search direction: [[-0.06857142  0.05601467]].T
Step length: 0.25
New iterate: [[0.5650417  0.56112437]].T
=====Iteration5=====
Search direction: [[0.00171264 0.01556643]].T
Step length: 0.125
New iterate: [[0.56525579 0.56307017]].T
=====Iteration6=====
Search direction: [[-0.00453747  0.00321326]].T
Step length: 0.25
New iterate: [[0.56412142 0.56387349]].T
=====Iteration7=====
Search direction: [[0.00033335 0.0012122 ]].T
Step length: 0.125
New iterate: [[0.56416309 0.56402501]].T
=====Iteration8=====
Search direction: [[-0.00030612  0.0

**c=10**


In [None]:
# define function
def function_5_2(x):
    """Evaluate test function 5 with c = 10.

    f(x) = (x1 - 1)^2 + (x2 - 1)^2 + 10*(x1^2 + x2^2 - 0.25)^2.
    Only the entries x[0] and x[1] of ``x`` are read.
    """
    x1, x2 = x[0], x[1]
    distance_part = (x1 - 1)**2 + (x2 - 1)**2
    circle_gap = x1**2 + x2**2 - 0.25
    return distance_part + 10*circle_gap**2
# define gradient
def gradient_5_2(x):
    """Return the gradient of test function 5 (c = 10) as a column.

    Component i is 2*(xi - 1) + 40*xi*(x1^2 + x2^2 - 0.25); the result is
    reshaped to (-1, 1).
    """
    x1, x2 = x[0], x[1]
    circle_gap = x1**2 + x2**2 - 0.25
    d_dx1 = 2*(x1 - 1) + 40*x1*circle_gap
    d_dx2 = 2*(x2 - 1) + 40*x2*circle_gap
    return np.array([d_dx1, d_dx2]).reshape(-1, 1)

In [None]:
# start the search from the column vector (1,-1).T
initial=np.array([[1],[-1]])
backtracking(function_5_2,gradient_5_2,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[ 1 -1]].T
Solution found: [[0.40260809 0.40261189]].T
=====Iteration1=====
Search direction: [[-70.  74.]].T
Step length: 0.0078125
New iterate: [[ 0.453125 -0.421875]].T
=====Iteration2=====
Search direction: [[-1.32232666  5.09320068]].T
Step length: 0.125
New iterate: [[0.28783417 0.21477509]].T
=====Iteration3=====
Search direction: [[2.81771562 2.61016016]].T
Step length: 0.0625
New iterate: [[0.46394139 0.3779101 ]].T
=====Iteration4=====
Search direction: [[-0.9331796  -0.38926338]].T
Step length: 0.03125
New iterate: [[0.43477953 0.36574562]].T
=====Iteration5=====
Search direction: [[-0.1356909   0.20341224]].T
Step length: 0.125
New iterate: [[0.41781817 0.39117215]].T
=====Iteration6=====
Search direction: [[-0.13233786  0.0036503 ]].T
Step length: 0.0625
New iterate: [[0.40954705 0.39140029]].T
=====Iteration7=====
Search direction: [[0.01905408 0.10682851]].T
Step length: 0.0625
New iterate: [[0.41073793 0.39807707]].T
=====Iteration8=====
Search direction

**c=100**

In [None]:
# define function
def function_5_3(x):
    """Evaluate test function 5 with c = 100.

    f(x) = (x1 - 1)^2 + (x2 - 1)^2 + 100*(x1^2 + x2^2 - 0.25)^2.
    Only the entries x[0] and x[1] of ``x`` are read.
    """
    x1, x2 = x[0], x[1]
    distance_part = (x1 - 1)**2 + (x2 - 1)**2
    circle_gap = x1**2 + x2**2 - 0.25
    return distance_part + 100*circle_gap**2
# define gradient
def gradient_5_3(x):
    """Return the gradient of test function 5 (c = 100) as a column.

    Component i is 2*(xi - 1) + 400*xi*(x1^2 + x2^2 - 0.25); the result is
    reshaped to (-1, 1).
    """
    x1, x2 = x[0], x[1]
    circle_gap = x1**2 + x2**2 - 0.25
    d_dx1 = 2*(x1 - 1) + 400*x1*circle_gap
    d_dx2 = 2*(x2 - 1) + 400*x2*circle_gap
    return np.array([d_dx1, d_dx2]).reshape(-1, 1)

In [None]:
# start the search from the column vector (1,-1).T
initial=np.array([[1],[-1]])
backtracking(function_5_3,gradient_5_3,initial_s=1,gamma=0.3,beta=0.5,initial_x=initial)

Initial point: [[ 1 -1]].T
Solution found: [[0.35979134 0.35978779]].T
=====Iteration1=====
Search direction: [[-700.  704.]].T
Step length: 0.0009765625
New iterate: [[ 0.31640625 -0.3125    ]].T
=====Iteration2=====
Search direction: [[ 7.97765255 -3.90385437]].T
Step length: 0.0078125
New iterate: [[ 0.37873166 -0.34299886]].T
=====Iteration3=====
Search direction: [[-0.43689437  4.20697682]].T
Step length: 0.0078125
New iterate: [[ 0.37531842 -0.31013186]].T
=====Iteration4=====
Search direction: [[3.1941601  1.01324565]].T
Step length: 0.015625
New iterate: [[ 0.42522717 -0.29429989]].T
=====Iteration5=====
Search direction: [[-1.81523636  4.64052656]].T
Step length: 0.0078125
New iterate: [[ 0.41104564 -0.25804578]].T
=====Iteration6=====
Search direction: [[3.55438672 1.02418882]].T
Step length: 0.0078125
New iterate: [[ 0.43881429 -0.2500443 ]].T
=====Iteration7=====
Search direction: [[0.23067757 3.00819186]].T
Step length: 0.125
New iterate: [[0.46764898 0.12597968]].T
=====I