$\large\textbf{Consider the function :}$ \\
\begin{align}
\mathbf{f(x)} = 400x_1^2 +0.004x_2^4 
  \\
\end{align}

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#Now we will define a Python function which will compute and return the objective function value 
def evalf(x):
  #Input: x is a numpy array of size 2
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 
  #after checking if the argument is valid, we can compute the objective function value
  #compute the function value and return it 
  return np.float128(400*x[0]**2 + 0.004*x[1]**4)

In [3]:
#Now we will define a Python function which will compute and return the gradient value as a numpy array 
def evalg(x):  
  #Input: x is a numpy array of size 2 
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 
  #after checking if the argument is valid, we can compute the gradient value
  #compute the gradient value and return it 
  return np.array([800*x[0], 0.016*x[1]**3])

In [4]:
#method to find Hessian matrix:
def evalh(x): 
  assert type(x) is np.ndarray 
  assert len(x) == 2

  return np.array([[800, 0],[0, 0.048*x[1]**2]])

In [5]:
#method to find inverse of Hessian matrix:
def evalh_inv(x):
  assert type(x) is np.ndarray 
  assert len(x) == 2

  return np.linalg.inv(evalh(x))

In [6]:
#Complete the module to compute the steplength by backtracking without scaling
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma):
  assert type(x) is np.ndarray and len(gradf) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  
  alpha = alpha_start
  pk=-gradf
  while evalf(x+alpha*pk)>evalf(x)+gamma*alpha*np.linalg.multi_dot([gradf,pk]):
    alpha=rho*alpha
  
  return alpha

In [7]:
def compute_steplength_backtracking_scaled_direction(x, gradf, direction, alpha_start, rho, gamma):
  assert type(x) is np.ndarray and len(gradf) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(direction) is np.ndarray and len(direction) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  
  alpha = alpha_start
  while evalf(x+alpha*direction)>evalf(x)+gamma*alpha*np.linalg.multi_dot([gradf,direction]):
    alpha=rho*alpha

  return alpha

In [8]:
#line search type
NEWTONS_CONSTANT_STEP_LENGTH = 1
NEWTONS_BACKTRACKING_LINE_SEARCH = 2
WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH = 3
DIAG_MATRIX_SCALING_BACKTRACKING_LINE_SEARCH = 4

In [9]:
#code for Newtons method with scaling to find the minimizer

def find_minimizer_newtons(x, tol, line_search_type, *args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  g_x = evalg(x)
  
  #initialization for backtracking line search
  if(line_search_type == NEWTONS_BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]


  if line_search_type == NEWTONS_CONSTANT_STEP_LENGTH:
    k=0
    while (np.linalg.norm(g_x) > tol):
      D_k = evalh_inv(x)
      direction = -np.matmul(D_k, g_x)
      step_length = 1
      #implement the gradient descent steps here   
      x = np.add(x, np.multiply(step_length,direction)) #update x = x + step_length*direction
      k += 1 #increment iteration
      g_x = evalg(x) #compute gradient at new point

  elif line_search_type == NEWTONS_BACKTRACKING_LINE_SEARCH:
    k=0
    while (np.linalg.norm(g_x) > tol):
      D_k = evalh_inv(x)
      direction = -np.matmul(D_k, g_x)
      step_length = compute_steplength_backtracking_scaled_direction(x, g_x, direction, alpha_start, rho, gamma)
      #implement the gradient descent steps here   
      x = np.add(x, np.multiply(step_length,direction)) #update x = x + step_length*direction
      k += 1 #increment iteration
      g_x = evalg(x) #compute gradient at new point

  else:
      raise ValueError('Line search type unknown. Please check!')

  return x, evalf(x), k

**Q 3**

In [10]:
my_start_x = np.array([2.,2.])
my_tol= 1e-9

alpha_start = 1.0
rho = 0.5
gamma = 0.5

In [11]:
min_x1,min_obj1,iters1=find_minimizer_newtons(my_start_x, my_tol, NEWTONS_CONSTANT_STEP_LENGTH)
print('For Newtons method with constant steplength(=1) :')
print()
print('Minimizer value: ',min_x1)
print('Minimum obj fn value: ',min_obj1)
print('Total number of iterations: ',iters1)

For Newtons method with constant steplength(=1) :

Minimizer value:  [0.         0.00304488]
Minimum obj fn value:  3.4382653805813626168e-13
Total number of iterations:  16


In [12]:
min_x2,min_obj2,iters2=find_minimizer_newtons(my_start_x, my_tol, NEWTONS_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Newtons method with backtracking line search :')
print()
print('Minimizer value: ',min_x2)
print('Minimum obj fn value: ',min_obj2)
print('Total number of iterations: ',iters2)

For Newtons method with backtracking line search :

Minimizer value:  [0.         0.00304488]
Minimum obj fn value:  3.4382653805813626168e-13
Total number of iterations:  16


We are getting same solutions within same number of iterations in both cases because after applying backtracking line search we are getting the same steplength(=1). Hence the same solution.

**Q 4**

In [13]:
#The method defines a way to construct D_k matrix used in scaling the gradient in the modified gradient descent scheme
def compute_D_k(x):
  assert type(x) is np.ndarray
  assert len(x) == 2
  H=evalh(x)
  return np.array([[1/H[0][0], 0],[0, 1/H[1][1]]])

In [14]:
#complete the code for gradient descent with scaling to find the minimizer

def find_minimizer_Q4(x, tol, line_search_type,*args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  g_x = evalg(x)
  
  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  if line_search_type == WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH:
    k = 0
    while (np.linalg.norm(g_x) > tol):
      step_length = compute_steplength_backtracking(x, g_x, alpha_start, rho, gamma)
      #implement the gradient descent steps here   
      x = np.add(x, np.multiply(step_length,g_x)) #update x = x + step_length*direction
      k += 1 #increment iteration
      g_x = evalg(x) #compute gradient at new point
  
  elif line_search_type == DIAG_MATRIX_SCALING_BACKTRACKING_LINE_SEARCH:
    k = 0
    while (np.linalg.norm(g_x) > tol):
      D_k = compute_D_k(x)
      direction = -np.matmul(D_k, g_x)
      step_length = compute_steplength_backtracking_scaled_direction(x, g_x, direction, alpha_start, rho, gamma)
      #implement the gradient descent steps here   
      x = np.add(x, np.multiply(step_length,direction)) #update x = x + step_length*direction
      k += 1 #increment iteration
      g_x = evalg(x) #compute gradient at new point
  
  else:  
    raise ValueError('Line search type unknown. Please check!')

  return x, evalf(x), k

In [15]:
my_start_x = np.array([np.float128(2),np.float128(2)])

In [16]:
min_x3,min_obj3,iters3=find_minimizer_Q4(my_start_x, my_tol, WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Gradient Descent Algorithm with Backtracking line search without scaling :')
print()
print('Minimizer value: ',min_x3)
print('Minimum obj fn value: ',min_obj3)
print('Total number of iterations: ',iters3)

  import sys


KeyboardInterrupt: ignored

In [17]:
min_x4,min_obj4,iters4=find_minimizer_Q4(my_start_x, my_tol, DIAG_MATRIX_SCALING_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Gradient Descent Algorithm with Backtracking line search with scaling(with help of diagonal matrix) :')
print()
print('Minimizer value: ',min_x4)
print('Minimum obj fn value: ',min_obj4)
print('Total number of iterations: ',iters4)

For Gradient Descent Algorithm with Backtracking line search with scaling(with help of diagonal matrix) :

Minimizer value:  [0.         0.00304488]
Minimum obj fn value:  3.4382653805813657382e-13
Total number of iterations:  16


For Gradient Descent Algorithm with Backtracking line search without scaling the program was not terminating(i ran the code for more than 4 hours and it was still running it is taking no. of iterations of order $10^7$). So, clearly this algorithm is a very bad choice for this problem.

While Gradient descent algorithm with backtracking line search with scaling is again giving same result as of methods in **Part 3** of this question because the hessian matrix is coming out to be a diagonal matrix and the inverse of a diagonal matrix is just a diagonal matrix with entries as reciprocal of corrosponding diagonal entries of the matrix. So the inverse of hessian(which is used for scaling in Newtons method with Backtracking line search) is same as Diagonal matrix(which is used for scaling in Gradient descent with scaling with backtracking line search)