For the given function $f(\mathbf{x}) = 400x_1^2 + 0.004x_2^4$:

  Since the given function is differentiable and its second-order partial derivatives exist and are continuous, we can compute its Hessian matrix, which is:
$$\nabla^2f(\mathbf{x}) = H = \begin{bmatrix}
800 & 0 \\ 0 & 0.048x_2^2\end{bmatrix}$$
Thus we take $D^k$ as follows:


$\mathbf{D^k} =
\begin{bmatrix}
  \frac{1}{800} & 0 \\ 0 & \frac{1}{0.048x_2^2} 
\end{bmatrix} $ which is a diagonal matrix whose diagonal entries are the reciprocals of the second-order partial derivatives $\frac{\partial^2 f}{\partial x_i^2}$.

In [1]:
import numpy as np 

#method to find Hessian matrix
def evalh(x):
  """Return the 2x2 Hessian of f(x) = 400*x1^2 + 0.004*x2^4 evaluated at x.

  x must be a numpy array of length 2 (checked with assertions). The result
  is diagonal: 800 in the first diagonal slot and 0.048*x[1]**2 in the second.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  # Second partial derivatives; the mixed partials are zero for this function.
  d2f_dx1 = 800
  d2f_dx2 = 0.048*x[1]**2
  top_row = [d2f_dx1, 0]
  bottom_row = [0, d2f_dx2]
  return np.array([top_row, bottom_row])

In [2]:
def evalf(x):
  """Return the objective value f(x) = 400*x[0]**2 + 0.004*x[1]**4.

  x must be a numpy array of length 2; anything else trips the assertion.
  """
  # Reject arbitrary arguments before touching the components.
  assert type(x) is np.ndarray and len(x) == 2
  quadratic_part = 400*x[0]**2
  quartic_part = 0.004*x[1]**4
  return quadratic_part + quartic_part

In [3]:
def evalg(x):
  """Return the gradient of f(x) = 400*x1^2 + 0.004*x2^4 at x.

  x must be a numpy array of length 2. The result is a length-2 numpy array
  holding (800*x[0], 0.016*x[1]**3).
  """
  assert type(x) is np.ndarray and len(x) ==2
  df_dx1 = 800*x[0]
  df_dx2 = 0.016*x[1]**3
  return np.array([df_dx1, df_dx2])

In [4]:
#Evaluating the diagonal matrix for scaling
def compute_D_k(x):
  """Return the scaling matrix D_k, the inverse of the Hessian evalh(x).

  Args:
    x: numpy array of length 2 (checked with assertions).

  Returns:
    The 2x2 numpy array np.linalg.inv(evalh(x)).

  Raises:
    ValueError: if the determinant of the Hessian is exactly zero. For the
      Hessian returned by evalh this happens when 0.048*x[1]**2 evaluates
      to zero, e.g. x[1] == 0.
  """
  assert type(x) is np.ndarray
  assert len(x) == 2
  # Evaluate the Hessian once and reuse it for both the singularity check
  # and the inversion.
  hessian = evalh(x)
  if np.linalg.det(hessian) == 0:
    raise ValueError('Inverse not possible. Please check!')
  return np.linalg.inv(hessian)

In [5]:
def compute_steplength_backtracking_scaled_direction(x,gradf,alpha_start, rho, gamma):
  """Backtracking (Armijo) line search along the scaled direction -D_k @ gradf.

  Starting from alpha_start, the step length is multiplied by rho until the
  sufficient-decrease condition

      f(x + alpha*d) <= f(x) + gamma * alpha * gradf^T d,   d = -D_k @ gradf

  holds, where D_k = compute_D_k(x).

  Args:
    x: current iterate, numpy array of length 2.
    gradf: gradient at x, numpy array of length 2.
    alpha_start: initial trial step length (float, >= 0).
    rho: factor applied to alpha on every rejection (float, >= 0). The
      assertion does not enforce rho < 1; with rho >= 1 a rejected step is
      never shortened, so the loop cannot make progress.
    gamma: sufficient-decrease constant (float, >= 0).

  Returns:
    The first step length in the sequence alpha_start * rho**j that satisfies
    the condition above.

  Raises:
    ValueError: propagated from compute_D_k when the Hessian is singular.
  """
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 

  alpha = alpha_start
  direction = np.matmul(compute_D_k(x), -gradf)

  # Both quantities are independent of alpha, so compute them once.
  f_x = evalf(x)
  # Directional derivative gradf^T d; it is negative for a descent direction,
  # so it must be ADDED to f(x) to demand a decrease. (The earlier version
  # effectively subtracted it, which also accepted steps that increase f.)
  slope = np.dot(gradf, direction)

  while evalf(x + alpha*direction) > f_x + gamma*alpha*slope:
    alpha = alpha*rho
  return alpha

In [6]:
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma):
  """Backtracking (Armijo) line search along the steepest-descent direction.

  With p = -gradf, the step length starts at alpha_start and is multiplied by
  rho until

      f(x + alpha*p) <= f(x) - gamma * alpha * ||p||^2

  which is the sufficient-decrease condition, since gradf^T p = -||p||^2.

  Args:
    x: current iterate, numpy array of length 2.
    gradf: gradient at x, numpy array of length 2.
    alpha_start: initial trial step length (float, >= 0).
    rho: factor applied to alpha on every rejection (float, >= 0). The
      assertion does not enforce rho < 1; with rho >= 1 a rejected step is
      never shortened, so the loop cannot make progress.
    gamma: sufficient-decrease constant (float, >= 0).

  Returns:
    The first step length in the sequence alpha_start * rho**j that satisfies
    the condition above.
  """
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 

  alpha = alpha_start
  p = -gradf

  # f(x) and ||p||^2 do not depend on alpha; evaluate them once instead of on
  # every backtracking step.
  f_x = evalf(x)
  p_norm_sq = np.dot(p, p)

  while evalf(x + alpha*p) > f_x - gamma*alpha*p_norm_sq:
    alpha = rho*alpha

  return alpha

In [7]:
# Integer tags passed as `line_search_type` to the minimizer routines to pick
# how the step length is chosen: backtracking search (1) or a fixed step (2).
BACKTRACKING_LINE_SEARCH, CONSTANT_STEP_LENGTH = 1, 2

Code for gradient descent:

In [8]:
#complete the code for gradient descent without scaling to find the minimizer
def find_minimizer_gd(start_x, tol, line_search_type, *args, max_iter=None):
  """Minimize the objective by gradient descent without scaling.

  Iterates x <- x - step_length * evalg(x) until the gradient norm is at most
  tol (or max_iter iterations have been performed, if a cap is given).

  Args:
    start_x: starting point, numpy array of length 2.
    tol: tolerance on the gradient norm (float, >= 0).
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, the three values
      (alpha_start, rho, gamma) forwarded to compute_steplength_backtracking;
      unused for CONSTANT_STEP_LENGTH, where the step length is fixed at 1.0.
    max_iter: optional keyword-only cap on the number of iterations. The
      default None keeps the loop unbounded, so it runs until the tolerance
      is met.

  Returns:
    A tuple (x, k): the final iterate and the number of iterations performed.
    When max_iter is given and k == max_iter, the tolerance may not have been
    reached.

  Raises:
    ValueError: if line_search_type is not one of the two known values (raised
      on the first iteration, i.e. only if the loop body is entered).
    IndexError: if BACKTRACKING_LINE_SEARCH is requested with fewer than three
      extra arguments.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0. 
  assert max_iter is None or max_iter >= 0
  x = start_x
  g_x = evalg(x)

  #initialization for backtracking line search
  if line_search_type == BACKTRACKING_LINE_SEARCH:
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  #continue as long as the gradient norm exceeds tol and the optional cap allows
  while np.linalg.norm(g_x) > tol and (max_iter is None or k < max_iter):
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking(x, g_x, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH:
      step_length = 1.0
    else:  
      raise ValueError('Line search type unknown. Please check!')

    x = np.subtract(x, np.multiply(step_length, g_x)) #x = x - step_length*g_x
    k += 1
    #one gradient evaluation per iteration serves both the step and the stopping test
    g_x = evalg(x)
  return x ,k


In [9]:
#complete the code for gradient descent with scaling to find the minimizer

def find_minimizer_gdscaling(start_x, tol, line_search_type, *args, max_iter=None):
  """Minimize the objective by gradient descent with scaling matrix D_k.

  Iterates x <- x - step_length * D_k @ evalg(x), with D_k = compute_D_k(x)
  recomputed at every iterate, until the gradient norm is at most tol (or
  max_iter iterations have been performed, if a cap is given).

  Args:
    start_x: starting point, numpy array of length 2.
    tol: tolerance on the gradient norm (float, >= 0).
    line_search_type: BACKTRACKING_LINE_SEARCH or CONSTANT_STEP_LENGTH.
    *args: for BACKTRACKING_LINE_SEARCH, the three values
      (alpha_start, rho, gamma) forwarded to
      compute_steplength_backtracking_scaled_direction; unused for
      CONSTANT_STEP_LENGTH, where the step length is fixed at 1.0.
    max_iter: optional keyword-only cap on the number of iterations. The
      default None keeps the loop unbounded, so it runs until the tolerance
      is met.

  Returns:
    A tuple (x, k): the final iterate and the number of iterations performed.
    When max_iter is given and k == max_iter, the tolerance may not have been
    reached.

  Raises:
    ValueError: from compute_D_k when the Hessian at the current iterate is
      singular, or when line_search_type is not one of the two known values
      (both only if the loop body is entered).
    IndexError: if BACKTRACKING_LINE_SEARCH is requested with fewer than three
      extra arguments.
  """
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0.
  assert max_iter is None or max_iter >= 0
  x = start_x
  g_x = evalg(x)

  #initialization for backtracking line search
  if line_search_type == BACKTRACKING_LINE_SEARCH:
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  #continue as long as the gradient norm exceeds tol and the optional cap allows
  while np.linalg.norm(g_x) > tol and (max_iter is None or k < max_iter):
    D_k = compute_D_k(x) 
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x, g_x, alpha_start, rho, gamma)
    elif line_search_type == CONSTANT_STEP_LENGTH:
      step_length = 1.0
    else:  
      raise ValueError('Line search type unknown. Please check!')

    #scaled step: x = x - step_length * D_k @ g_x
    x = np.subtract(x, np.multiply(step_length, np.matmul(D_k, g_x)))
    k += 1
    #one gradient evaluation per iteration serves both the step and the stopping test
    g_x = evalg(x)

  return x ,k

#Que-3:

In [10]:
# Shared experiment settings: the starting point, the tolerance on the
# gradient norm, and the backtracking parameters (initial step length,
# shrink factor rho, sufficient-decrease constant gamma).
my_start_x = np.array([2., 2.])
my_tol = 1e-9
alpha_start, rho, gamma = 1., 0.5, 0.5

In [11]:
# Scaled descent with D_k = inverse Hessian, using a fixed unit step.
print("For Newton Method with Constant Step Length:")
x_cons, k_cons = find_minimizer_gdscaling(
  my_start_x, my_tol, CONSTANT_STEP_LENGTH
)
print(
  "\t\t The Minimizer is : ", x_cons,
  "\n \t\t The minimum objective function value: ", evalf(x_cons),
  "\n \t\t And Num. of iterations: ", k_cons,
)

# Same scaled descent, but with the step length chosen by backtracking.
print("For Newton Method with Back Tracking Line Search:")
x_bls, k_bls = find_minimizer_gdscaling(
  my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, alpha_start, rho, gamma
)
print(
  "\t\t The Minimizer is : ", x_bls,
  "\n \t\t The minimum objective function value: ", evalf(x_bls),
  "\n \t\t And Num. of iterations: ", k_bls,
)


For Newton Method with Constant Step Length:
		 The Minimizer is :  [0.         0.00304488] 
 		 The minimum objective function value:  3.4382653805813626e-13 
 		 And Num. of iterations:  16
For Newton Method with Back Tracking Line Search:
		 The Minimizer is :  [0.         0.00304488] 
 		 The minimum objective function value:  3.4382653805813626e-13 
 		 And Num. of iterations:  16


As the output above shows, Newton's method takes the same number of iterations, namely 16, with both step-length strategies (constant step length and backtracking line search).

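The minimizer and the minimum function value are also identical. This happens because the backtracking line search accepts the initial step length $\alpha = 1$ at every iteration, so both runs generate exactly the same iterates. For this particular starting point, the two strategies are therefore equally efficient under Newton's method.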

#In the given function, the Hessian matrix is itself a diagonal matrix. So, the matrix $D_k$ is simply the inverse of the Hessian matrix.

#Que-4:

In [12]:
# NOTE(review): the unscaled constant-step run below is left disabled.
# Presumably this is because a unit step maps x[0] to x[0] - 800*x[0] = -799*x[0]
# on this objective, so the iterates grow instead of converging - confirm.
#print("For Gradient Descent Method with Constant Step Length(without scaling):")
#x_cons_ws, k_cons_ws = find_minimizer_gd(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
#print("\t\t The Minimizer is : ",x_cons_ws,"\n \t\t The minimum objective function value: ",evalf(x_cons_ws),"\n \t\t And Num. of iterations: ",k_cons_ws)

# Backtracking line search with the diagonal scaling matrix D_k.
print("For Gradient Descent Method with Back Tracking Line Search(with scaling and using a diagonal matrix):")
x_bls_sc, k_bls_sc = find_minimizer_gdscaling(
  my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, alpha_start, rho, gamma
)
print(
  "\t\t The Minimizer is : ", x_bls_sc,
  "\n \t\t The minimum objective function value: ", evalf(x_bls_sc),
  "\n \t\t And Num. of iterations: ", k_bls_sc,
)

# Backtracking line search without scaling. In the recorded session this call
# was interrupted manually before it finished.
print("For Gradient Descent Method with Back Tracking Line Search(without scaling):")
x_bls_ws, k_bls_ws = find_minimizer_gd(
  my_start_x, my_tol, BACKTRACKING_LINE_SEARCH, alpha_start, rho, gamma
)
print(
  "\t\t The Minimizer is : ", x_bls_ws,
  "\n \t\t The minimum objective function value: ", evalf(x_bls_ws),
  "\n \t\t And Num. of iterations: ", k_bls_ws,
)


For Gradient Descent Method with Back Tracking Line Search(with scaling and using a diagonal matrix):
		 The Minimizer is :  [0.         0.00304488] 
 		 The minimum objective function value:  3.4382653805813626e-13 
 		 And Num. of iterations:  16
For Gradient Descent Method with Back Tracking Line Search(without scaling):


KeyboardInterrupt: ignored

With scaling by the diagonal matrix, the gradient descent algorithm reaches the minimum value of the function in a mere 16 iterations. Without scaling, gradient descent needs a very long time to reach the minimum: the run above had still not finished after about 3 hours, which is why I interrupted it (hence the KeyboardInterrupt).