The given function is:

$f(x)=\sqrt{x_1^2 + 4} +\sqrt{x_2^2 + 4}$


So, the Gradient of the function $f(x)$ will be:

$\nabla f(\mathbf{x}) =
\begin{bmatrix}
  \frac{x_1}{\sqrt{x_1^2 +4}} & \frac{x_2}{\sqrt{x_2^2 +4}}
\end{bmatrix}$  \\
 Hessian of $f(\mathbf{x})$ will be:  \\

$\nabla^2 f(\mathbf{x}) =
\begin{bmatrix}
  \frac{4}{(x_1^2 +4)^{3/2}}& 0 \\ 0 & \frac{4}{(x_2^2 +4)^{3/2}}
\end{bmatrix}$  \\

In [1]:
import numpy as np
#Finding hessian matrix
def evalh(x): 
  assert type(x) is np.ndarray and len(x) == 2
  return np.array([[ 4/((np.sqrt((x[0]**2)+4))*((x[0]**2)+4)), 0], [0, 4/((np.sqrt((x[1]**2)+4))*((x[1]**2)+4))]])

In [2]:
#Finding gradient of the function
def evalg(x):  
  #Input: x is a numpy array of size 2 
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 
  #after checking if the argument is valid, we can compute the gradient value
  #compute the gradient value and return it 
  return np.array([(x[0])/(np.sqrt((x[0]**2)+4)),  (x[1])/(np.sqrt((x[1]**2)+4))])

In [3]:
def evalf(x):
  assert type(x) is np.ndarray and len(x)==2
  return (np.sqrt((x[0]**2)+4)+np.sqrt((x[1]**2)+4))

In [4]:
#Evaluating the diagonal matrix for scaling
def compute_D_k(x):
  assert type(x) is np.ndarray
  assert len(x) == 2
  #compute and return D_k
  return np.linalg.inv(evalh(x))

In [5]:
def compute_steplength_backtracking_scaled_direction(x,gradf,alpha_start, rho, gamma): #add appropriate arguments to the function 
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  #assert type(direction) is np.ndarray and len(direction) == 2 
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  
  alpha = alpha_start
  p = -gradf
  D_k = compute_D_k(x)
  m = np.matmul(D_k,p)
  while evalf(x)<evalf(x+alpha*m)+(np.matmul(np.matrix.transpose(gradf), m))*alpha*gamma:
    alpha = alpha*rho
  return alpha

In [6]:
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma): #add appropriate arguments to the function 
  assert type(x) is np.ndarray and len(x) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  #Complete the code 
  alpha = alpha_start
  p = -gradf
  #implement the backtracking line search
  while evalf(x + alpha*p) > (evalf(x)-(gamma*alpha*np.dot(p,p))):
    #while evalf(x + alpha*p) > evalf(x) + gamma * alpha* (np.matmul(np.matrix.transpose(gradf), p) ):  
    alpha = rho*alpha

  #print('final step length:',alpha)
  return alpha

In [7]:
BACKTRACKING_LINE_SEARCH = 1
CONSTANT_STEP_LENGTH = 2

In [8]:
#complete the code for gradient descent without scaling to find the minimizer
def find_minimizer_gd(start_x, tol, line_search_type,*args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0. 
  x = start_x
  g_x = evalg(x)
  gradf = evalg(x)

  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking(x,gradf, alpha_start,rho, gamma) #call the new function you wrote to compute the steplength
    elif line_search_type == CONSTANT_STEP_LENGTH: #do a gradient descent with constant step length
      step_length = 1.0
    else:  
      raise ValueError('Line search type unknown. Please check!')
    
    #implement the gradient descent steps here   
    x = np.subtract(x, np.multiply(step_length,g_x)) #update x = x - step_length*g_x
    k += 1 #increment iteration
    g_x = evalg(x) #compute gradient at new point
    gradf = evalg(x)
    #print('iter:',k, ' x:', x, ' f(x):', evalf(x), ' grad at x:', g_x, ' gradient norm:', np.linalg.norm(g_x))
  return x ,k


In [9]:
#complete the code for gradient descent with scaling to find the minimizer

def find_minimizer_gdscaling(start_x, tol, line_search_type,*args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(start_x) is np.ndarray and len(start_x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0.
  x = start_x
  g_x = evalg(x)
  gradf = evalg(x)

  #initialization for backtracking line search
  if(line_search_type == BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]

  k = 0

  while (np.linalg.norm(g_x) > tol): #continue as long as the norm of gradient is not close to zero upto a tolerance tol
    D_k = compute_D_k(x) 
    if line_search_type == BACKTRACKING_LINE_SEARCH:
      step_length = compute_steplength_backtracking_scaled_direction(x,gradf,alpha_start, rho, gamma) #call the new function you wrote to compute the steplength
    elif line_search_type == CONSTANT_STEP_LENGTH: #do a gradient descent with constant step length
      step_length = 1.0
    else:  
      raise ValueError('Line search type unknown. Please check!')
    
    #implement the gradient descent steps here   
    x = np.subtract(x, np.multiply(step_length,np.matmul(D_k,g_x)))
    k += 1 #increment iteration
    g_x = evalg(x) #compute gradient at new point
    gradf = evalg(x)
    print('iter:',k, ' x:', x, ' f(x):', evalf(x), ' grad at x:', g_x, ' gradient norm:', np.linalg.norm(g_x))

  return x ,k

#Que-2:

In [10]:
my_start_x = np.array([2.,2.])
my_tol= 1e-9
alpha_start = 1.
rho = 0.5
gamma = 0.5

In [11]:
print("For Newton Method with Constant Step Length:")
x_cons, k_cons = find_minimizer_gdscaling(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
print("\t\t The Minimizer is : ",x_cons,"\n \t\t The minimum objective function value: ",evalf(x_cons),"\n \t\t And Num. of iterations: ",k_cons)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
iter: 2721  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 2722  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 2723  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 2724  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 2725  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 2726  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 2727  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 2728  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  g

KeyboardInterrupt: ignored

The minimizer value keeps oscillating on points (-2,-2) and (2,2) .so,  this would not converge for the given starting point and the step length.

In [None]:
print("For Newton Method with Back Tracking Line Search:")
x_bls, k_bls = find_minimizer_gdscaling(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start,rho,gamma)
print("\t\t The Minimizer is : ",x_bls,"\n \t\t The minimum objective function value: ",evalf(x_bls),"\n \t\t And Num. of iterations: ",k_bls)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
iter: 48225  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 48226  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 48227  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 48228  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 48229  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 48230  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.70710678]  gradient norm: 0.9999999999999999
iter: 48231  x: [-2. -2.]  f(x): 5.656854249492381  grad at x: [-0.70710678 -0.70710678]  gradient norm: 0.9999999999999999
iter: 48232  x: [2. 2.]  f(x): 5.656854249492381  grad at x: [0.70710678 0.7071

KeyboardInterrupt: ignored

The Algorithm follows the same result for the backtracking line search also. It keeps rotating between (-2,-2) and (2,2).

#Que-3:

In [12]:
print("For GRadient Descent (Without Scaling and BLS):")
x_bls_ws, k_bls_ws = find_minimizer_gd(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start,rho,gamma)
print("\t\t The Minimizer is : ",x_bls_ws,"\n \t\t The minimum objective function value: ",evalf(x_bls_ws),"\n \t\t And Num. of iterations: ",k_bls_ws)

For GRadient Descent (Without Scaling and BLS):
		 The Minimizer is :  [7.62525638e-10 7.62525638e-10] 
 		 The minimum objective function value:  4.0 
 		 And Num. of iterations:  32


In [13]:
print("For Gradient Descent Method with Constant Step Length(Without Scaling):")
x_cons, k_cons = find_minimizer_gd(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
print("\t\t The Minimizer is : ",x_cons,"\n \t\t The minimum objective function value: ",evalf(x_cons),"\n \t\t And Num. of iterations: ",k_cons)


For Gradient Descent Method with Constant Step Length(Without Scaling):
		 The Minimizer is :  [7.62525638e-10 7.62525638e-10] 
 		 The minimum objective function value:  4.0 
 		 And Num. of iterations:  32


We get the same number of iterations from both line search methods.

#Que-4:

#For starting point = (8,8):

In [14]:
my_start_x = np.array([8.,8.])

In [16]:
print("For Newton Method with Constant Step Length:")
x_cons, k_cons = find_minimizer_gdscaling(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
print("\t\t The Minimizer is : ",x_cons,"\n \t\t The minimum objective function value: ",evalf(x_cons),"\n \t\t And Num. of iterations: ",k_cons)


For Newton Method with Constant Step Length:
iter: 1  x: [-128. -128.]  f(x): 256.03124809288414  grad at x: [-0.99987795 -0.99987795]  gradient norm: 1.414040960485301
iter: 2  x: [524288. 524288.]  f(x): 1048576.0000076294  grad at x: [1. 1.]  gradient norm: 1.4142135623628054
iter: 3  x: [-3.6028797e+16 -3.6028797e+16]  f(x): 7.205759403792794e+16  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951
iter: 4  x: [1.16920131e+49 1.16920131e+49]  f(x): 2.3384026197294447e+49  grad at x: [1. 1.]  gradient norm: 1.4142135623730951
iter: 5  x: [-3.99583814e+146 -3.99583814e+146]  f(x): 7.99167628880894e+146  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951


  return np.array([[ 4/((np.sqrt((x[0]**2)+4))*((x[0]**2)+4)), 0], [0, 4/((np.sqrt((x[1]**2)+4))*((x[1]**2)+4))]])


LinAlgError: ignored

In [17]:
print("For Newton Method with Back Tracking Line Search:")
x_bls, k_bls = find_minimizer_gdscaling(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start,rho,gamma)
print("\t\t The Minimizer is : ",x_bls,"\n \t\t The minimum objective function value: ",evalf(x_bls),"\n \t\t And Num. of iterations: ",k_bls)


For Newton Method with Back Tracking Line Search:
iter: 1  x: [-9. -9.]  f(x): 18.439088914585774  grad at x: [-0.97618706 -0.97618706]  gradient norm: 1.380536979925267
iter: 2  x: [14.90625 14.90625]  f(x): 30.07964687708286  grad at x: [0.99111868 0.99111868]  gradient norm: 1.40165348351778
iter: 3  x: [-37.77716303 -37.77716303]  f(x): 75.66013604265505  grad at x: [-0.99860151 -0.99860151]  gradient norm: 1.4122357980713027
iter: 4  x: [67.81547105 67.81547105]  f(x): 135.68991286687378  grad at x: [0.9995654 0.9995654]  gradient norm: 1.413598945837931
iter: 5  x: [-84.60172444 -84.60172444]  f(x): 169.2507226451951  grad at x: [-0.99972069 -0.99972069]  gradient norm: 1.4138185555680465
iter: 6  x: [211.2338078 211.2338078]  f(x): 422.4865515295721  grad at x: [0.99995518 0.99995518]  gradient norm: 1.4141501770163185
iter: 7  x: [-364.08593233 -364.08593233]  f(x): 728.1828509984783  grad at x: [-0.99998491 -0.99998491]  gradient norm: 1.4141922256673842
iter: 8  x: [372.36678

  return (np.sqrt((x[0]**2)+4)+np.sqrt((x[1]**2)+4))


 7.504710905416808e+54  grad at x: [1. 1.]  gradient norm: 1.4142135623730951
iter: 217  x: [-7.4958562e+54 -7.4958562e+54]  f(x): 1.4991712399190398e+55  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951
iter: 218  x: [1.49210314e+55 1.49210314e+55]  f(x): 2.9842062709799856e+55  grad at x: [1. 1.]  gradient norm: 1.4142135623730951
iter: 219  x: [-2.92815988e+55 -2.92815988e+55]  f(x): 5.856319760414759e+55  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951
iter: 220  x: [5.42356057e+55 5.42356057e+55]  f(x): 1.0847121136290558e+56  grad at x: [1. 1.]  gradient norm: 1.4142135623730951
iter: 221  x: [-7.84381685e+55 -7.84381685e+55]  f(x): 1.5687633694215894e+56  grad at x: [-1. -1.]  gradient norm: 1.4142135623730951
iter: 222  x: [1.22232518e+56 1.22232518e+56]  f(x): 2.444650369551632e+56  grad at x: [1. 1.]  gradient norm: 1.4142135623730951
iter: 223  x: [-2.57459868e+56 -2.57459868e+56]  f(x): 5.149197359772328e+56  grad at x: [-1. -1.]  gradient norm: 1.41421356237

  return np.array([[ 4/((np.sqrt((x[0]**2)+4))*((x[0]**2)+4)), 0], [0, 4/((np.sqrt((x[1]**2)+4))*((x[1]**2)+4))]])


LinAlgError: ignored

For this starting point, this method is not suitable as we get Singular Matrix error since the determinant value of the hessian matrix becomes zero. which is not wanted in this procedure.

#Que-5:

In [18]:
print("For GRadient Descent (Without Scaling and BLS):")
x_bls_ws, k_bls_ws = find_minimizer_gd(my_start_x, my_tol, BACKTRACKING_LINE_SEARCH,alpha_start,rho,gamma)
print("\t\t The Minimizer is : ",x_bls_ws,"\n \t\t The minimum objective function value: ",evalf(x_bls_ws),"\n \t\t And Num. of iterations: ",k_bls_ws)

For GRadient Descent (Without Scaling and BLS):
		 The Minimizer is :  [8.3177047e-10 8.3177047e-10] 
 		 The minimum objective function value:  4.0 
 		 And Num. of iterations:  39


In [None]:
print("For Gradient Descent Method with Constant Step Length(Without Scaling):")
x_cons, k_cons = find_minimizer_gd(my_start_x, my_tol, CONSTANT_STEP_LENGTH)
print("\t\t The Minimizer is : ",x_cons,"\n \t\t The minimum objective function value: ",evalf(x_cons),"\n \t\t And Num. of iterations: ",k_cons)


For Gradient Descent Method with Constant Step Length(Without Scaling):
		 The Minimizer is :  [1.10902729e-09 1.10902729e-09] 
 		 The minimum objective function value:  4.0 
 		 And Num. of iterations:  38


By using the gradient descent method without scaling, we get reasonably good solution of the fnction which seems converging to (0,0). And the number of iterationd are almost same for both the line seach methods.