$\large\textbf{Consider the function :}$ \\
\begin{align}
\mathbf{q(x)} = \sqrt{x_1^2 + 4} + \sqrt{x_2^2 + 4} 
  \\
\end{align}

In [1]:
import numpy as np

In [2]:
#Now we will define a Python function which will compute and return the objective function value 
def evalf(x): 
  #Input: x is a numpy array of size 2
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 

  return (np.sqrt((x[0]**2)+4)+np.sqrt((x[1]**2)+4))

In [3]:
#Now we will define a Python function which will compute and return the gradient value as a numpy array 
def evalg(x):  
  #Input: x is a numpy array of size 2 
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 

  return np.array([x[0]/np.sqrt(x[0]**2+4), x[1]/np.sqrt(x[1]**2+4)])

In [4]:
#method to find Hessian matrix:
def evalh(x): 
  assert type(x) is np.ndarray 
  assert len(x) == 2

  return np.array([[4/(x[0]**2+4)**(3/2), 0],[0, 4/(x[1]**2+4)**(3/2)]])

In [5]:
#method to find inverse of Hessian matrix:
def evalh_inv(x):
  assert type(x) is np.ndarray 
  assert len(x) == 2

  return np.linalg.inv(evalh(x))

In [6]:
#Complete the module to compute the steplength by backtracking without scaling
def compute_steplength_backtracking(x, gradf, alpha_start, rho, gamma):
  assert type(x) is np.ndarray and len(gradf) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  
  alpha = alpha_start
  pk=-gradf
  while evalf(x+alpha*pk)>evalf(x)+gamma*alpha*np.linalg.multi_dot([gradf,pk]):
    alpha=rho*alpha
  
  return alpha

In [7]:
def compute_steplength_backtracking_scaled_direction(x, gradf, direction, alpha_start, rho, gamma):
  assert type(x) is np.ndarray and len(gradf) == 2 
  assert type(gradf) is np.ndarray and len(gradf) == 2 
  assert type(direction) is np.ndarray and len(direction) == 2 
  assert type(alpha_start) is float and alpha_start>=0. 
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0. 
  
  alpha = alpha_start
  while evalf(x+alpha*direction)>evalf(x)+gamma*alpha*np.linalg.multi_dot([gradf,direction]):
    alpha=rho*alpha

  return alpha

In [8]:
#line search type
NEWTONS_CONSTANT_STEP_LENGTH = 1
NEWTONS_BACKTRACKING_LINE_SEARCH = 2
WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH = 3

In [9]:
#code for Newtons method with scaling to find the minimizer

def find_minimizer_newtons(x, tol, line_search_type, *args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(x) is np.ndarray and len(x) == 2 #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  g_x = evalg(x)
  
  #initialization for backtracking line search
  if(line_search_type == NEWTONS_BACKTRACKING_LINE_SEARCH):
    alpha_start = args[0]
    rho = args[1]
    gamma = args[2]


  if line_search_type == NEWTONS_CONSTANT_STEP_LENGTH:
    k=0
    while (np.linalg.norm(g_x) > tol):
      D_k = evalh_inv(x)
      direction = -np.matmul(D_k, g_x)
      step_length = 1.0
      #implement the gradient descent steps here   
      x = np.add(x, np.multiply(step_length,direction)) #update x = x + step_length*direction
      k += 1 #increment iteration
      g_x = evalg(x) #compute gradient at new point

  elif line_search_type == NEWTONS_BACKTRACKING_LINE_SEARCH:
    k=0
    while (np.linalg.norm(g_x) > tol):
      D_k = evalh_inv(x)
      direction = -np.matmul(D_k, g_x)
      step_length = compute_steplength_backtracking_scaled_direction(x, g_x, direction, alpha_start, rho, gamma)
      #implement the gradient descent steps here   
      x = np.add(x, np.multiply(step_length,direction)) #update x = x + step_length*direction
      k += 1 #increment iteration
      g_x = evalg(x) #compute gradient at new point

  else:
      raise ValueError('Line search type unknown. Please check!')

  return x, evalf(x), k

**Q 2**

In [10]:
my_start_x = np.array([2.0,2.0])
my_tol= 1e-9

alpha_start = 1.0
rho = 0.5
gamma = 0.5

In [11]:
min_x1,min_obj1,iters1=find_minimizer_newtons(my_start_x, my_tol, NEWTONS_CONSTANT_STEP_LENGTH)
print('For Newtons method with constant steplength(=1) :')
print()
print('Minimizer value: ',min_x1)
print('Minimum obj fn value: ',min_obj1)
print('Total number of iterations: ',iters1)

KeyboardInterrupt: ignored

In [12]:
min_x2,min_obj2,iters2=find_minimizer_newtons(my_start_x, my_tol, NEWTONS_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Newtons method with backtracking line search :')
print()
print('Minimizer value: ',min_x2)
print('Minimum obj fn value: ',min_obj2)
print('Total number of iterations: ',iters2)

For Newtons method with backtracking line search :

Minimizer value:  [0. 0.]
Minimum obj fn value:  4.0
Total number of iterations:  1


In **Newtons method with constant steplength(=1)** the program is not terminating because after first iteration we are getting $ \  x^1=(2,2)$ and after two iterations $x^2=(-2,-2)$ and again after three iteration $x^3=(2,2)$ and this trend is repeating. Hence, we are unable to converge to optimal value using steplength=1.

While with **Newtons method with backtracking line search** we are getting optimal soution in a single iteration.

**Q 3**

In [13]:
def find_minimizer_gd_wo_scaling(start_x, tol, line_search_type, *args): 
    assert type(start_x) is np.ndarray and len(start_x) == 2 
    assert type(tol) is float and tol>=0 
    x = start_x
    g_x = evalg(x)
    if(line_search_type == WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH):
        alpha_start = args[0]
        rho = args[1]
        gamma = args[2]
    k = 0
    while (np.linalg.norm(g_x) > tol): 
        if line_search_type == WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH:
            step_length = compute_steplength_backtracking(x,g_x, alpha_start,rho, gamma) 
        else:  
            raise ValueError('Line search type unknown. Please check!')   
        x = np.subtract(x, np.multiply(step_length,g_x))
        k += 1 
        g_x = evalg(x) 
    return x,evalf(x),k

In [14]:
min_x3,min_obj3,iters3=find_minimizer_gd_wo_scaling(my_start_x, my_tol,WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Gradient Descent Algorithm with Backtracking line search without scaling :')
print()
print('Minimizer value: ',min_x3)
print('Minimum obj fn value: ',min_obj3)
print('Total number of iterations: ',iters3)

For Gradient Descent Algorithm with Backtracking line search without scaling :

Minimizer value:  [7.62525638e-10 7.62525638e-10]
Minimum obj fn value:  4.0
Total number of iterations:  32


Newtons method using backtracking line search is more efficient because we are getting optimal solution in a single iteration only while in Gradient descent without scaling we needed 32 iterations.

**Q 4**

In [15]:
my_start_x1 = np.array([8.,8.])
my_tol= 1e-9

alpha_start = 1.0
rho = 0.5
gamma = 0.5

In [16]:
min_x4,min_obj4,iters4=find_minimizer_newtons(my_start_x1, my_tol, NEWTONS_CONSTANT_STEP_LENGTH)
print('For Newtons method with constant steplength(=1) :')
print()
print('Minimizer value: ',min_x4)
print('Minimum obj fn value: ',min_obj4)
print('Total number of iterations: ',iters4)

  


LinAlgError: ignored

In [17]:
min_x5,min_obj5,iters5=find_minimizer_newtons(my_start_x1, my_tol, NEWTONS_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Newtons method with backtracking line search :')
print()
print('Minimizer value: ',min_x5)
print('Minimum obj fn value: ',min_obj5)
print('Total number of iterations: ',iters5)

For Newtons method with backtracking line search :

Minimizer value:  [2.83764947e-12 2.83764947e-12]
Minimum obj fn value:  4.0
Total number of iterations:  13


In Newtons method with constant step length we are getting "Singular matrix" error because at a step the Hessian metrix has determinant=0. Hence, its inverse is not possible. So, the update step is not possible to be executed.

While in Newtons method with backtracking line search we are getting optimal solution in 13 iterations.
We can also say that it is not a better idea to take the step length manually instead use backtracking line search to get step length at each iteration.

**Q 5**

In [18]:
min_x6,min_obj6,iters6=find_minimizer_gd_wo_scaling(my_start_x1, my_tol,WITHOUT_SCALING_BACKTRACKING_LINE_SEARCH, alpha_start,rho,gamma)
print('For Gradient Descent Algorithm with Backtracking line search without scaling :')
print()
print('Minimizer value: ',min_x6)
print('Minimum obj fn value: ',min_obj6)
print('Total number of iterations: ',iters6)

For Gradient Descent Algorithm with Backtracking line search without scaling :

Minimizer value:  [8.3177047e-10 8.3177047e-10]
Minimum obj fn value:  4.0
Total number of iterations:  39


In gradient descent algorithm with backtracking line search without scaling we are getting optimal solution in 39 iterations which is more than that of newtons method with backtracking line search. So, from here also we can say that Newtons method is a more efficient than other.