In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
from timeit import default_timer as timer
from matplotlib.axis import Axis
from tabulate import tabulate

In [2]:
import numpy as np
from sklearn.datasets import load_digits
digits = load_digits()
#check the shape of digits data
print(digits.data.shape)
#check the shape of digits target
print(digits.target.shape)
#let us use the linear regression used in the previous lab
N = digits.data.shape[0] #Number of data points
d = digits.data.shape[1] #Dimension of data points
A = digits.data
#In the following code, we create a Nx1 vector of target labels
y = 1.0*np.ones([A.shape[0],1])
for i in range(digits.target.shape[0]):
  y[i] = digits.target[i]

(1797, 64)
(1797,)


**Q 1**

In [3]:
def evalf(x,lamda):
  assert type(x) is np.ndarray
  fx = np.linalg.norm(np.matmul(A,x) - y)
  fx = 0.5*(fx)**2 + 0.5*lamda*np.matmul(x.T,x)

  return fx

In [4]:
def evalg(x,lamda):
  assert type(x) is np.ndarray

  return np.matmul(A.T, np.matmul(A, x) - y) + lamda*x

In [5]:
def evalh(x,lamda):
  assert type(x) is np.ndarray

  return np.matmul(A.T,A) + lamda*np.identity(64)

In [6]:
def compute_steplength_backtracking_scaled_direction(x, gradf, lamda, direction, alpha_start, rho, gamma):
  assert type(x) is np.ndarray
  assert type(gradf) is np.ndarray
  assert type(direction) is np.ndarray
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.
  
  alpha = alpha_start
  while evalf(x+alpha*direction, lamda)>evalf(x, lamda)+gamma*alpha*np.matmul(gradf.T,direction):
    alpha=rho*alpha

  return alpha

In [7]:
def find_minimizer_newtons(start_x, tol, lamda, *args):
  assert type(start_x) is np.ndarray 
  assert type(tol) is float and tol>=0 

  x = start_x
  n = x.shape[0]
  g_x = evalg(x, lamda)

  x_k_list =[]

  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  k=0
  while (np.linalg.norm(g_x) > tol):
    D_k = np.linalg.inv(evalh(x, lamda))
    p_k = -np.matmul(D_k, g_x)
    step_length = compute_steplength_backtracking_scaled_direction(x, g_x, lamda, p_k, alpha_start, rho, gamma)
  
    x = np.add(x,np.multiply(step_length,p_k))
    x_k_list.append(x)
    k += 1 
    g_x = evalg(x, lamda)
  return x, evalf(x,lamda), k, x_k_list

In [17]:
'''
print("Using Newton's Method ")
print("\nDirect OLSLR:")
minimizer1,fn1,iter1, x_k_list1 = find_minimizer_newtons(np.zeros((64,1)), 1e-5, 0, 0.9, 0.5, 0.5)
#we have taken lambda=0 for direct OLSLR
print('x* = ',minimizer1)
'''

'\nprint("Using Newton\'s Method ")\nprint("\nDirect OLSLR:")\nminimizer1,fn1,iter1, x_k_list1 = find_minimizer_newtons(np.zeros((64,1)), 1e-5, 0, 0.9, 0.5, 0.5)\n#we have taken lambda=0 for direct OLSLR\nprint(\'x* = \',minimizer1)\n'

We are getting 'Singular matrix' error while applying Newtons method i.e. inverse of a matrix is not possible to obtain at some iteration.

In [9]:
print("Using Newton's Method ")
print("\nRegularized OLSLR:")
minimizer1,fn1,iter1, x_k_list1 = find_minimizer_newtons(np.zeros((64,1)), 1e-5, 0.001, 0.9, 0.5, 0.5)

print('xf* = ',minimizer1)

Using Newton's Method 

Regularized OLSLR:
x* =  [[ 0.00000000e+00]
 [ 9.69076882e-02]
 [-4.32192762e-03]
 [-7.75916338e-03]
 [ 7.49591987e-02]
 [ 1.13946582e-02]
 [-2.71293921e-02]
 [-7.34410664e-03]
 [ 9.98267907e-01]
 [-2.88089376e-02]
 [ 1.18688356e-01]
 [ 6.60922719e-02]
 [-5.57075915e-02]
 [-6.97056149e-02]
 [ 9.65844014e-02]
 [ 2.55196013e-01]
 [-7.29830665e-01]
 [ 2.42711745e-02]
 [ 7.73241544e-02]
 [-2.33008498e-02]
 [-5.64077619e-02]
 [ 5.72413971e-02]
 [-4.88675248e-02]
 [-2.62555998e-01]
 [-9.06071169e-01]
 [-1.49769638e-01]
 [ 5.64022790e-02]
 [ 8.96665319e-02]
 [ 8.39315938e-02]
 [ 9.85410047e-02]
 [ 1.69269848e-03]
 [-2.96649688e+00]
 [ 0.00000000e+00]
 [-1.54361470e-01]
 [-9.32404640e-03]
 [ 1.39497978e-01]
 [-3.69237437e-02]
 [ 5.46111773e-02]
 [-9.20425586e-03]
 [ 0.00000000e+00]
 [ 1.03326506e-01]
 [ 1.23983484e-01]
 [-1.37635230e-02]
 [ 5.40029021e-03]
 [ 1.31185700e-01]
 [ 5.49577815e-02]
 [ 2.24935899e-02]
 [ 7.48046265e-03]
 [ 6.17507773e-01]
 [ 2.44100619e-02]
 

**Q 2**

In [11]:
#code for BFGS method to find the minimizer
def find_minimizer_BFGS(start_x, tol, lamda, B_k, *args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(start_x) is np.ndarray #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  n = len(start_x)
  x = start_x.reshape((n,1))
  g_x = evalg(x, lamda)
  x_k_list=[]
  #initialization for backtracking line search
  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  k=0
  while (np.linalg.norm(g_x) > tol):
    p_k = -np.matmul(B_k, g_x)
    step_length = compute_steplength_backtracking_scaled_direction(x, g_x,lamda, p_k, alpha_start, rho, gamma)

    x_k = x
    s_k = np.multiply(step_length,p_k)
    #x = np.add(x, np.multiply(step_length,p_k)) #update x = x + step_length*direction
    x = np.add(x, s_k)#s_k = x - x_k
    y_k = evalg(x,lamda)-evalg(x_k, lamda)
    s_yT = np.matmul(s_k,y_k.T)
    y_sT = np.matmul(y_k, s_k.T)
    u_k = 1/(np.matmul(y_k.T,s_k))
    term_11 = np.subtract(np.identity(n) , u_k*s_yT)
    term_13 = np.subtract(np.identity(n) , u_k*y_sT)
    B_k = np.matmul(term_11,np.matmul(B_k,term_13)) + u_k*np.matmul(s_k,s_k.T)
    
    x_k_list.append(x)
    k += 1 #increment iteration
    g_x = evalg(x, lamda) #compute gradient at new point

  return x, evalf(x, lamda), k, x_k_list

In [13]:
B_k = np.identity(64)/10

print("Using BFGS Method ")
print("\nDirect OLSLR:")
minimizer3,fn3,iter3, x_k_list3 = find_minimizer_BFGS(np.zeros((64,1)), 1e-5, 0, B_k, 0.9, 0.5, 0.5)
#we have taken lambda=0 for direct OLSLR
print('x* = ',minimizer3)

Using BFGS Method 

Direct OLSLR:
x* =  [[ 0.00000000e+00]
 [ 9.69033568e-02]
 [-4.32277232e-03]
 [-7.76028320e-03]
 [ 7.49594380e-02]
 [ 1.13947198e-02]
 [-2.71328245e-02]
 [-7.33176335e-03]
 [ 9.98337967e-01]
 [-2.88095538e-02]
 [ 1.18688288e-01]
 [ 6.60916265e-02]
 [-5.57069862e-02]
 [-6.97063705e-02]
 [ 9.65876439e-02]
 [ 2.55182251e-01]
 [-7.29828605e-01]
 [ 2.42709916e-02]
 [ 7.73249597e-02]
 [-2.33000278e-02]
 [-5.64086144e-02]
 [ 5.72426822e-02]
 [-4.88717684e-02]
 [-2.62467763e-01]
 [-9.06562842e-01]
 [-1.49767791e-01]
 [ 5.64019538e-02]
 [ 8.96663590e-02]
 [ 8.39318159e-02]
 [ 9.85411936e-02]
 [ 1.69317613e-03]
 [-2.96805758e+00]
 [ 0.00000000e+00]
 [-1.54362338e-01]
 [-9.32361205e-03]
 [ 1.39497628e-01]
 [-3.69234835e-02]
 [ 5.46111776e-02]
 [-9.20505070e-03]
 [ 0.00000000e+00]
 [ 1.03279535e-01]
 [ 1.23983258e-01]
 [-1.37639605e-02]
 [ 5.40087816e-03]
 [ 1.31185107e-01]
 [ 5.49570758e-02]
 [ 2.24938237e-02]
 [ 7.47977909e-03]
 [ 6.17755029e-01]
 [ 2.44122357e-02]
 [ 1.42333

In [14]:
B_k = np.identity(64)/10

print("Using BFGS Method ")
print("\nRegularized OLSLR:")
minimizer4,fn4,iter4, x_k_list4 = find_minimizer_BFGS(np.zeros((64,1)), 1e-5, 0.001, B_k, 0.9, 0.5, 0.5)
#we have taken lambda=0 for direct OLSLR
print('xf* = ',minimizer4)

Using BFGS Method 

Regularized OLSLR:
x* =  [[ 0.00000000e+00]
 [ 9.69076882e-02]
 [-4.32192762e-03]
 [-7.75916339e-03]
 [ 7.49591987e-02]
 [ 1.13946582e-02]
 [-2.71293921e-02]
 [-7.34410666e-03]
 [ 9.98267907e-01]
 [-2.88089376e-02]
 [ 1.18688356e-01]
 [ 6.60922719e-02]
 [-5.57075915e-02]
 [-6.97056149e-02]
 [ 9.65844014e-02]
 [ 2.55196013e-01]
 [-7.29830662e-01]
 [ 2.42711745e-02]
 [ 7.73241544e-02]
 [-2.33008498e-02]
 [-5.64077619e-02]
 [ 5.72413971e-02]
 [-4.88675248e-02]
 [-2.62555998e-01]
 [-9.06071182e-01]
 [-1.49769638e-01]
 [ 5.64022790e-02]
 [ 8.96665319e-02]
 [ 8.39315938e-02]
 [ 9.85410047e-02]
 [ 1.69269848e-03]
 [-2.96649687e+00]
 [ 0.00000000e+00]
 [-1.54361470e-01]
 [-9.32404638e-03]
 [ 1.39497978e-01]
 [-3.69237437e-02]
 [ 5.46111773e-02]
 [-9.20425585e-03]
 [ 0.00000000e+00]
 [ 1.03326506e-01]
 [ 1.23983484e-01]
 [-1.37635230e-02]
 [ 5.40029022e-03]
 [ 1.31185700e-01]
 [ 5.49577815e-02]
 [ 2.24935899e-02]
 [ 7.48046267e-03]
 [ 6.17507773e-01]
 [ 2.44100619e-02]
 [ 1.