In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
from timeit import default_timer as timer
from matplotlib.axis import Axis
from tabulate import tabulate
import timeit

In [2]:
def evalf(x,lamda):
  assert type(x) is np.ndarray
  fx = np.linalg.norm(np.matmul(A,x) - y)
  fx = 0.5*(fx)**2 + 0.5*lamda*np.matmul(x.T,x)

  return fx

In [3]:
def evalg(x,lamda):
  assert type(x) is np.ndarray

  return np.matmul(A.T, np.matmul(A, x) - y) + lamda*x

In [4]:
def evalh(x,lamda):
  assert type(x) is np.ndarray
  d = x.shape[0]
  return np.matmul(A.T,A) + lamda*np.identity(d)

In [5]:
def compute_steplength_backtracking_scaled_direction(x, gradf, lamda, direction, alpha_start, rho, gamma):
  assert type(x) is np.ndarray
  assert type(gradf) is np.ndarray
  assert type(direction) is np.ndarray
  assert type(alpha_start) is float and alpha_start>=0.
  assert type(rho) is float and rho>=0.
  assert type(gamma) is float and gamma>=0.
  
  alpha = alpha_start
  while evalf(x+alpha*direction, lamda)>evalf(x, lamda)+gamma*alpha*np.matmul(gradf.T,direction):
    alpha=rho*alpha

  return alpha

In [6]:
def find_minimizer_newtons(start_x, tol, lamda, *args):
  assert type(start_x) is np.ndarray 
  assert type(tol) is float and tol>=0 

  x = start_x
  n = x.shape[0]
  g_x = evalg(x, lamda)

  x_k_list =[]

  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  failure = 0
  time_start_1 = timer()

  k=0
  while (np.linalg.norm(g_x) > tol):
    D_k = np.linalg.inv(evalh(x, lamda))
    p_k = -np.matmul(D_k, g_x)
    step_length = compute_steplength_backtracking_scaled_direction(x, g_x, lamda, p_k, alpha_start, rho, gamma)
  
    x = np.add(x,np.multiply(step_length,p_k))
    x_k_list.append(x)
    k += 1 
    g_x = evalg(x, lamda)

    if timer()-time_start_1 > 1200:
      failure = 1
      break;

  return x, failure #, evalf(x,lamda), k, x_k_list

**Applying Newtons method**

In [7]:
#Code for Newton method
import numpy as np
np.random.seed(1000) #for repeatability

N = 200
ds = [1000, 5000, 10000]
lambda_reg = 0.001
eps = np.random.randn(N,1) #random noise

time_for_d = {}
Ax_minus_y_norm_sq = {}
L2_norm_diff = {}
failure_d_list = []

#For each value of dimension in the ds array, we will check the behavior of Newton method
for i in range(np.size(ds)):
  d=ds[i]
  A = np.random.randn(N,d)
  #Normalize the columns
  for j in range(A.shape[1]):
    A[:,j] = A[:,j]/np.linalg.norm(A[:,j])

  xorig = np.ones((d,1))
  y = np.dot(A,xorig) + eps

  start = timeit.default_timer()
  #call Newton method with A,y,lambda and obtain the optimal solution x_opt
  x_opt,fail = find_minimizer_newtons(np.zeros((d,1)), 1e-5, lambda_reg, 0.9, 0.5, 0.5)
  newtontime = timeit.default_timer() - start #time is in seconds

  if fail == 1:
    failure_d_list.append[d]
  else:
    time_for_d[d] = newtontime
    Ax_minus_y_norm_sq[d] = (np.linalg.norm(np.matmul(A,x_opt) - y))**2
    L2_norm_diff[d] = (np.linalg.norm(x_opt - xorig))**2

  print("For d = ",d)
  print("Time taken = ",newtontime)
  print('||Ax* - y||^2 :', (np.linalg.norm(np.subtract(np.matmul(A, x_opt), y)))**2)
  print('||x* - x_orig||^2 :', (np.linalg.norm(np.subtract(x_opt, xorig)))**2)
  print('\n')
  #print the total time and the L2 norm difference || x_opt - xorig|| for Newton method

For d =  1000
Time taken =  1.1025540950013237
||Ax* - y||^2 : 5.676824554121419e-05
||x* - x_orig||^2 : 865.3152937000971


For d =  5000
Time taken =  94.90821207799854
||Ax* - y||^2 : 9.79262950921928e-06
||x* - x_orig||^2 : 4783.681693007695


For d =  10000
Time taken =  674.7822562710007
||Ax* - y||^2 : 3.571224214197108e-06
||x* - x_orig||^2 : 9829.015751261291




**Failure cases** (memory issue) : $d = 20000, 25000, 50000, 100000, 200000, 500000, 1000000 $

(Time issue failure is not observed)

**Applying BFGS**

In [8]:
#code for BFGS method to find the minimizer
def find_minimizer_BFGS(start_x, tol, lamda, B_k, *args):
  #Input: start_x is a numpy array of size 2, tol denotes the tolerance and is a positive float value
  assert type(start_x) is np.ndarray #do not allow arbitrary arguments 
  assert type(tol) is float and tol>=0 

  n = len(start_x)
  x = start_x.reshape((n,1))
  g_x = evalg(x, lamda)
  x_k_list=[]
  #initialization for backtracking line search
  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  failure = 0
  time_start_1 = timer()

  k=0
  while (np.linalg.norm(g_x) > tol):
    p_k = -np.matmul(B_k, g_x)
    step_length = compute_steplength_backtracking_scaled_direction(x, g_x,lamda, p_k, alpha_start, rho, gamma)

    x_k = x
    s_k = np.multiply(step_length,p_k)
    #x = np.add(x, np.multiply(step_length,p_k)) #update x = x + step_length*direction
    x = np.add(x, s_k)#s_k = x - x_k
    y_k = evalg(x,lamda)-evalg(x_k, lamda)
    s_yT = np.matmul(s_k,y_k.T)
    y_sT = np.matmul(y_k, s_k.T)
    u_k = 1/(np.matmul(y_k.T,s_k))
    term_11 = np.subtract(np.identity(n) , u_k*s_yT)
    term_13 = np.subtract(np.identity(n) , u_k*y_sT)
    B_k = np.matmul(term_11,np.matmul(B_k,term_13)) + u_k*np.matmul(s_k,s_k.T)
    
    x_k_list.append(x)
    k += 1 #increment iteration
    g_x = evalg(x, lamda) #compute gradient at new point

    if timer()-time_start_1 > 1200:
      failure = 1
      break;    

  return x, failure

In [None]:
#Code for BFGS method
np.random.seed(1000) #for repeatability

N = 200
ds = [1000, 5000, 10000, 20000, 25000, 50000, 100000, 200000, 500000, 1000000]
lambda_reg = 0.001
eps = np.random.randn(N,1) #random noise

time_for_d2 = {}
Ax_minus_y_norm_sq2 = {}
L2_norm_diff2 = {}
failure_d_list2 = []

#For each value of dimension in the ds array, we will check the behavior of BFGS method
for i in range(np.size(ds)):
  d=ds[i]
  A = np.random.randn(N,d)
  #Normalize the columns
  for j in range(A.shape[1]):
    A[:,j] = A[:,j]/np.linalg.norm(A[:,j])
  xorig = np.ones((d,1))
  y = np.dot(A,xorig) + eps
  B_k = np.identity(d)/10

  start = timeit.default_timer()
  #call BFGS method with A,y,lambda and obtain the optimal solution x_opt_bfgs
  x_opt,fail = find_minimizer_BFGS(np.zeros((d,1)), 1e-5, lambda_reg, B_k, 0.9, 0.5, 0.5)
  bfgstime = timeit.default_timer() - start #time is in seconds

  if fail == 1:
    failure_d_list2.append(d)
    print('d = ',d,'is failure case (runtime exceeded 20 min)')
  else:
    time_for_d2[d] = bfgstime
    Ax_minus_y_norm_sq2[d] = (np.linalg.norm(np.matmul(A,x_opt) - y))**2
    L2_norm_diff2[d] = (np.linalg.norm(x_opt - xorig))**2
    print("For d = ",d)
    print("Time taken = ",bfgstime)
    print('||Ax* - y||^2 :', (np.linalg.norm(np.subtract(np.matmul(A, x_opt), y)))**2)
    print('||x* - x_orig||^2 :', (np.linalg.norm(np.subtract(x_opt, xorig)))**2)
    print('\n')
    #print the total time, ||Ax_opt_bfgs-y||^2 and the L2 norm difference || x_opt_bfgs -xorig||^2 for BFGS method

For d =  1000
Time taken =  4.273284377999516
||Ax* - y||^2 : 5.6716162511516556e-05
||x* - x_orig||^2 : 865.3153037923155


For d =  5000
Time taken =  293.07926795800086
||Ax* - y||^2 : 9.796492582376549e-06
||x* - x_orig||^2 : 4783.681692838852


d =  10000 is failure case (runtime exceeded 20 min)


**Failure cases** (memory issue) : $d = 20000, 25000, 50000, 100000, 200000, 500000, 1000000 $

Time failure for $d=10000$