function [eta, Heta, hesscalls, stop_str, stats] = arc_gradient_descent(problem, x, grad, gradnorm, sigma, options, storedb, key)
% Subproblem solver for ARC based on gradient descent.
%
% [eta, Heta, hesscalls, stop_str, stats] =
% arc_gradient_descent(problem, x, grad, gradnorm, sigma, options, storedb, key)
%
% This routine approximately solves the following problem:
%
% min_{eta in T_x M} m(eta), where
%
% m(eta) = <eta, g> + .5 <eta, H[eta]> + (sigma/3) ||eta||^3
%
% where eta is a tangent vector at x on the manifold given by problem.M,
% g = grad is a tangent vector at x, H[eta] is the result of applying the
% Hessian of the problem at x along eta and the inner product and norm
% are those from the Riemannian structure on the tangent space T_x M.
%
% The solve is approximate in the sense that the returned eta is only
% required to satisfy the following conditions:
%
% ||gradient of m at eta|| <= theta*||eta||^2 and m(eta) <= m(0),
%
% where theta is specified in options.theta (see below for the default value).
% Since the gradient of the model at 0 is g, if it is zero, then eta = 0
% is returned. This is the only scenario where eta = 0 is returned.
%
% Numerical errors can perturb the expected behavior described above.
%
% Inputs:
% problem: Manopt optimization problem structure
% x: point on the manifold problem.M
% grad: gradient of the cost function of the problem at x
% gradnorm: norm of the gradient, often available to the caller
% sigma: cubic regularization parameter (positive scalar)
% options: structure containing options for the subproblem solver
% storedb, key: caching data for problem at x
%
% Options specific to this subproblem solver:
% theta (0.5)
% Stopping criterion parameter for subproblem solver: the gradient of
% the model at the returned step should have norm no more than theta
% times the squared norm of the step.
% maxinner (100)
% Maximum number of iterations of the gradient descent algorithm.
%
% Outputs:
% eta: approximate solution to the cubic regularized subproblem at x
% Heta: Hess f(x)[eta] -- this is needed in the outer loop, and since it
% is often naturally available to the subproblem solver at the end of
% execution, it can be cheaper to return it here than to recompute it.
% hesscalls: number of Hessian calls during execution
% stop_str: string describing why the subsolver stopped
% stats: a structure of statistics about the inner work: the model cost
% value and the model gradient norm are recorded at each inner
% iteration.
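%
% Usage sketch (illustrative, not from the original documentation): this
% solver is normally not called directly, but selected through the
% subproblemsolver option of Manopt's arc solver, assuming arc accepts
% such an option. With a standard Manopt problem structure:
%
%     A = randn(50); A = (A + A')/2;            % symmetric data matrix
%     problem.M = spherefactory(50);            % unit sphere in R^50
%     problem.cost  = @(x) x'*A*x;              % Rayleigh quotient on the sphere
%     problem.egrad = @(x) 2*A*x;
%     problem.ehess = @(x, u) 2*A*u;
%     opts.subproblemsolver = @arc_gradient_descent;
%     opts.theta = 0.25;                        % tighter inner tolerance
%     x = arc(problem, [], opts);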
%
% This file is part of Manopt: www.manopt.org.
% Original authors: May 2, 2019,
% Bryan Zhu, Nicolas Boumal.
% Contributors:
% Change log:
%
% Aug. 19, 2019 (NB):
% Option maxiter_gradient renamed to maxinner to match trustregions.

% Some shortcuts
M = problem.M;
inner = @(u, v) M.inner(x, u, v);
rnorm = @(u) M.norm(x, u);
tangent = @(u) M.tangent(x, u);
Hess = @(u) getHessian(problem, x, u, storedb, key);

% Counter for Hessian calls issued
hesscalls = 0;

% If the gradient has norm zero, return a zero step
if gradnorm == 0
eta = M.zerovec(x);
Heta = eta;
stop_str = 'Cost gradient is zero';
stats = struct('gradnorms', 0, 'func_values', 0);
return;
end

% Set local defaults here
localdefaults.theta = 0.5;
localdefaults.maxinner = 100;
% Merge local defaults with user options, if any
if ~exist('options', 'var') || isempty(options)
options = struct();
end
options = mergeOptions(localdefaults, options);

% Calculate the Cauchy point as our initial step
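% (Derivation note.) Restricted to the ray eta = -r*grad/gradnorm, r >= 0,
% the model reduces to the scalar function
%     phi(r) = -r*gradnorm + (r^2/2)*<grad, H[grad]>/gradnorm^2 + (sigma/3)*r^3.
% Setting phi'(r) = 0 and keeping the nonnegative root gives the Cauchy
% radius R_c computed below.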
hess_grad = Hess(grad);
hesscalls = hesscalls + 1;
temp = inner(grad, hess_grad) / (2 * sigma * gradnorm * gradnorm);
R_c = -temp + sqrt(temp * temp + gradnorm / sigma);
eta = M.lincomb(x, -R_c / gradnorm, grad);
Heta = M.lincomb(x, -R_c / gradnorm, hess_grad);

% Main gradient descent iteration
gradnorms = zeros(options.maxinner, 1);
func_values = zeros(options.maxinner, 1);
gradnorm_reached = false;
j = 1;
while j < options.maxinner
% Calculate the gradient of the model
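% The model gradient is grad m(eta) = grad + H[eta] + sigma*||eta||*eta.
% Its negative is assembled in two lincomb calls, then re-projected onto
% the tangent space at x to limit numerical drift.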
eta_norm = rnorm(eta);
neg_mgrad = M.lincomb(x, 1, Heta, 1, grad);
neg_mgrad = M.lincomb(x, -1, neg_mgrad, -sigma * eta_norm, eta);
neg_mgrad = tangent(neg_mgrad);

% Compute some statistics
gradnorms(j) = rnorm(neg_mgrad);
func_values(j) = inner(grad, eta) + 0.5 * inner(eta, Heta) + (sigma/3) * eta_norm^3;

% Check termination condition
if gradnorms(j) <= options.theta * eta_norm^2
stop_str = 'Model grad norm condition satisfied';
gradnorm_reached = true;
break;
end

% Find the optimal step along the negative model gradient direction
Hnmgrad = Hess(neg_mgrad);
hesscalls = hesscalls + 1;
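% solve_along_line performs an exact one-dimensional minimization of the
% model along neg_mgrad and returns the best step size t; a return value
% of 0 means no improving step was found along this direction.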
step = solve_along_line(M, x, eta, neg_mgrad, grad, Hnmgrad, sigma);
if step == 0
stop_str = 'Failed optimal increase';
gradnorm_reached = true;
break;
end
eta = M.lincomb(x, 1, eta, step, neg_mgrad);
Heta = M.lincomb(x, 1, Heta, step, Hnmgrad);
j = j + 1;
end

% Check why we stopped iterating
if ~gradnorm_reached
stop_str = sprintf(['Reached max number of gradient descent iterations ' ...
'(options.maxinner = %d)'], options.maxinner);
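% The loop exited after incrementing j past the last recorded index,
% so step back by one to make the stats arrays line up.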
j = j - 1;
end

% Return the point we ended on
eta = tangent(eta);
stats = struct('gradnorms', gradnorms(1:j), 'func_values', func_values(1:j));
if isfield(options, 'verbosity') && options.verbosity >= 4
fprintf('\n');
end
end