function [x, cost, info, options] = arc(problem, x, options)
% Adaptive regularization by cubics (ARC) minimization algorithm for Manopt
%
% function [x, cost, info, options] = arc(problem)
% function [x, cost, info, options] = arc(problem, x0)
% function [x, cost, info, options] = arc(problem, x0, options)
% function [x, cost, info, options] = arc(problem, [], options)
%
% Apply the ARC minimization algorithm to the problem defined in the
% problem structure, starting at x0 if it is provided (otherwise, at a
% random point on the manifold). To specify options whilst not specifying
% an initial guess, give x0 as [] (the empty matrix).
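%
% Example (a minimal sketch; the sphere manifold and the Rayleigh
% quotient cost below are illustrative and not part of this solver):
%
%     A = randn(100); A = .5*(A + A');
%     problem.M = spherefactory(100);
%     problem.cost  = @(x) -x'*(A*x);
%     problem.egrad = @(x) -2*A*x;
%     problem.ehess = @(x, u) -2*A*u;
%     x = arc(problem);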
%
% In most of the examples bundled with the toolbox (see link below), the
% solver used there can be replaced by the present one as is.
%
% With the default subproblem solver (@arc_lanczos), properly tuning the
% parameter options.theta appears important for performance. Users may
% want to try values in the range 1e-3 to 1e3 for their particular
% application.
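%
% For example, to pass a theta value to the subproblem solver through
% the options structure (the value is illustrative, not a
% recommendation):
%
%     options.theta = 10;
%     x = arc(problem, [], options);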
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. The struct-array info contains information about the iterations:
% iter (integer)
% The (outer) iteration number, i.e., number of steps considered
% so far (whether accepted or rejected). The initial guess is 0.
% cost (double)
% The corresponding cost value.
% gradnorm (double)
% The (Riemannian) norm of the gradient.
% hessiancalls (integer)
% The number of Hessian calls issued by the subproblem solver to
% compute this iterate.
% time (double)
% The total elapsed time in seconds to reach the corresponding cost.
% rho (double)
% The regularized performance ratio for the iterate.
% See options.rho_regularization.
% rhonum, rhoden (double)
% Numerator and denominator of the performance ratio, before
% regularization.
% accepted (boolean)
% Whether the proposed iterate was accepted or not.
% stepsize (double)
% The (Riemannian) norm of the vector returned by the subproblem
% solver and which is retracted to obtain the proposed next iterate.
% If accepted = true for the corresponding iterate, this is the size
% of the step from the previous to the new iterate. If accepted is
% false, the step was not executed and this is the size of the
% rejected step.
% sigma (double)
% The cubic regularization parameter at the outer iteration.
% And possibly additional information logged by options.statsfun or by
% the subproblem solver.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached, and [info.time] to obtain a vector of the
% corresponding computation times.
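%
% To plot the gradient-norm convergence against the iteration count (a
% typical usage sketch):
%
%     semilogy([info.iter], [info.gradnorm], '.-');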
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following. The default value is indicated
% between parentheses. The subproblem solver may also accept options.
%
% tolgradnorm (1e-6)
% The algorithm terminates if the norm of the gradient drops below this.
% maxiter (1000)
% The algorithm terminates if maxiter (outer) iterations have been executed.
% maxtime (Inf)
% The algorithm terminates if maxtime seconds elapsed.
% sigma_0 (100 / trust-regions default maximum radius)
% Initial regularization parameter.
% sigma_min (1e-7)
% Minimum regularization parameter.
% eta_1 (0.1)
% If rho is below this, the step is unsuccessful (rejected).
% eta_2 (0.9)
% If rho exceeds this, the step is very successful.
% gamma_1 (0.1)
% Shrinking factor for regularization parameter if very successful.
% gamma_2 (1.5)
% Expansion factor for regularization parameter if unsuccessful.
% subproblemsolver (@arc_lanczos)
% Function handle to a subproblem solver. The subproblem solver will
% also see this options structure, so that parameters can be passed
% to it through here as well.
% rho_regularization (1e3)
% See help for the same parameter in the trustregions solver.
% statsfun (none)
% Function handle to a function that will be called after each
% iteration to provide the opportunity to log additional statistics.
% They will be returned in the info struct. See the generic Manopt
% documentation about solvers for further information.
% stopfun (none)
% Function handle to a function that will be called at each iteration
% to provide the opportunity to specify additional stopping criteria.
% See the generic Manopt documentation about solvers for further
% information.
% verbosity (3)
% Integer number used to tune the amount of output the algorithm
% generates during execution (mostly as text in the command window).
% The higher, the more output. 0 means silent.
% storedepth (2)
% Maximum number of different points x of the manifold for which a
% store structure will be kept in memory in the storedb. If the
% caching features of Manopt are not used, this is irrelevant. As of
% version 5.0, this is not particularly important.
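%
% For example, to run quietly with a tighter gradient tolerance (values
% are illustrative only):
%
%     options.tolgradnorm = 1e-8;
%     options.verbosity = 1;
%     [x, cost, info] = arc(problem, [], options);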
%
%
% Please cite the Manopt paper as well as the research paper:
% @article{agarwal2018arcfirst,
% author = {Agarwal, N. and Boumal, N. and Bullins, B. and Cartis, C.},
% title = {Adaptive regularization with cubics on manifolds with a first-order analysis},
% journal = {arXiv preprint arXiv:1806.00065},
% year = {2018}
% }
%
%
% See also: trustregions conjugategradient manopt/examples arc_lanczos

% This file is part of Manopt: www.manopt.org.
% Original authors: May 1, 2018,
% Naman Agarwal, Brian Bullins, Nicolas Boumal and Coralia Cartis.
% Contributors:
% Change log:
M = problem.M;
% Verify that the problem description is sufficient for the solver.
if ~canGetCost(problem)
warning('manopt:getCost', ...
'No cost provided. The algorithm will likely abort.');
end
if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
% Note: we do not give a warning if an approximate gradient is
% explicitly given in the problem description, as in that case the
% user seems to be aware of the issue.
warning('manopt:getGradient:approx', ['No gradient provided. ' ...
'Using an FD approximation instead (slow).\n' ...
'It may be necessary to increase options.tolgradnorm.\n'...
'To disable this warning: ' ...
'warning(''off'', ''manopt:getGradient:approx'')']);
problem.approxgrad = approxgradientFD(problem);
end
if ~canGetHessian(problem) && ~canGetApproxHessian(problem)
% Note: we do not give a warning if an approximate Hessian is
% explicitly given in the problem description, as in that case the
% user seems to be aware of the issue.
warning('manopt:getHessian:approx', ['No Hessian provided. ' ...
'Using an FD approximation instead.\n' ...
'To disable this warning: ' ...
'warning(''off'', ''manopt:getHessian:approx'')']);
problem.approxhess = approxhessianFD(problem);
end
% Set local defaults here
localdefaults.tolgradnorm = 1e-6;
localdefaults.maxiter = 1000;
localdefaults.maxtime = inf;
localdefaults.sigma_min = 1e-7;
localdefaults.eta_1 = 0.1;
localdefaults.eta_2 = 0.9;
localdefaults.gamma_1 = 0.1;
localdefaults.gamma_2 = 1.5;
localdefaults.storedepth = 2;
localdefaults.subproblemsolver = @arc_lanczos;
localdefaults.rho_regularization = 1e3;
% Merge global and local defaults, then merge w/ user options, if any.
localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
if ~exist('options', 'var') || isempty(options)
options = struct();
end
options = mergeOptions(localdefaults, options);
% Default initial sigma_0 is based on the initial Delta_bar of the
% trustregions solver.
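    % (Larger sigma leads to shorter steps, so sigma plays a role roughly
    % inverse to that of the trust-region radius; hence the default is
    % 100 divided by the default Delta_bar of trustregions.)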
if ~isfield(options, 'sigma_0')
if isfield(M, 'typicaldist')
options.sigma_0 = 100/M.typicaldist();
else
options.sigma_0 = 100/sqrt(M.dim());
end
end
timetic = tic();
% If no initial point x is given by the user, generate one at random.
if ~exist('x', 'var') || isempty(x)
x = M.rand();
end
% Create a store database and get a key for the current x.
storedb = StoreDB(options.storedepth);
key = storedb.getNewKey();
% Compute objective-related quantities for x.
[cost, grad] = getCostGrad(problem, x, storedb, key);
gradnorm = M.norm(x, grad);
% Initialize regularization parameter.
sigma = options.sigma_0;
% Iteration counter.
% At any point, iter is the number of fully executed iterations so far.
iter = 0;
% Save stats in a struct array info, and preallocate.
stats = savestats(problem, x, storedb, key, options);
info(1) = stats;
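    % Preallocate at most 10000 entries up front to bound memory use; if
    % the algorithm runs for more iterations, MATLAB grows the struct
    % array dynamically as needed.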
info(min(10000, options.maxiter+1)).iter = [];
if options.verbosity >= 2
fprintf(' iter\t\t\t\t\tcost val\t\t grad norm sigma #Hess\n');
end
% Iterate until stopping criterion triggers.
while true
% Display iteration information.
if options.verbosity >= 2
fprintf('%5d\t %+.16e\t %.8e %.2e', ...
iter, cost, gradnorm, sigma);
end
% Start timing this iteration.
timetic = tic();
% Run standard stopping criterion checks.
[stop, reason] = stoppingcriterion(problem, x, options, ...
info, iter+1);
if stop
if options.verbosity >= 1
fprintf(['\n' reason '\n']);
end
break;
end
% Solve the ARC subproblem.
% Outputs: eta is the tentative step (it is a tangent vector at x);
% Heta is the result of applying the Hessian at x along eta (this
% is often a natural by-product of the subproblem solver);
% hesscalls is the number of Hessian calls issued in the solver;
% stop_str is a string describing why the solver terminated; and
% substats is some statistics about the solver's work to be logged.
[eta, Heta, hesscalls, stop_str, substats] = ...
options.subproblemsolver(problem, x, grad, gradnorm, ...
sigma, options, storedb, key);
etanorm = M.norm(x, eta);
% Get a tentative next x by retracting the proposed step.
newx = M.retr(x, eta);
newkey = storedb.getNewKey();
% Compute the new cost-related quantities for proposal x.
% We could just compute the cost here, as the gradient is only
% necessary if the step is accepted; but we expect most steps are
% accepted, and sometimes the gradient can be computed faster if it
% is computed in conjunction with the cost.
[newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
% Compute a regularized ratio between actual and model improvement.
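        % rho_num is the actual cost decrease. rho_den is the decrease
        % predicted by the quadratic part of the model, that is,
        % -(<grad, eta> + .5*<eta, Heta>); the cubic term is left out of
        % the ratio. As with rho_regularization in trustregions, rho_reg
        % shields the quotient from numerical noise when both terms are
        % close to zero.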
rho_num = cost - newcost;
vec_rho = M.lincomb(x, 1, grad, .5, Heta);
rho_den = -M.inner(x, eta, vec_rho);
rho_reg = options.rho_regularization*eps*max(1, abs(cost));
rho = (rho_num+rho_reg) / (rho_den+rho_reg);
% Determine if the tentative step should be accepted or not.
if rho >= options.eta_1
accept = true;
arc_str = 'acc ';
% We accepted this step: erase cache of the previous point.
storedb.removefirstifdifferent(key, newkey);
x = newx;
key = newkey;
cost = newcost;
grad = newgrad;
gradnorm = M.norm(x, grad);
else
accept = false;
arc_str = 'REJ ';
% We rejected this step: erase cache of the tentative point.
storedb.removefirstifdifferent(newkey, key);
end
% Update the regularization parameter.
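        % Shrinking sigma allows longer steps; expanding it forces
        % shorter, more conservative steps. The fourth character of
        % arc_str marks this in the printed log: '-' for a shrink, '+'
        % for an expansion.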
if rho >= options.eta_2
% Very successful step
arc_str(4) = '-';
sigma = max(options.sigma_min, options.gamma_1 * sigma);
elseif rho >= options.eta_1
% Successful step
arc_str(4) = ' ';
else
% Unsuccessful step
arc_str(4) = '+';
sigma = options.gamma_2 * sigma;
end
% iter is the number of iterations we have completed.
iter = iter + 1;
% Make sure we don't use too much memory for the store database.
storedb.purge();
% Log statistics for freshly executed iteration.
stats = savestats(problem, x, storedb, key, options);
info(iter+1) = stats;
if options.verbosity >= 2
fprintf(' %5d %s\n', hesscalls, [arc_str ' ' stop_str]);
end
end
% Truncate the struct-array to the part we populated
info = info(1:iter+1);
if options.verbosity >= 1
fprintf('Total time is %f [s] (excludes statsfun)\n', info(end).time);
end
% Routine in charge of collecting the current iteration statistics
function stats = savestats(problem, x, storedb, key, options)
stats.iter = iter;
stats.cost = cost;
stats.gradnorm = gradnorm;
stats.sigma = sigma;
if iter == 0
stats.hessiancalls = 0;
stats.stepsize = NaN;
stats.time = toc(timetic);
stats.rho = inf;
stats.rhonum = NaN;
stats.rhoden = NaN;
stats.accepted = true;
stats.subproblem = struct();
else
stats.hessiancalls = hesscalls;
stats.stepsize = etanorm;
stats.time = info(iter).time + toc(timetic);
stats.rho = rho;
stats.rhonum = rho_num;
stats.rhoden = rho_den;
stats.accepted = accept;
stats.subproblem = substats;
end
% Similar to statsfun with trustregions: the x and store passed to
% statsfun are that of the most recently accepted point after the
% iteration fully executed.
stats = applyStatsfun(problem, x, storedb, key, options, stats);
end
end