function [eta, Heta, hesscalls, stop_str, stats] = arc_gradient_descent(problem, x, grad, gradnorm, sigma, options, storedb, key)
% Subproblem solver for ARC based on gradient descent.
%
% [eta, Heta, hesscalls, stop_str, stats] =
% arc_gradient_descent(problem, x, grad, gradnorm, sigma, options, storedb, key)
%
% This routine approximately solves the following problem:
%
% min_{eta in T_x M} m(eta), where
%
% m(eta) = <eta, g> + .5 <eta, H[eta]> + (sigma/3) ||eta||^3
%
% where eta is a tangent vector at x on the manifold given by problem.M,
% g = grad is a tangent vector at x, H[eta] is the result of applying the
% Hessian of the problem at x along eta and the inner product and norm
% are those from the Riemannian structure on the tangent space T_x M.
%
% The solve is approximate in the sense that the returned eta is only
% required to satisfy the following conditions:
%
% ||gradient of m at eta|| <= theta*||eta||^2 and m(eta) <= m(0),
%
% where theta is specified in options.theta (see below for the default value).
% Since the gradient of the model at 0 is g, if it is zero, then eta = 0
% is returned. This is the only scenario where eta = 0 is returned.
%
% Numerical errors can perturb the expected behavior described above.
%
% Inputs:
% problem: Manopt optimization problem structure
% x: point on the manifold problem.M
% grad: gradient of the cost function of the problem at x
% gradnorm: norm of the gradient, often available to the caller
% sigma: cubic regularization parameter (positive scalar)
% options: structure containing options for the subproblem solver
% storedb, key: caching data for problem at x
%
% Options specific to this subproblem solver:
% theta (0.5)
% Stopping criterion parameter for subproblem solver: the gradient of
% the model at the returned step should have norm no more than theta
% times the squared norm of the step.
% maxinner (100)
% Maximum number of iterations of the gradient descent algorithm.
%
% Outputs:
% eta: approximate solution to the cubic regularized subproblem at x
% Heta: Hess f(x)[eta] -- this is needed in the outer loop, and since it
% is often naturally available to the subproblem solver at the end of
% execution, it can be cheaper to return it here than to recompute it.
% hesscalls: number of Hessian calls during execution
% stop_str: string describing why the subsolver stopped
% stats: a structure of statistics about the inner work: the model cost
% value and the model gradient norm are recorded at each inner
% iteration.
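%
% Usage sketch (illustrative, not from the original documentation): this
% solver is normally not called directly, but selected through the
% subproblemsolver option of Manopt's arc solver, assuming arc accepts
% such an option. With a standard Manopt problem structure:
%
%     A = randn(50); A = (A + A')/2;            % symmetric data matrix
%     problem.M = spherefactory(50);            % unit sphere in R^50
%     problem.cost  = @(x) x'*A*x;              % Rayleigh quotient on the sphere
%     problem.egrad = @(x) 2*A*x;
%     problem.ehess = @(x, u) 2*A*u;
%     opts.subproblemsolver = @arc_gradient_descent;
%     opts.theta = 0.25;                        % tighter inner tolerance
%     x = arc(problem, [], opts);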
%
% This file is part of Manopt: www.manopt.org.
% Original authors: May 2, 2019,
% Bryan Zhu, Nicolas Boumal.
% Contributors:
% Change log:
%
% Aug. 19, 2019 (NB):
% Option maxiter_gradient renamed to maxinner to match trustregions.

% Some shortcuts
M = problem.M;
inner = @(u, v) M.inner(x, u, v);
rnorm = @(u) M.norm(x, u);
tangent = @(u) M.tangent(x, u);
Hess = @(u) getHessian(problem, x, u, storedb, key);

% Counter for Hessian calls issued
hesscalls = 0;

% If the gradient has norm zero, return a zero step
if gradnorm == 0
eta = M.zerovec(x);
Heta = eta;
stop_str = 'Cost gradient is zero';
stats = struct('gradnorms', 0, 'func_values', 0);
return;
end

% Set local defaults here
localdefaults.theta = 0.5;
localdefaults.maxinner = 100;
% Merge local defaults with user options, if any
if ~exist('options', 'var') || isempty(options)
options = struct();
end
options = mergeOptions(localdefaults, options);

% Calculate the Cauchy point as our initial step
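% (Derivation note.) Restricted to the ray eta = -r*grad/gradnorm, r >= 0,
% the model reduces to the scalar function
%     phi(r) = -r*gradnorm + (r^2/2)*<grad, H[grad]>/gradnorm^2 + (sigma/3)*r^3.
% Setting phi'(r) = 0 and keeping the nonnegative root gives the Cauchy
% radius R_c computed below.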
hess_grad = Hess(grad);
hesscalls = hesscalls + 1;
temp = inner(grad, hess_grad) / (2 * sigma * gradnorm * gradnorm);
R_c = -temp + sqrt(temp * temp + gradnorm / sigma);
eta = M.lincomb(x, -R_c / gradnorm, grad);
Heta = M.lincomb(x, -R_c / gradnorm, hess_grad);

% Main gradient descent iteration
gradnorms = zeros(options.maxinner, 1);
func_values = zeros(options.maxinner, 1);
gradnorm_reached = false;
j = 1;
while j < options.maxinner
% Calculate the gradient of the model
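% The model gradient is grad m(eta) = grad + H[eta] + sigma*||eta||*eta.
% Its negative is assembled in two lincomb calls, then re-projected onto
% the tangent space at x to limit numerical drift.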
eta_norm = rnorm(eta);
neg_mgrad = M.lincomb(x, 1, Heta, 1, grad);
neg_mgrad = M.lincomb(x, -1, neg_mgrad, -sigma * eta_norm, eta);
neg_mgrad = tangent(neg_mgrad);

% Compute some statistics
gradnorms(j) = rnorm(neg_mgrad);
func_values(j) = inner(grad, eta) + 0.5 * inner(eta, Heta) + (sigma/3) * eta_norm^3;

% Check termination condition
if gradnorms(j) <= options.theta * eta_norm^2
stop_str = 'Model grad norm condition satisfied';
gradnorm_reached = true;
break;
end

% Find the optimal step along the negative model gradient direction
Hnmgrad = Hess(neg_mgrad);
hesscalls = hesscalls + 1;
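% solve_along_line performs an exact one-dimensional minimization of the
% model along neg_mgrad and returns the best step size t; a return value
% of 0 means no improving step was found along this direction.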
step = solve_along_line(M, x, eta, neg_mgrad, grad, Hnmgrad, sigma);
if step == 0
stop_str = 'Failed optimal increase';
gradnorm_reached = true;
break;
end
eta = M.lincomb(x, 1, eta, step, neg_mgrad);
Heta = M.lincomb(x, 1, Heta, step, Hnmgrad);
j = j + 1;
end

% Check why we stopped iterating
if ~gradnorm_reached
stop_str = sprintf(['Reached max number of gradient descent iterations ' ...
'(options.maxinner = %d)'], options.maxinner);
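% The loop exited after incrementing j past the last recorded index,
% so step back by one to make the stats arrays line up.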
j = j - 1;
end

% Return the point we ended on
eta = tangent(eta);
stats = struct('gradnorms', gradnorms(1:j), 'func_values', func_values(1:j));
if isfield(options, 'verbosity') && options.verbosity >= 4
fprintf('\n');
end
end