examples/shapefit_smoothed.m

function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J)
% ShapeFit formulation for sensor network localization from pair directions
%
% function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J)
%
% This example in based on the paper http://arxiv.org/abs/1506.01437:
% ShapeFit: Exact location recovery from corrupted pairwise directions, 2015
% by Paul Hand, Choongbum Lee and Vladislav Voroninski.
%
% The problem is the following: there are n points t_1, ..., t_n in R^d
% which need to be estimated (localized). To this end, we are given
% measurements of some of the pairwise directions,
% v_ij = (t_i - t_j) / norm(t_i - t_j) + noise.
% Assume there are m such pairwise measurements, defining a graph with m
% edges over n nodes. J is the signed incidence matrix of this graph (see
% in code). To build J from lists I, J in R^m of nodes, use:
% J = sparse([I ; J], [(1:m)' ; (1:m)'], [ones(m, 1), -ones(m, 1)], n, m, 2*m);
%
% The measurements are arranged in the matrix V of size d x m. From V, we
% attempt to estimate t_1, ..., t_n, arranged in T, a matrix of size d x n.
% The estimation can only be done up to translation and scaling. The
% returned T's are centered: the columns sum to zero.
%
% ShapeFit is a formulation of this estimation problem which is robust to
% outliers. It is a nonsmooth, convex optimization problem over an affine
% space, i.e., a linear manifold. We smooth the cost using the pseudo-Huber
% loss cost and solve the problem using Manopt. This requires two
% ingredients: (1) a factory to describe the affine space, see
% shapefitfactory; (2) defining the cost and its derivative, and minimizing
% it while progressively tightening the smooth approximation (with
% warm-start).
%
% Simply run the example to see the results on random data. It compares the
% smoothed ShapeFit formulation against a least-squares formulation, when
% the measurements include outliers. See in code to vary the noise
% parameters, dimension d, number of nodes n, number of measurements m, ...
%
% Note: since the problem is convex, this returns the global optimum.
% This example also illustrates the use of Manopt for optimization under
% linear constraints: admittedly a simple subcase of optimization on
% manifolds.
%
%
% See also: shapefitfactory

% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 18, 2015.
% Contributors: 
% Change log: 
%
%   Jan.  4, 2021 (NB):
%       Changes for compatibility with Octave 6.1.0.
%
%   Aug. 23, 2021 (XJ):
%       Added AD to compute the egrad and the ehess  

    % Generic useful functions
    center_cols = @(A) bsxfun(@minus, A, mean(A, 2));
    normalize_cols = @(A) bsxfun(@times, A, 1./sqrt(sum(A.^2, 1)));
    sqnorm_cols = @(A) sum(A.^2, 1);

    
    % DATA GENERATION
    %
    % If no inputs are specified, generate some random data for
    % illustration purposes.
    if nargin == 0

        % We estimate n points in R^d
        d =   2;
        n = 500;

        % Those points are the columns of T : they are what we need to
        % estimate, up to scaling and translation. We center T for
        % convenience.
        T_tru = center_cols(rand(d, n));

        % We get a measurement of some pairs of relative directions.
        % Which pairs is encoded in this graph, with J being the (signed,
        % transposed) incidence matrix. J is n x m, sparse.
        % There are roughly edge_fraction * n * (n-1) / 2 measurements.
        edge_fraction = 0.10;
        % [ii, jj] = erdosrenyi(n, edge_fraction);
        [ii, jj] = randomgraph(n, edge_fraction*nchoosek(n, 2));
        m = length(ii);
        J = sparse([ii ; jj], [(1:m)' ; (1:m)'], ...
                   [ones(m, 1), -ones(m, 1)], n, m, 2*m);

        % The measurements give the directions from one point to another.
        % That is: we get the position difference, normalized. Here, with
        % Gaussian noise. Least-squares will be well-suited for this.
        sigma = .0;
        V = normalize_cols(T_tru*J + sigma*randn(d, m)); % d x m

        % Outliers: we replace some of the direction measurements by
        % uniformly random unit-norm vectors.
        outlier_fraction = .3;
        outliers = rand(1, m) < outlier_fraction;
        V(:, outliers) = normalize_cols(randn(d, sum(outliers)));
        
    end % done generating random data
    
    
    [d, m] = size(V);
    n = size(J, 1);
    assert(size(J, 2) == m, 'J must be n x m, with V of size d x m.');

    VJt = full(V*J');

    % This "manifold" describes the Euclidean space of matrices T of size
    % d x n such that <VJt, T> = 1 and T has centered columns: T1 = 0.
    problem.M = shapefitfactory(VJt);

    % This linear operator computes the orthogonal projection of each
    % difference ti - tj on the orthogonal space to v_ij.
    % If the alignment is compatible with the data, then this is zero.
    % A(T) is a d x m matrix.
    A = @(T) Afun(T, V, J);
    function AT = Afun(T, V, J)
        TJ = T*J;
        AT = TJ - bsxfun(@times, V, sum(V .* TJ, 1));
    end

    % Need the adjoint of A, too. Input is d x m, output is d x n.
    Astar = @(W) (W - bsxfun(@times, V, sum(V.*W, 1)))*J';

    
    % LEAST-SQUARES
    %
    % First, work with a least-squares formulation of the problem.
    % That is, we minimize a (very nice) convex cost over an affine space.
    % Since the smooth solvers in Manopt converge to critical points, this
    % means they converge to global optimizers.
    problem.cost  = @(T) 0.5*norm(A(T), 'fro')^2;
    problem.egrad = @(T) Astar(A(T));
    problem.ehess = @(T, Tdot) Astar(A(Tdot));

    % An alternative way to compute the egrad and the ehess is to use 
    % automatic differentiation provided in the deep learning toolbox (slower)
    % Notice that the function norm is not supported for AD so far.
    % Replace norm(...,'fro')^2 with cnormsqfro described in the file 
    % manoptADhelp.m. Also operations between sparse matrices and dlarrys 
    % is not supported so far. Transform V,J into full matrices. AD does 
    % not support bsxfunc as well. Translate it into the expression of 
    % repmat and .*. The whole thing would make the computation much slower 
    % than specifying the egrad and the ehess.
    % V_full = full(V);
    % J_full = full(J);
    % problem.cost  = @(T) 0.5*cnormsqfro(A_AD(T));
    % function AT = A_AD(T)
    %    sum1 = sum(V_full .* (T*J_full), 1);
    %    repsum1 = repmat(sum1,size(sum1,1),1);
    %    AT = T*J_full - V_full.*repsum1;
    % end
    % call manoptAD to prepare AD for the problem structure   
    % problem = manoptAD(problem);
    
    T_lsq = trustregions(problem);
    

    % PSEUDO-HUBER SMOOTHED SHAPEFIT
    %
    % Now solve the same, but with a pseudo-Huber loss instead of
    % least-squares.
    % We iteratively sharpen the Huber function, i.e., reduce delta.
    % It is important to warm start in such a fashion: trying to optimize
    % with a random initial guess and a very small delta is typically slow.
    % How fast one should decrease delta, and how accurately one should
    % optimize at each intermediate stage, is open for research.
    delta = 1;
    T_hub = []; % We could use T_lsq as initial guess, too.
    problem = rmfield(problem, 'ehess');
    warning('off', 'manopt:getHessian:approx');
    for iter = 1 : 12
        
        delta = delta / 2;
        
        h = @(x2) sqrt(x2 + delta^2) - delta; % pseudo-Huber loss

        problem.cost  = @(T) sum(h(sqnorm_cols(A(T))));
        problem.egrad = @(T) Astar(bsxfun(@times, A(T), ...
                                    1./sqrt(sqnorm_cols(A(T)) + delta^2)));

        % AD version      
        % problem = rmfield(problem, 'egrad');
        % problem = rmfield(problem, 'autogradfunc');
        % problem = manoptAD(problem,'egrad');                        
                                
        % Solve, using the previous solution as initial guess.
        T_hub = trustregions(problem, T_hub);
        
    end
    
    
    % CVX SHAPEFIT
    %
    % Actual ShapeFit cost (nonsmooth), with CVX.
    % You can get CVX from http://cvxr.com/.
    use_cvx_if_available = false;
    if use_cvx_if_available && exist('cvx_version', 'file')
        T_cvx = shapefit_cvx(V, J);
    else
        T_cvx = NaN(d, n);
    end
    
    
    % VISUALIZATION
    %
    % If T_true is available, for display, we scale the estimators to match
    % the norm of the target. The scaling factor is obtained by minimizing
    % the norm of the discrepancy : norm(T_tru - scale*T_xxx, 'fro').
    % A plot is produced if d is 2 or 3.
    if exist('T_tru', 'var') && (d == 2 || d == 3)
        
        T_lsq = T_lsq * trace(T_tru'*T_lsq) / norm(T_lsq, 'fro')^2;
        T_hub = T_hub * trace(T_tru'*T_hub) / norm(T_hub, 'fro')^2;
        T_cvx = T_cvx * trace(T_tru'*T_cvx) / norm(T_cvx, 'fro')^2;

    
        switch d
            case 2
                plot(T_tru(1, :), T_tru(2, :), 'bo', ...
                     T_lsq(1, :), T_lsq(2, :), 'rx', ...
                     T_hub(1, :), T_hub(2, :), 'k.', ...
                     T_cvx(1, :), T_cvx(2, :), 'g.');
            case 3
                plot3(T_tru(1, :), T_tru(2, :), T_tru(3, :), 'bo', ...
                      T_lsq(1, :), T_lsq(2, :), T_lsq(3, :), 'rx', ...
                      T_hub(1, :), T_hub(2, :), T_hub(3, :), 'k.', ...
                      T_cvx(1, :), T_cvx(2, :), T_cvx(3, :), 'g.');
        end

        legend('ground truth', 'least squares', ...
               sprintf('pseudo-huber, \\delta = %.1e', delta), ...
               'CVX ShapeFit');
           
        title(sprintf(['ShapeFit problem : d = %d, n = %d, edge ' ...
                       'fraction = %.2g, sigma = %.2g, outlier ' ...
                       'fraction = %.2g'], d, n, edge_fraction, sigma, ...
                       outlier_fraction));
        axis equal;
    
    end

end


% If CVX is available, it can be used to solve the nonsmooth problem
% directly, very elegantly.
function T_cvx = shapefit_cvx(V, J)
    d = size(V, 1);
    n = size(J, 1); %#ok<NASGU>
    VJt = full(V*J');
    cvx_begin
        variable T_cvx(d, n)
        % We want to minimize this:
        % minimize sum( norms( A(T_cvx), 2, 1 ) )
        % But unfortunately, CVX doesn't handle bsxfun. Instead, we use
        % repmat, which is slower, and hence hurts the comparison in
        % disfavor of CVX.
        minimize sum( norms( T_cvx*J - V .* repmat(sum(V .* (T_cvx*J), 1), [d, 1])  , 2, 1 ) )
        sum(T_cvx, 2) == zeros(d, 1); %#ok<NODEF,EQEFF>
        VJt(:).' * T_cvx(:) == 1; %#ok<EQEFF>
    cvx_end
end


function [I, J, A] = erdosrenyi(n, p) %#ok<DEFNU>
% Generate a random Erdos-Renyi graph with n nodes and edge probability p.
%
% [I, J, A] = erdosrenyi(n, p)
% 
% Returns a list of edges (I(k), J(k)) for a random, undirected Erdos-Renyi
% graph with n nodes and edge probability p. A is the adjacency matrix.
%
% I(k) < J(k) for all k, i.e., all(I<J) is true.
%
% The memory requirements for this simple implementation scale as O(n^2).

    X = rand(n);
    mask = X <= p;
    X( mask) = 1;
    X(~mask) = 0;
    X = triu(X, 1);

    % A is the adjacency matrix
    A = X + X';
    
    [I, J] = find(X);

end


function [I, J, A] = randomgraph(n, m)
% Generates a random graph over n nodes with at most m edges.
%
% function [I, J, A] = randomgraph(n, m)
%
% Selects m (undirected) edges from a graph over n nodes, uniformly at
% random, with replacement. The self edges and repeated edges are then
% discarded. The remaining number of edges is at most m, and should be
% close to m if m is much smaller than nchoosek(n, 2).
%
% The output satisfies all(I < J). A is the corresponding adjacency matrix.
%
% Uses O(m) memory (not O(n^2)), making it fit for large, sparse graphs.

    % Generate m edges at random, with replacement, and remove repetitions.
    IJ = unique(sort(randi(n, m, 2), 2), 'rows');
    
    % Remove self-edges if any.
    IJ = IJ(IJ(:, 1) ~= IJ(:, 2), :);
    
    % Actual number of selected edges
    k = size(IJ, 1);
    
    I = IJ(:, 1);
    J = IJ(:, 2);
    
    % Build the adjacency matrix of the graph.
    A = sparse([I ; J], [J ; I], ones(2*k, 1), n, n, 2*k);

end