-
Notifications
You must be signed in to change notification settings - Fork 96
/
shapefit_smoothed.m
330 lines (268 loc) · 12 KB
/
shapefit_smoothed.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J)
% ShapeFit formulation for sensor network localization from pair directions
%
% function [T_hub, T_lsq, T_cvx] = shapefit_smoothed(V, J)
%
% This example in based on the paper http://arxiv.org/abs/1506.01437:
% ShapeFit: Exact location recovery from corrupted pairwise directions, 2015
% by Paul Hand, Choongbum Lee and Vladislav Voroninski.
%
% The problem is the following: there are n points t_1, ..., t_n in R^d
% which need to be estimated (localized). To this end, we are given
% measurements of some of the pairwise directions,
% v_ij = (t_i - t_j) / norm(t_i - t_j) + noise.
% Assume there are m such pairwise measurements, defining a graph with m
% edges over n nodes. J is the signed incidence matrix of this graph (see
% in code). To build J from lists I, J in R^m of nodes, use:
% J = sparse([I ; J], [(1:m)' ; (1:m)'], [ones(m, 1), -ones(m, 1)], n, m, 2*m);
%
% The measurements are arranged in the matrix V of size d x m. From V, we
% attempt to estimate t_1, ..., t_n, arranged in T, a matrix of size d x n.
% The estimation can only be done up to translation and scaling. The
% returned T's are centered: the columns sum to zero.
%
% ShapeFit is a formulation of this estimation problem which is robust to
% outliers. It is a nonsmooth, convex optimization problem over an affine
% space, i.e., a linear manifold. We smooth the cost using the pseudo-Huber
% loss cost and solve the problem using Manopt. This requires two
% ingredients: (1) a factory to describe the affine space, see
% shapefitfactory; (2) defining the cost and its derivative, and minimizing
% it while progressively tightening the smooth approximation (with
% warm-start).
%
% Simply run the example to see the results on random data. It compares the
% smoothed ShapeFit formulation against a least-squares formulation, when
% the measurements include outliers. See in code to vary the noise
% parameters, dimension d, number of nodes n, number of measurements m, ...
%
% Note: since the problem is convex, this returns the global optimum.
% This example also illustrates the use of Manopt for optimization under
% linear constraints: admittedly a simple subcase of optimization on
% manifolds.
%
%
% See also: shapefitfactory
% This file is part of Manopt: www.manopt.org.
% Original author: Nicolas Boumal, June 18, 2015.
% Contributors:
% Change log:
%
% Jan. 4, 2021 (NB):
% Changes for compatibility with Octave 6.1.0.
%
% Aug. 23, 2021 (XJ):
% Added AD to compute the egrad and the ehess
% Generic useful functions
center_cols = @(A) bsxfun(@minus, A, mean(A, 2));
normalize_cols = @(A) bsxfun(@times, A, 1./sqrt(sum(A.^2, 1)));
sqnorm_cols = @(A) sum(A.^2, 1);
% DATA GENERATION
%
% If no inputs are specified, generate some random data for
% illustration purposes.
if nargin == 0
% We estimate n points in R^d
d = 2;
n = 500;
% Those points are the columns of T : they are what we need to
% estimate, up to scaling and translation. We center T for
% convenience.
T_tru = center_cols(rand(d, n));
% We get a measurement of some pairs of relative directions.
% Which pairs is encoded in this graph, with J being the (signed,
% transposed) incidence matrix. J is n x m, sparse.
% There are roughly edge_fraction * n * (n-1) / 2 measurements.
edge_fraction = 0.10;
% [ii, jj] = erdosrenyi(n, edge_fraction);
[ii, jj] = randomgraph(n, edge_fraction*nchoosek(n, 2));
m = length(ii);
J = sparse([ii ; jj], [(1:m)' ; (1:m)'], ...
[ones(m, 1), -ones(m, 1)], n, m, 2*m);
% The measurements give the directions from one point to another.
% That is: we get the position difference, normalized. Here, with
% Gaussian noise. Least-squares will be well-suited for this.
sigma = .0;
V = normalize_cols(T_tru*J + sigma*randn(d, m)); % d x m
% Outliers: we replace some of the direction measurements by
% uniformly random unit-norm vectors.
outlier_fraction = .3;
outliers = rand(1, m) < outlier_fraction;
V(:, outliers) = normalize_cols(randn(d, sum(outliers)));
end % done generating random data
[d, m] = size(V);
n = size(J, 1);
assert(size(J, 2) == m, 'J must be n x m, with V of size d x m.');
VJt = full(V*J');
% This "manifold" describes the Euclidean space of matrices T of size
% d x n such that <VJt, T> = 1 and T has centered columns: T1 = 0.
problem.M = shapefitfactory(VJt);
% This linear operator computes the orthogonal projection of each
% difference ti - tj on the orthogonal space to v_ij.
% If the alignment is compatible with the data, then this is zero.
% A(T) is a d x m matrix.
A = @(T) Afun(T, V, J);
function AT = Afun(T, V, J)
TJ = T*J;
AT = TJ - bsxfun(@times, V, sum(V .* TJ, 1));
end
% Need the adjoint of A, too. Input is d x m, output is d x n.
Astar = @(W) (W - bsxfun(@times, V, sum(V.*W, 1)))*J';
% LEAST-SQUARES
%
% First, work with a least-squares formulation of the problem.
% That is, we minimize a (very nice) convex cost over an affine space.
% Since the smooth solvers in Manopt converge to critical points, this
% means they converge to global optimizers.
problem.cost = @(T) 0.5*norm(A(T), 'fro')^2;
problem.egrad = @(T) Astar(A(T));
problem.ehess = @(T, Tdot) Astar(A(Tdot));
% An alternative way to compute the egrad and the ehess is to use
% automatic differentiation provided in the deep learning toolbox (slower)
% Notice that the function norm is not supported for AD so far.
% Replace norm(...,'fro')^2 with cnormsqfro described in the file
% manoptADhelp.m. Also operations between sparse matrices and dlarrys
% is not supported so far. Transform V,J into full matrices. AD does
% not support bsxfunc as well. Translate it into the expression of
% repmat and .*. The whole thing would make the computation much slower
% than specifying the egrad and the ehess.
% V_full = full(V);
% J_full = full(J);
% problem.cost = @(T) 0.5*cnormsqfro(A_AD(T));
% function AT = A_AD(T)
% sum1 = sum(V_full .* (T*J_full), 1);
% repsum1 = repmat(sum1,size(sum1,1),1);
% AT = T*J_full - V_full.*repsum1;
% end
% call manoptAD to prepare AD for the problem structure
% problem = manoptAD(problem);
T_lsq = trustregions(problem);
% PSEUDO-HUBER SMOOTHED SHAPEFIT
%
% Now solve the same, but with a pseudo-Huber loss instead of
% least-squares.
% We iteratively sharpen the Huber function, i.e., reduce delta.
% It is important to warm start in such a fashion: trying to optimize
% with a random initial guess and a very small delta is typically slow.
% How fast one should decrease delta, and how accurately one should
% optimize at each intermediate stage, is open for research.
delta = 1;
T_hub = []; % We could use T_lsq as initial guess, too.
problem = rmfield(problem, 'ehess');
warning('off', 'manopt:getHessian:approx');
for iter = 1 : 12
delta = delta / 2;
h = @(x2) sqrt(x2 + delta^2) - delta; % pseudo-Huber loss
problem.cost = @(T) sum(h(sqnorm_cols(A(T))));
problem.egrad = @(T) Astar(bsxfun(@times, A(T), ...
1./sqrt(sqnorm_cols(A(T)) + delta^2)));
% AD version
% problem = rmfield(problem, 'egrad');
% problem = rmfield(problem, 'autogradfunc');
% problem = manoptAD(problem,'egrad');
% Solve, using the previous solution as initial guess.
T_hub = trustregions(problem, T_hub);
end
% CVX SHAPEFIT
%
% Actual ShapeFit cost (nonsmooth), with CVX.
% You can get CVX from http://cvxr.com/.
use_cvx_if_available = false;
if use_cvx_if_available && exist('cvx_version', 'file')
T_cvx = shapefit_cvx(V, J);
else
T_cvx = NaN(d, n);
end
% VISUALIZATION
%
% If T_true is available, for display, we scale the estimators to match
% the norm of the target. The scaling factor is obtained by minimizing
% the norm of the discrepancy : norm(T_tru - scale*T_xxx, 'fro').
% A plot is produced if d is 2 or 3.
if exist('T_tru', 'var') && (d == 2 || d == 3)
T_lsq = T_lsq * trace(T_tru'*T_lsq) / norm(T_lsq, 'fro')^2;
T_hub = T_hub * trace(T_tru'*T_hub) / norm(T_hub, 'fro')^2;
T_cvx = T_cvx * trace(T_tru'*T_cvx) / norm(T_cvx, 'fro')^2;
switch d
case 2
plot(T_tru(1, :), T_tru(2, :), 'bo', ...
T_lsq(1, :), T_lsq(2, :), 'rx', ...
T_hub(1, :), T_hub(2, :), 'k.', ...
T_cvx(1, :), T_cvx(2, :), 'g.');
case 3
plot3(T_tru(1, :), T_tru(2, :), T_tru(3, :), 'bo', ...
T_lsq(1, :), T_lsq(2, :), T_lsq(3, :), 'rx', ...
T_hub(1, :), T_hub(2, :), T_hub(3, :), 'k.', ...
T_cvx(1, :), T_cvx(2, :), T_cvx(3, :), 'g.');
end
legend('ground truth', 'least squares', ...
sprintf('pseudo-huber, \\delta = %.1e', delta), ...
'CVX ShapeFit');
title(sprintf(['ShapeFit problem : d = %d, n = %d, edge ' ...
'fraction = %.2g, sigma = %.2g, outlier ' ...
'fraction = %.2g'], d, n, edge_fraction, sigma, ...
outlier_fraction));
axis equal;
end
end
% If CVX is available, it can be used to solve the nonsmooth problem
% directly, very elegantly.
function T_cvx = shapefit_cvx(V, J)
d = size(V, 1);
n = size(J, 1); %#ok<NASGU>
VJt = full(V*J');
cvx_begin
variable T_cvx(d, n)
% We want to minimize this:
% minimize sum( norms( A(T_cvx), 2, 1 ) )
% But unfortunately, CVX doesn't handle bsxfun. Instead, we use
% repmat, which is slower, and hence hurts the comparison in
% disfavor of CVX.
minimize sum( norms( T_cvx*J - V .* repmat(sum(V .* (T_cvx*J), 1), [d, 1]) , 2, 1 ) )
sum(T_cvx, 2) == zeros(d, 1); %#ok<NODEF,EQEFF>
VJt(:).' * T_cvx(:) == 1; %#ok<EQEFF>
cvx_end
end
function [I, J, A] = erdosrenyi(n, p) %#ok<DEFNU>
% Generate a random Erdos-Renyi graph with n nodes and edge probability p.
%
% [I, J, A] = erdosrenyi(n, p)
%
% Returns a list of edges (I(k), J(k)) for a random, undirected Erdos-Renyi
% graph with n nodes and edge probability p. A is the adjacency matrix.
%
% I(k) < J(k) for all k, i.e., all(I<J) is true.
%
% The memory requirements for this simple implementation scale as O(n^2).
X = rand(n);
mask = X <= p;
X( mask) = 1;
X(~mask) = 0;
X = triu(X, 1);
% A is the adjacency matrix
A = X + X';
[I, J] = find(X);
end
function [I, J, A] = randomgraph(n, m)
% Generates a random graph over n nodes with at most m edges.
%
% function [I, J, A] = randomgraph(n, m)
%
% Selects m (undirected) edges from a graph over n nodes, uniformly at
% random, with replacement. The self edges and repeated edges are then
% discarded. The remaining number of edges is at most m, and should be
% close to m if m is much smaller than nchoosek(n, 2).
%
% The output satisfies all(I < J). A is the corresponding adjacency matrix.
%
% Uses O(m) memory (not O(n^2)), making it fit for large, sparse graphs.
% Generate m edges at random, with replacement, and remove repetitions.
IJ = unique(sort(randi(n, m, 2), 2), 'rows');
% Remove self-edges if any.
IJ = IJ(IJ(:, 1) ~= IJ(:, 2), :);
% Actual number of selected edges
k = size(IJ, 1);
I = IJ(:, 1);
J = IJ(:, 2);
% Build the adjacency matrix of the graph.
A = sparse([I ; J], [J ; I], ones(2*k, 1), n, n, 2*k);
end