barzilaiborwein.m
function [x, cost, info, options] = barzilaiborwein(problem, x, options)
% Riemannian Barzilai-Borwein solver with non-monotone line-search.
%
% function [x, cost, info, options] = barzilaiborwein(problem)
% function [x, cost, info, options] = barzilaiborwein(problem, x0)
% function [x, cost, info, options] = barzilaiborwein(problem, x0, options)
% function [x, cost, info, options] = barzilaiborwein(problem, [], options)
%
% Apply the Barzilai-Borwein minimization algorithm to the problem defined
% in the problem structure, starting at x0 if it is provided (otherwise, at
% a random point on the manifold). To specify options whilst not specifying
% an initial guess, give x0 as [] (the empty matrix).
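%
% Example (a minimal usage sketch; the sphere manifold and the symmetric
% matrix A are illustrative choices, not part of this solver):
%
%     n = 100;
%     A = randn(n); A = .5*(A + A.');
%     problem.M = spherefactory(n);
%     problem.cost  = @(x) -x.'*(A*x);
%     problem.egrad = @(x) -2*A*x;   % Euclidean gradient
%     [x, cost, info] = barzilaiborwein(problem);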
%
% The algorithm uses its own special non-monotone line-search strategy.
% Therefore, no line-search algorithm should be specified in the problem
% structure or in the options structure.
%
% In most of the examples bundled with the toolbox (see link below), the
% solver can be replaced by the present one if need be.
%
% The outputs x and cost are the last reached point on the manifold and its
% cost. This is not necessarily the best point generated since the method
% is not monotone. The struct-array info contains information about the
% iterations:
% iter : the iteration number (0 for the initial guess)
% cost : cost value
% time : elapsed time in seconds
% gradnorm : Riemannian norm of the gradient
% stepsize : norm of the last tangent vector retracted
% linesearch : information logged by the line-search algorithm
% And possibly additional information logged by options.statsfun.
% For example, type [info.gradnorm] to obtain a vector of the successive
% gradient norms reached.
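%
% For instance, to plot the gradient-norm history after a run (plain
% MATLAB plotting, shown only as an illustration):
%
%     semilogy([info.iter], [info.gradnorm], '.-');
%     xlabel('Iteration'); ylabel('Gradient norm');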
%
% The options structure is used to overwrite the default values. All
% options have a default value and are hence optional. To force an option
% value, pass an options structure with a field options.optionname, where
% optionname is one of the following and the default value is indicated
% between parentheses:
%
% tolgradnorm (1e-6)
% The algorithm terminates if the norm of the gradient drops below this.
% maxiter (1000)
% The algorithm terminates if maxiter iterations have been executed.
% maxtime (Inf)
% The algorithm terminates if maxtime seconds elapsed.
% minstepsize (1e-10)
% The algorithm terminates if the linesearch returns a displacement
% vector (to be retracted) smaller in norm than this value.
% linesearch (@linesearch_hint)
% This option should not be changed, as the present solver has its
% own dedicated line-search strategy.
% strategy ('direct')
% The strategy used for the Barzilai-Borwein stepsize
% 'direct', compute the direct step <s_k,s_k>/<s_k,y_k>
% 'inverse', compute the inverse step <s_k,y_k>/<y_k,y_k>
% 'alternate', alternates between direct and inverse step
% lambdamax (1e3)
% The maximum stepsize allowed by the Barzilai-Borwein method
% lambdamin (1e-3)
% The minimum stepsize allowed by the Barzilai-Borwein method
% lambda0 (1/10)
% The initial stepsize of the Barzilai-Borwein method
% ls_nmsteps (10)
% The non-monotone line-search checks a sufficient decrease with respect
% to the previous ls_nmsteps objective function values.
% statsfun (none)
% Function handle to a function that will be called after each
% iteration to provide the opportunity to log additional statistics.
% They will be returned in the info struct. See the generic Manopt
% documentation about solvers for further information.
% stopfun (none)
% Function handle to a function that will be called at each iteration
% to provide the opportunity to specify additional stopping criteria.
% See the generic Manopt documentation about solvers for further
% information.
% verbosity (3)
% Integer number used to tune the amount of output the algorithm
% generates during execution (mostly as text in the command window).
% The higher, the more output. 0 means silent.
% storedepth (2)
% Maximum number of different points x of the manifold for which a
% store structure will be kept in memory in the storedb. If the
% caching features of Manopt are not used, this is irrelevant. For
% this algorithm, a store depth of 2 should always be sufficient.
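%
% For example, to select the inverse BB strategy and loosen the gradient
% tolerance (the problem structure is assumed to be set up as in the
% example above):
%
%     options.strategy = 'inverse';
%     options.tolgradnorm = 1e-4;
%     [x, cost, info] = barzilaiborwein(problem, [], options);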
%
%
% The implementation of the Barzilai-Borwein method is based on the paper:
%
% B. Iannazzo, M. Porcelli, "The Riemannian Barzilai-Borwein method with
% nonmonotone line-search and the matrix geometric mean computation",
% IMA Journal of Numerical Analysis, 38(1), 2018, https://doi.org/10.1093/imanum/drx015.
%
% See also: steepestdescent conjugategradient trustregions

% This file is part of Manopt: www.manopt.org.
% Original author: Margherita Porcelli, May 31, 2017
% Contributors: Nicolas Boumal, Bruno Iannazzo
% Change log:
%
% Aug. 2, 2018 (NB):
% Now using storedb.remove() to keep the cache lean.

% Verify that the problem description is sufficient for the solver.
if ~canGetCost(problem)
warning('manopt:getCost', ...
'No cost provided. The algorithm will likely abort.');
end
if ~canGetGradient(problem) && ~canGetApproxGradient(problem)
% Note: we do not give a warning if an approximate gradient is
% explicitly given in the problem description, as in that case the
% user seems to be aware of the issue.
warning('manopt:getGradient:approx', ...
['No gradient provided. Using an FD approximation instead (slow).\n' ...
'It may be necessary to increase options.tolgradnorm.\n' ...
'To disable this warning: warning(''off'', ''manopt:getGradient:approx'')']);
problem.approxgrad = approxgradientFD(problem);
end
% Ensure options exists as a structure
if ~exist('options', 'var') || isempty(options)
options = struct();
end
% Set local defaults here
localdefaults.minstepsize = 1e-10;
localdefaults.maxiter = 1000;
localdefaults.tolgradnorm = 1e-6;
% Upper and lower bound for the Barzilai-Borwein stepsize
localdefaults.lambdamax = 1e3;
localdefaults.lambdamin = 1e-3;
% Initial Barzilai-Borwein stepsize
localdefaults.lambda0 = 1/10;
% Barzilai-Borwein strategy (direct, inverse or alternate)
localdefaults.strategy = 'direct';
% Line-search parameters
% 1) Make sure the user didn't try to define a line search
if canGetLinesearch(problem) || isfield(options, 'linesearch')
error('manopt:BB:ls', ...
['The problem structure may not specify a line-search ' ...
'hint for the BB solver,\nand the options structure ' ...
'may not specify a line-search algorithm for BB.']);
end
% 2) Define the line-search parameters
problem.linesearch = @(x, d, storedb, key) 1;
options.linesearch = @linesearch_hint;
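% Note: linesearch_hint queries problem.linesearch for its initial step
% length. Returning 1 makes the first trial point the full Barzilai-Borwein
% step, since the BB stepsize lambda is already folded into the search
% direction (desc_dir = -lambda*grad below); the line-search backtracks
% from there if the nonmonotone Armijo condition fails.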
% The Armijo sufficient decrease parameter
localdefaults.ls_suff_decr = 1e-4;
% The previous steps checked in the non-monotone line-search strategy
localdefaults.ls_nmsteps = 10;
% Merge global and local defaults, then merge w/ user options, if any.
localdefaults = mergeOptions(getGlobalDefaults(), localdefaults);
options = mergeOptions(localdefaults, options);
% Shorthands for some parameters
strategy = options.strategy;
lambdamax = options.lambdamax;
lambdamin = options.lambdamin;
lambda0 = options.lambda0;
timetic = tic();
% If no initial point x is given by the user, generate one at random.
if ~exist('x', 'var') || isempty(x)
x = problem.M.rand();
end
% Create a store database and get a key for the current x
storedb = StoreDB(options.storedepth);
key = storedb.getNewKey();
% Compute objective-related quantities for x
[cost, grad] = getCostGrad(problem, x, storedb, key);
gradnorm = problem.M.norm(x, grad);
% Some variables below store information about the iterations. We
% preallocate for a reasonable number of iterations to avoid repeated
% memory reallocation.
mem_init_size = min(10000, options.maxiter+1);
% Store the cost value
f = zeros(mem_init_size, 1);
f(1) = cost;
% Iteration counter (at any point, iter is the number of fully executed
% iterations so far)
iter = 0;
% Save stats in a struct array info, and preallocate.
stats = savestats();
info(1) = stats;
info(mem_init_size).iter = [];
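% (Assigning to index mem_init_size grows the struct array to its full
% preallocated size in a single allocation.)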
if options.verbosity >= 2
fprintf(' iter\t cost val\t grad. norm\n');
end
% Set the initial Barzilai-Borwein stepsize
lambda = lambda0;
% Start iterating until stopping criterion triggers
while true
% Display iteration information
if options.verbosity >= 2
fprintf('%5d\t%+.16e\t%.8e\n', iter, cost, gradnorm);
end
% Start timing this iteration
timetic = tic();
% Run standard stopping criterion checks
[stop, reason] = stoppingcriterion(problem, x, options, ...
info, iter+1);
% If none triggered, run specific stopping criterion check
if ~stop && stats.stepsize < options.minstepsize
stop = true;
reason = sprintf(['Last stepsize smaller than minimum ' ...
'allowed; options.minstepsize = %g.'], ...
options.minstepsize);
end
if stop
if options.verbosity >= 1
fprintf([reason '\n']);
end
break;
end
% Pick the descent direction: minus the gradient, scaled by the BB stepsize lambda
desc_dir = problem.M.lincomb(x, -lambda, grad);
% Execute the nonmonotone line search
k = iter + 1;
start = max(1, k - options.ls_nmsteps + 1);
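% The reference value passed to the line-search is the largest of the most
% recent (up to ls_nmsteps) cost values, which yields a nonmonotone Armijo
% test; the last argument is the directional derivative of the cost along
% desc_dir at x, namely <grad, -lambda*grad> = -lambda*gradnorm^2.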
[stepsize, newx, newkey, lsstats] = ...
options.linesearch(problem, x, desc_dir, max(f(start:k)), ...
-lambda * gradnorm^2, options, storedb, key);
% Update lambda with the scaling factor alpha chosen by the line-search
lambda = lambda * lsstats.alpha;
% Compute the new cost-related quantities for newx
[newcost, newgrad] = getCostGrad(problem, newx, storedb, newkey);
newgradnorm = problem.M.norm(newx, newgrad);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% BARZILAI-BORWEIN STRATEGY
% Store the cost value
f(iter+2) = newcost;
% Transport the old gradient to newx
grad_transp = problem.M.transp(x, newx, grad);
% Compute the difference between the new gradient and the transported one
Y = problem.M.lincomb(newx, 1, newgrad, -1, grad_transp);
% Compute the transported step
Stransp = problem.M.lincomb(x, -lambda, grad_transp);
% Compute the new Barzilai-Borwein step following the strategy
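% With S = Stransp and Y as above, the code below implements the classical
% BB stepsizes <S,S>/<S,Y> (direct) and <S,Y>/<Y,Y> (inverse), clipped to
% [lambdamin, lambdamax]; if the curvature estimate <S,Y> is not positive,
% it falls back to lambdamax.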
% direct strategy
if strcmp(strategy, 'direct')
num = problem.M.norm(newx, Stransp)^2;
den = problem.M.inner(newx, Stransp, Y);
if den > 0
lambda = min( lambdamax, max(lambdamin, num/den) );
else
lambda = lambdamax;
end
end
% inverse strategy
if strcmp(strategy, 'inverse')
num = problem.M.inner(newx, Stransp, Y);
den = problem.M.norm(newx, Y)^2;
if num > 0
lambda = min( lambdamax, max(lambdamin, num/den) );
else
lambda = lambdamax;
end
end
% alternate strategy
if strcmp(strategy, 'alternate')
num = problem.M.norm(newx, Stransp)^2;
den = problem.M.inner(newx, Stransp, Y);
den2 = problem.M.norm(newx, Y)^2;
if (den > 0)
if mod(iter,2)==0
lambda = min( lambdamax, max(lambdamin, num/den) );
else
lambda = min( lambdamax, max(lambdamin, den/den2) );
end
else
lambda = lambdamax;
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Update iterate info
storedb.removefirstifdifferent(key, newkey);
x = newx;
key = newkey;
cost = newcost;
grad = newgrad;
gradnorm = newgradnorm;
% iter is the number of iterations we have accomplished.
iter = iter + 1;
% Make sure we don't use too much memory for the store database.
storedb.purge();
% Log statistics for freshly executed iteration
stats = savestats();
info(iter+1) = stats;
end
info = info(1:iter+1);
if options.verbosity >= 1
fprintf('Total time is %f [s] (excludes statsfun)\n', ...
info(end).time);
end
% Routine in charge of collecting the current iteration stats
function stats = savestats()
stats.iter = iter;
stats.cost = cost;
stats.gradnorm = gradnorm;
if iter == 0
stats.stepsize = NaN;
stats.time = toc(timetic);
stats.linesearch = [];
else
stats.stepsize = stepsize;
stats.time = info(iter).time + toc(timetic);
stats.linesearch = lsstats;
end
stats = applyStatsfun(problem, x, storedb, key, options, stats);
end
end